Skip to content
Snippets Groups Projects
Commit 4fbe162b authored by Laura Clarke's avatar Laura Clarke
Browse files

AnalysisAdaptor - logic_name must be unique so fetch_by_logic_name now only...

AnalysisAdaptor - logic_name must be unique so fetch_by_logic_name now only returns one analysis object
DnaAlignFeatureAdaptor - formatting print statements etc
ProteinAlignFeatureAdaptor - Now follows the same format as DnaAlignFeatureAdaptor, has methods to fetch by contig, slice and assembly location and when constrainted by score
or percent id all methods can take an optional type which is logic name
RepeatFeatureAdaptor
PredictionTranscriptAdaptor all have methods which fetch by slice, contig_id and assembly location and each method has optional type which should be logic_name
SimpleFeatureAdaptor
parent c5e689d0
No related branches found
No related tags found
No related merge requests found
......@@ -171,32 +171,16 @@ sub fetch_by_dbID {
sub fetch_by_newest_logic_name {
my $self = shift;
my $logic_name = shift;
my $sth = $self->prepare( q{
SELECT analysis_id, logic_name,
program, program_version, program_file,
db, db_version, db_file,
module, module_version,
gff_source, gff_feature,
created, parameters
FROM analysis
WHERE logic_name = ?
ORDER BY created DESC } );
$sth->execute( $logic_name );
my $rowHashRef = $sth->fetchrow_hashref;
if( ! defined $rowHashRef ) {
return undef;
}
$self->warn("logic_names should now be unique should need to use this method use fetch_by_logic_name\n");
return $self->_objFromHashref( $rowHashRef );
return $self->fetch_by_logic_name($logic_name);
}
sub fetch_by_logic_name {
my $self = shift;
my $logic_name = shift;
my @result;
my $analysis;
my $rowHash;
......@@ -208,18 +192,14 @@ sub fetch_by_logic_name {
gff_source, gff_feature,
created, parameters
FROM analysis
WHERE logic_name = ?
ORDER BY created DESC } );
WHERE logic_name = ? } );
$sth->execute( $logic_name );
my $rowHashRef;
while( $rowHashRef = $sth->fetchrow_hashref ) {
$analysis = $self->_objFromHashref( $rowHashRef );
if( defined $analysis ) {
push( @result, $analysis );
}
}
return @result;
$rowHashRef = $sth->fetchrow_hashref;
$analysis = $self->_objFromHashref( $rowHashRef );
return $analysis;
}
......
......@@ -98,7 +98,7 @@ sub fetch_by_dbID{
}
=head2 fetch_by_contig_id_and_constraint
=head2 fetch_by_contig_id_constraint
Title : fetch_by_contig_id
Usage :
......@@ -110,7 +110,7 @@ sub fetch_by_dbID{
=cut
sub fetch_by_contig_id_and_constraint{
sub fetch_by_contig_id_constraint{
my ($self,$cid, $constraint) = @_;
if( !defined $cid ) {
......@@ -153,12 +153,11 @@ sub fetch_by_contig_id{
my $constraint = undef;
if($logic_name){
#print "fetching analysis obj as logic_name = ".$logic_name."\n";
my $aa = $self->db->get_AnalysisAdaptor($logic_name);
$analysis = $aa->fetch_by_logic_name($logic_name);
$constraint = " d.analysis_id = ".$analysis->dbID();
}
my @features = $self->fetch_by_contig_id_and_constraint($cid, $constraint);
my @features = $self->fetch_by_contig_id_constraint($cid, $constraint);
return @features;
}
......@@ -183,7 +182,7 @@ sub fetch_by_contig_id_and_score{
}
my @features = $self->fetch_by_contig_id_and_constraint($cid, $constraint);
my @features = $self->fetch_by_contig_id_constraint($cid, $constraint);
return @features;
......@@ -210,7 +209,7 @@ sub fetch_by_contig_id_and_pid{
}
my @features = $self->fetch_by_contig_id_and_constraint($cid, $constraint);
my @features = $self->fetch_by_contig_id_constraint($cid, $constraint);
return @features;
......@@ -233,23 +232,21 @@ sub fetch_by_Slice{
$analysis = $aa->fetch_by_logic_name($logic_name);
$constraint .= " d.analysis_id = ".$analysis->dbID();
}
#print "fetching feature for chr ".$slice->chr_name." start ".$slice->chr_start." end ".$slice->chr_end." type ".$slice->assembly_type."\n";
my @features = $self->fetch_by_assembly_location_constraint($slice->chr_start,$slice->chr_end,$slice->chr_name,$slice->assembly_type, $constraint);
my @out_f;
#print STDERR "started converting coordinates\n";
foreach my $f(@features){
#print STDERR "chr start = ".$f->start."\n";
#print STDERR "chr end = ".$f->end."\n";
my $start = ($f->start - ($slice->chr_start - 1));
my $end = ($f->end - ($slice->chr_start - 1));
#print STDERR "slice start = ".$start."\n";
#print STDERR "slice end = ".$end."\n";
my $out = $self->_new_feature($start,$end,$f->strand,$f->score,$f->hstart,$f->hend,$f->hstrand,$f->hseqname,$f->cigar_string,$f->analysis,$f->percent_id,$f->p_value,$f->seqname,undef);
push(@out_f, $out);
}
# print STDERR "finished converting cooridinate\n";
return @out_f;
}
......@@ -274,7 +271,7 @@ sub fetch_by_Slice_and_score {
$analysis = $aa->fetch_by_logic_name($logic_name);
$constraint .= " and d.analysis_id = ".$analysis->dbID();
}
#print "constraint ".$constraint."\n";
my @features = $self->fetch_by_assembly_location_constraint($slice->chr_start,$slice->chr_end,$slice->chr_name,$slice->assembly_type, $constraint);
my @out_f;
......@@ -419,7 +416,7 @@ sub fetch_by_assembly_location_and_pid{
sub fetch_by_assembly_location_constraint{
my ($self,$chr_start,$chr_end,$chr,$type,$constraint) = @_;
#print STDERR "started fetch_by assembly location\n";
if( !defined $type ) {
$self->throw("Assembly location must be start,end,chr,type");
}
......@@ -437,7 +434,7 @@ sub fetch_by_assembly_location_constraint{
# build the SQL
#print STDERR "have @cids contig ids\n";
if( scalar(@cids) == 0 ) {
return ();
......@@ -450,7 +447,7 @@ sub fetch_by_assembly_location_constraint{
if($constraint) {
$sql .= " AND $constraint";
}
print STDERR "SQL $sql\n";
#print STDERR "SQL $sql\n";
my $sth = $self->prepare($sql);
......@@ -495,8 +492,7 @@ sub fetch_by_assembly_location_constraint{
push(@f,$out);
}
#print STDERR "have ".$counter." gaps\n";
#print STDERR "finished fetch by assembly location\n";
return @f;
}
......@@ -558,11 +554,7 @@ sub _new_feature {
if( !defined $seqname ) {
$self->throw("Internal error - wrong number of arguments to new_feature");
}
#print STDERR "start = ".$start."\n";
#print STDERR "end = ".$end."\n";
#print STDERR "hstart = ".$hstart."\n";
#print STDERR "hend = ".$hend."\n";
#print STDERR "cigar ".$cigar."\n";
my $f1 = Bio::EnsEMBL::SeqFeature->new();
my $f2 = Bio::EnsEMBL::SeqFeature->new();
......@@ -590,7 +582,7 @@ sub _new_feature {
my $out = Bio::EnsEMBL::DnaDnaAlignFeature->new( -cigar_string => $cigar, -feature1 => $f1, -feature2 => $f2);
#print "outputting feature with cigar ".$out->cigar_string."\n";
return $out;
}
......
......@@ -87,38 +87,31 @@ sub fetch_by_dbID {
return $res[0];
}
sub fetch_by_Contig{
my ($self, $contig, $logic_name) = @_;
my @results = $self->fetch_by_contig_id($contig->dbID, $logic_name);
return @results;
}
=head2 fetch_by_Slice
Arg 1 : Bio::EnsEMBL::Slice $slice
start, end, chromosome and type are used from the slice.
Function : retrieves PTs from database which overlap with given slice.
PTs not fully in the slice are retrieved partially, with exons
set to undef.
Returntype: listref Bio::EnsEMBL::PredictionTranscript
Exceptions: none, list can be empty.
Caller : Slice or WebSlice
=cut
sub fetch_by_contig_id{
my ($self, $contig_id, $logic_name) = @_;
my $constraint = undef;
sub fetch_by_Slice {
my $self = shift;
my $slice = shift;
if($logic_name){
my $analysis = $self->db->get_AnalysisAdaptor->fetch_by_logic_name($logic_name);
$constraint = " analysis_id = ".$analysis->dbID;
}
$self->throw( "Not implemented yet" );
}
my @results = $self->fetch_by_contig_id_constraint($contig_id, $constraint);
sub fetch_by_Slice_and_logic_name {
my $self = shift;
my $slice = shift;
my $logic_name = shift;
return @results;
$self->throw( "Not implemented yet" );
}
=head2 fetch_by_Contig
=head2 fetch_by_contig_id_constraint
Arg 1 : Bio::EnsEMBL::RawContig $contig
Only dbID in Contig is used.
......@@ -130,9 +123,10 @@ sub fetch_by_Slice_and_logic_name {
=cut
sub fetch_by_Contig {
sub fetch_by_contig_id_constraint {
my $self = shift;
my $contig = shift;
my $contig_id = shift;
my $constraint = shift;
my $query = qq {
SELECT p.prediction_transcript_id
......@@ -149,49 +143,171 @@ sub fetch_by_Contig {
FROM prediction_transcript p
WHERE p.contig_id = ?
ORDER BY p.prediction_transcript_id, p.exon_rank
};
};
if($constraint){
$query .= " and ".$constraint;
}
$query .= " order by p.prediction_transcript_id, p.exon_rank";
#print $query."\n";
my $sth = $self->prepare( $query );
$sth->execute( $contig->dbID );
$sth->execute( $contig_id );
my @res = $self->_ptrans_from_sth( $sth );
return \@res;
return @res;
}
sub fetch_by_Contig_and_logic_name {
my $self = shift;
my $contig = shift;
my $logic_name = shift;
my $query = qq {
sub fetch_by_Slice{
my ($self, $slice, $logic_name) = @_;
my $constraint = undef;
if($logic_name){
my $analysis = $self->db->get_AnalysisAdaptor->fetch_by_logic_name($logic_name);
$constraint = " analysis_id = ".$analysis->dbID;
}
my @results = $self->fetch_by_assembly_location_constraint($slice->chr_start, $slice->chr_end, $slice->chr_name, $slice->assembly_type, $constraint);
my @out;
GENE: foreach my $transcript(@results){
my $exon_count = 1;
my $pred_t = Bio::EnsEMBL::PredictionTranscript->new();
$pred_t->dbID($transcript->dbID);
$pred_t->adaptor($self);
$pred_t->analysis($transcript->analysis);
$pred_t->set_exon_count($transcript->get_exon_count);
my @exons = $transcript->get_all_Exons;
my @sorted_exons;
if($exons[0]->strand == 1){
@sorted_exons = sort{$a->start <=> $b->start} @exons;
}else{
@sorted_exons = sort{$b->start <=> $a->start} @exons;
}
my $contig = $sorted_exons[0]->contig;
EXON:foreach my $e(@sorted_exons){
my $start = ($e->start - ($slice->chr_start - 1));
my $end = ($e->end - ($slice->chr_start - 1));
my $exon = $self->_new_Exon($start, $end, $e->strand, $e->phase, $e->score, $e->p_value, $contig);
$pred_t->add_Exon( $exon, $exon_count );
$exon_count++;
}
push(@out, $pred_t);
}
return @out;
}
sub fetch_by_assembly_location{
my ($self, $chr_start, $chr_end, $chr, $type, $logic_name) = @_;
my $constraint = undef;
if($logic_name){
my $analysis = $self->db->get_AnalysisAdaptor->fetch_by_logic_name($logic_name);
$constraint = " analysis_id = ".$analysis->dbID;
}
my @results = $self->fetch_by_assembly_location_constraint($chr_start, $chr_end, $chr, $type, $constraint);
return @results;
}
sub fetch_by_assembly_location_constraint{
my ($self, $chr_start, $chr_end, $chr, $type, $constraint) = @_;
if( !defined $type ) {
$self->throw("Assembly location must be start,end,chr,type");
}
if( $chr_start !~ /^\d/ || $chr_end !~ /^\d/ ) {
$self->throw("start/end must be numbers not $chr_start,$chr_end (have you typed the location in the right way around - start,end,chromosome,type)?");
}
my $mapper = $self->db->get_AssemblyMapperAdaptor->fetch_by_type($type);
$mapper->register_region($chr,$chr_start,$chr_end);
my @cids = $mapper->list_contig_ids($chr, $chr_start ,$chr_end);
my %ana;
my $cid_list = join(',',@cids);
my $sql = qq {
SELECT p.prediction_transcript_id
, p.contig_id
, p.contig_start
, p.contig_end
, p.contig_strand
, p.start_phase
, p.exon_rank
, p.score
, p.p_value
, p.analysis_id
, p.exon_count
, p.contig_id
, p.contig_start
, p.contig_end
, p.contig_strand
, p.start_phase
, p.exon_rank
, p.score
, p.p_value
, p.analysis_id
, p.exon_count
FROM prediction_transcript p, analysis a
WHERE p.contig_id = ?
AND a.analysis_id = p.analysis_id
AND a.logic_name = ?
ORDER BY p.prediction_transcript_id, p.exon_rank
};
FROM prediction_transcript p
WHERE
};
my $sth = $self->prepare( $query );
$sth->execute( $contig->dbID, $logic_name );
$sql .= "contig_id in($cid_list) ";
my @res = $self->_ptrans_from_sth( $sth );
return \@res;
if($constraint){
$sql .= " and $constraint";
}
my $sth = $self->prepare($sql);
$sth->execute;
my @results = $self->_ptrans_from_sth($sth);
my @out;
GENE: foreach my $transcript(@results){
my $exon_count = 1;
my $pred_t = Bio::EnsEMBL::PredictionTranscript->new();
$pred_t->dbID($transcript->dbID);
$pred_t->adaptor($self);
$pred_t->analysis($transcript->analysis);
$pred_t->set_exon_count($transcript->get_exon_count);
my @exons = $transcript->get_all_Exons;
my @sorted_exons;
if($exons[0]->strand == 1){
@sorted_exons = sort{$a->start <=> $b->start} @exons;
}else{
@sorted_exons = sort{$b->start <=> $a->start} @exons;
}
my $contig = $sorted_exons[0]->contig;
EXON:foreach my $e(@sorted_exons){
my @coord_list = $mapper->map_coordinates_to_assembly($e->contig->dbID, $e->start, $e->end, $e->strand, "rawcontig");
if( scalar(@coord_list) > 1 ) {
#$self->warn("maps to ".scalar(@coord_list)." coordinate objs not all of feature will be on golden path skipping\n");
next GENE;
}
if($coord_list[0]->isa("Bio::EnsEMBL::Mapper::Gap")){
#$self->warn("this feature is on a part of $contig_id which isn't on the golden path skipping");
next GENE;
}
if(!($coord_list[0]->start >= $chr_start) ||
!($coord_list[0]->end <= $chr_end)) {
next GENE;
}
my $exon = $self->_new_Exon($coord_list[0]->start, $coord_list[0]->end, $coord_list[0]->strand, $e->phase, $e->score, $e->p_value, $contig);
$pred_t->add_Exon( $exon, $exon_count );
$exon_count++;
}
push(@out, $pred_t);
}
return @out;
}
=head2 _ptrans_from_sth
Arg 1 : DBI:st $statement_handle
......@@ -214,11 +330,12 @@ sub _ptrans_from_sth {
my $pre_trans = undef;
my $pre_trans_id = undef;
my @result = ();
my $count = 0;
my $exon_count = 0;
while( my $hashRef = $sth->fetchrow_hashref() ) {
if(( ! defined $pre_trans ) ||
( $pre_trans_id != $hashRef->{'prediction_transcript_id'} )) {
$count++;
$pre_trans = Bio::EnsEMBL::PredictionTranscript->new();
$pre_trans_id = $hashRef->{'prediction_transcript_id'};
$pre_trans->dbID( $pre_trans_id );
......@@ -232,7 +349,9 @@ sub _ptrans_from_sth {
my $exon = $self->_new_Exon_from_hashRef( $hashRef );
$pre_trans->add_Exon( $exon, $hashRef->{'exon_rank'} );
$exon_count++;
}
#print "have created ".$count." transcripts and ".$exon_count." exons\n";
return @result;
}
......@@ -280,7 +399,27 @@ sub _new_Exon_from_hashRef {
sub _new_Exon{
my ($self, $start, $end, $strand, $phase, $score, $pvalue, $contig) = @_;
my $exon = Bio::EnsEMBL::Exon->new();
$exon->start( $start);
$exon->end( $end );
$exon->strand( $strand );
$exon->phase( $phase );
$exon->contig( $contig );
$exon->attach_seq( $contig->seq() );
$exon->ori_start( $start );
$exon->ori_end( $end );
$exon->ori_strand( $strand );
# does exon not have score?
$exon->score( $score );
$exon->p_value( $pvalue );
return $exon;
}
=head2 store
......
This diff is collapsed.
......@@ -12,9 +12,9 @@ use vars qw(@ISA);
@ISA = ('Bio::EnsEMBL::DBSQL::BaseAdaptor');
sub fetch_by_RawContig {
my( $self, $contig ) = @_;
my( $self, $contig, $logic_name ) = @_;
my @repeats = $self->fetch_by_contig_id($contig->dbID);
my @repeats = $self->fetch_by_contig_id($contig->dbID, $logic_name);
foreach my $r (@repeats) {
$r->attach_seq($contig);
}
......@@ -22,11 +22,15 @@ sub fetch_by_RawContig {
}
sub fetch_by_contig_id {
my( $self, $contig_id ) = @_;
my( $self, $contig_id, $logic_name ) = @_;
my $constraint = "contig_id = $contig_id";
return $self->_generic_fetch(
qq{ contig_id = $contig_id }
);
if($logic_name){
my $analysis = $self->db->get_AnalysisAdaptor->fetch_by_logic_name($logic_name);
$constraint .= " AND analysis_id = ".$analysis->dbID;
}
return $self->_generic_fetch($constraint);
}
sub fetch_by_dbID {
......@@ -38,40 +42,9 @@ sub fetch_by_dbID {
return $rf;
}
sub fetch_by_logic_name {
my( $self, $logic_name ) = @_;
my $analysis = $self->db->get_AnalysisAdaptor->fetch_by_newest_logic_name($logic_name);
my $db_id = $analysis->dbID;
my @rf = $self->_generic_fetch(
qq{ analysis_id = $db_id }
);
return @rf;
}
sub fetch_by_logic_name_and_contig_id {
my( $self, $logic_name, $contig_id ) = @_;
my $analysis = $self->db->get_AnalysisAdaptor->fetch_by_newest_logic_name($logic_name);
my $db_id = $analysis->dbID;
my @rf = $self->_generic_fetch(
qq{ analysis_id = $db_id and contig_id = $contig_id }
);
return @rf;
}
sub fetch_by_logic_name_and_RawContig {
my( $self, $contig, $logic_name ) = @_;
my @repeats = $self->fetch_by_logic_name_and_contig_id($logic_name, $contig->dbID);
foreach my $r (@repeats) {
$r->attach_seq($contig);
}
return @repeats;
}
sub _generic_fetch {
my( $self, $where_clause ) = @_;
......@@ -119,33 +92,120 @@ sub _generic_fetch {
my( @repeats, %analysis_cache );
while ($sth->fetch) {
# new in RepeatFeature takes no arguments
my $r = Bio::EnsEMBL::RepeatFeature->new;
$r->dbID($repeat_feature_id);
# So RepeatFeature can get its repeat
$r->repeat_consensus_adaptor($rca);
$r->repeat_id($repeat_id);
$r->contig_id( $contig_id );
$r->start ( $contig_start );
$r->end ( $contig_end );
$r->strand ( $contig_strand );
$r->hstart ( $repeat_start );
$r->hend ( $repeat_end );
my( $ana_obj );
unless ($ana_obj = $analysis_cache{$analysis_id}) {
$ana_obj = $aa->fetch_by_dbID($analysis_id)
or $self->throw("No analysis object for ID '$analysis_id'");
$analysis_cache{$analysis_id} = $ana_obj;
}
$r->analysis($ana_obj);
push(@repeats, $r);
my( $ana_obj );
unless ($ana_obj = $analysis_cache{$analysis_id}) {
$ana_obj = $aa->fetch_by_dbID($analysis_id)
or $self->throw("No analysis object for ID '$analysis_id'");
$analysis_cache{$analysis_id} = $ana_obj;
}
my $r = $self->_new_repeat($contig_start, $contig_end, $contig_strand, $repeat_start, $repeat_end, $ana_obj, $contig_id, $repeat_id, $rca, $repeat_feature_id);
push(@repeats, $r);
}
return( @repeats );
}
sub fetch_by_assembly_location{
my ($self,$start,$end,$chr,$type, $logic_name) = @_;
my $constraint;
if($logic_name){
my $analysis = $self->db->get_AnalysisAdaptor->fetch_by_logic_name($logic_name);
$constraint = " analysis_id = ".$analysis->dbID;
}
my @repeats = $self->fetch_by_assembly_location_constraint($start, $end, $chr, $type, $constraint);
return @repeats;
}
sub fetch_by_Slice{
my ($self, $slice, $logic_name) = @_;
my $constraint;
if($logic_name){
my $analysis = $self->db->get_AnalysisAdaptor->fetch_by_logic_name($logic_name);
$constraint = " analysis_id = ".$analysis->dbID;
}
my @repeats = $self->fetch_by_assembly_location_constraint($slice->chr_start, $slice->chr_end, $slice->chr_name, $slice->assembly_type, $constraint);
my @out;
foreach my $r(@repeats){
my $start = ($r->start - ($slice->chr_start - 1));
my $end = ($r->end - ($slice->chr_start - 1));
my $repeat = $self->_new_repeat($start, $end, $r->strand, $r->hstart, $r->hend, $r->analysis, $r->contig_id, $r->repeat_id, $r->repeat_consensus_adaptor, $r->dbID);
push(@out, $repeat);
}
return(@out);
}
sub fetch_by_assembly_location_constraint{
my ($self,$chr_start,$chr_end,$chr,$type,$constraint) = @_;
if( !defined $type ) {
$self->throw("Assembly location must be start,end,chr,type");
}
if( $chr_start !~ /^\d/ || $chr_end !~ /^\d/ ) {
$self->throw("start/end must be numbers not $chr_start,$chr_end (have you typed the location in the right way around - start,end,chromosome,type)?");
}
my $mapper = $self->db->get_AssemblyMapperAdaptor->fetch_by_type($type);
$mapper->register_region($chr,$chr_start,$chr_end);
my @cids = $mapper->list_contig_ids($chr, $chr_start ,$chr_end);
my %ana;
my $cid_list = join(',',@cids);
my $sql = "contig_id in($cid_list) ";
if($constraint){
$sql .= "AND $constraint";
}
my @repeats = $self->_generic_fetch(qq{$sql});
my @out;
foreach my $r(@repeats){
my $analysis_id = $r->analysis->dbID();
my @coord_list = $mapper->map_coordinates_to_assembly($r->contig_id, $r->start, $r->end, $r->strand, "rawcontig");
if(scalar(@coord_list) > 1){
#$self->warn("this feature doesn't cleanly map skipping\n");
next;
}
if($coord_list[0]->isa("Bio::EnsEMBL::Mapper::Gap")){
#$self->warn("this feature is on a part of ".$r->contig_id." which isn't on the golden path skipping");
next;
}
if(!($coord_list[0]->start >= $chr_start) ||
!($coord_list[0]->end <= $chr_end)){
next;
}
my $repeat = $self->_new_repeat($coord_list[0]->start, $coord_list[0]->end, $coord_list[0]->strand, $r->hstart, $r->hend, $r->analysis, $r->contig_id, $r->repeat_id, $r->repeat_consensus_adaptor, $r->dbID);
push(@out, $repeat);
}
return @out;
}
sub store {
my( $self, $contig_id, @repeats ) = @_;
......@@ -228,6 +288,35 @@ sub store {
or $self->throw("Didn't get an insertid from the INSERT statement");
$rf->dbID($db_id);
}
}
sub _new_repeat{
my($self, $start, $end, $strand, $hstart, $hend, $analysis, $contig_id, $repeat_id, $rca, $dbID) = @_;
my $r = Bio::EnsEMBL::RepeatFeature->new;
$r->dbID($dbID);
# So RepeatFeature can get its repeat
$r->repeat_consensus_adaptor($rca);
$r->repeat_id($repeat_id);
$r->contig_id( $contig_id);
$r->start ( $start );
$r->end ( $end );
if($strand == -0){
$strand = 0;
}
$r->strand ( $strand );
$r->hstart ( $hstart );
$r->hend ( $hend );
$r->analysis($analysis);
return $r;
}
1;
......
......@@ -74,24 +74,17 @@ sub fetch_by_dbID{
$self->throw("No simple feature with id $id");
}
my $contig = $self->db->get_RawContigAdaptor->fetch_by_dbID($contig_id);
my $out = Bio::EnsEMBL::SimpleFeature->new();
my $contig = $self->db->get_RawContigAdaptor->fetch_by_dbID($contig_id);
my $ana = $self->db->get_AnalysisAdaptor->fetch_by_dbID($analysis_id);
$out->start($start);
$out->end($end);
$out->strand($strand);
my $out = $self->_new_feature($start, $end, $strand, $display, $ana, $contig->id, $contig->seq);
$out->display_label($display);
$out->seqname($contig->id);
$out->attach_seq($contig->seq);
return $out;
}
=head2 fetch_by_contig_id
=head2 fetch_by_contig_id_constraint
Title : fetch_by_contig_id
Title : fetch_by_contig_id_constraint
Usage :
Function:
Example :
......@@ -101,55 +94,18 @@ sub fetch_by_dbID{
=cut
sub fetch_by_contig_id{
my ($self,$cid) = @_;
sub fetch_by_contig_id_constraint{
my ($self,$cid, $constraint) = @_;
if( !defined $cid ) {
$self->throw("fetch_by_contig_id must have an contig id");
}
my $sth = $self->prepare("select s.contig_id,s.contig_start,s.contig_end,s.contig_strand,s.display_label,s.analysis_id from simple_feature s where s.contig_id = $cid");
$sth->execute();
my ($contig_id,$start,$end,$strand,$display,$analysis_id);
$sth->bind_columns(undef,\$contig_id,\$start,\$end,\$strand,\$display,\$analysis_id);
my @f;
my $contig = $self->db->get_RawContigAdaptor->fetch_by_dbID($cid);
my %ana;
while( $sth->fetch ) {
if( !defined $ana{$analysis_id} ) {
$ana{$analysis_id} = $self->db->get_AnalysisAdaptor->fetch_by_dbID($analysis_id);
}
my $out = Bio::EnsEMBL::SimpleFeature->new();
$out->start($start);
$out->end($end);
$out->strand($strand);
$out->analysis($ana{$analysis_id});
$out->display_label($display);
$out->seqname($contig->name);
$out->attach_seq($contig->seq);
push(@f,$out);
}
return @f;
}
sub fetch_by_contig_id_and_logic_name{
my ($self,$cid, $logic_name) = @_;
print $cid." ".$logic_name."\n";
if( !defined $cid ) {
$self->throw("fetch_by_contig_id must have an contig id");
my $sql = "select s.contig_id,s.contig_start,s.contig_end,s.contig_strand,s.display_label,s.analysis_id from simple_feature s where s.contig_id = $cid";
if($constraint){
$sql .= " and $constraint";
}
if(!$logic_name){
$self->throw("need to have a logic_name to fetch by logic_name\n");
}
my $sth = $self->prepare("select s.contig_id,s.contig_start,s.contig_end,s.contig_strand,s.display_label,s.analysis_id from simple_feature s, analysis a where s.contig_id = $cid and s.analysis_id = a.analysis_id and a.logic_name = '$logic_name'");
my $sth = $self->prepare($sql);
$sth->execute();
my ($contig_id,$start,$end,$strand,$display,$analysis_id);
......@@ -165,85 +121,31 @@ sub fetch_by_contig_id_and_logic_name{
$ana{$analysis_id} = $self->db->get_AnalysisAdaptor->fetch_by_dbID($analysis_id);
}
my $out = Bio::EnsEMBL::SimpleFeature->new();
$out->start($start);
$out->end($end);
$out->strand($strand);
$out->analysis($ana{$analysis_id});
$out->display_label($display);
$out->seqname($contig->name);
$out->attach_seq($contig->seq);
my $out = $self->_new_feature($start, $end, $strand, $display, $ana{$analysis_id}, $contig->dbID, $contig->seq);
push(@f,$out);
}
return @f;
}
sub fetch_by_assembly_location_and_logic_name{
my ($self,$start,$end,$chr,$type, $logic_name) = @_;
if( !defined $type ) {
$self->throw("Assembly location must be start,end,chr,type");
}
if( $start !~ /^\d/ || $end !~ /^\d/ ) {
$self->throw("start/end must be numbers not $start,$end (have you typed the location in the right way around - start,end,chromosome,type");
}
my $mapper = $self->db->get_AssemblyMapperAdaptor->fetch_by_type($type);
$mapper->register_region($chr,$start,$end);
my @cids = $mapper->list_contig_ids($chr,$start,$end);
print "have ".scalar(@cids)." contig ids\n";
# build the SQL
my $cid_list = join(', ',@cids);
print $cid_list."\n";
print "select p.contig_id,p.contig_start,p.contig_end,p.contig_strand,p.analysis_id, p.score from simple_feature p, analysis a where p.contig_id in (".$cid_list.") and a.analysis_id = p.analysis_id and a.logic_name = '".$logic_name."'\n";
my $sth = $self->prepare("select p.contig_id,p.contig_start,p.contig_end,p.contig_strand,p.analysis_id, p.score from simple_feature p, analysis a where p.contig_id in ($cid_list) and a.analysis_id = p.analysis_id and a.logic_name = '$logic_name'");
$sth->execute();
my ($contig_id,$start,$end,$strand,$display,$analysis_id);
$sth->bind_columns(undef,\$contig_id,\$start,\$end,\$strand,\$display,\$analysis_id);
my @f;
my %ana;
while( $sth->fetch ) {
# we whether this is sensible to use or not
my @coord_list = $mapper->map_coordinates_to_assembly($contig_id, $start,$end,$strand);
# coord list > 1 - means does not cleanly map. At the moment, skip
if( scalar(@coord_list) > 1 ) {
next;
}
if( !defined $ana{$analysis_id} ) {
$ana{$analysis_id} = $self->db->get_AnalysisAdaptor->fetch_by_dbID($analysis_id);
}
# ok, ready to build a sequence feature: do we want this relative or not?
my $out = Bio::EnsEMBL::SimpleFeature->new();
$out->start($coord_list[0]->start);
$out->end($coord_list[0]->end);
$out->seqname($coord_list[0]->id);
$out->strand($coord_list[0]->strand);
$out->analysis($ana{$analysis_id});
$out->display_label($display);
push(@f,$out);
}
sub fetch_by_contig_id{
my($self, $cid, $logic_name) = @_;
my $constraint;
return @f;
if($logic_name){
my $analysis = $self->db->get_AnalysisAdaptor->fetch_by_logic_name($logic_name);
$constraint .= " s.analysis_id = ".$analysis->dbID;
}
my @results = $self->fetch_by_contig_id_constraint($cid, $constraint);
return @results;
}
=head2 fetch_by_assembly_location
Title : fetch_by_assembly_location
......@@ -256,26 +158,29 @@ sub fetch_by_assembly_location_and_logic_name{
=cut
sub fetch_by_assembly_location{
my ($self,$start,$end,$chr,$type) = @_;
sub fetch_by_assembly_location_constraint{
my ($self,$chr_start,$chr_end,$chr,$type, $constraint) = @_;
if( !defined $type ) {
$self->throw("Assembly location must be start,end,chr,type");
}
if( $start !~ /^\d/ || $end !~ /^\d/ ) {
$self->throw("start/end must be numbers not $start,$end (have you typed the location in the right way around - start,end,chromosome,type");
if( $chr_start !~ /^\d/ || $chr_end !~ /^\d/ ) {
$self->throw("start/end must be numbers not $chr_start,$chr_end (have you typed the location in the right way around - start,end,chromosome,type");
}
my $mapper = $self->db->get_AssemblyMapperAdaptor->fetch_by_type($type);
$mapper->register_region($chr, $start,$end);
my @cids = $mapper->list_contig_ids($chr, $start,$end);
$mapper->register_region($chr, $chr_start,$chr_end);
my @cids = $mapper->list_contig_ids($chr, $chr_start,$chr_end);
# build the SQL
my $cid_list = join(',',@cids);
my $sth = $self->prepare("select s.contig_id,s.contig_start,s.contig_end,s.contig_strand,s.display_label,s.analysis_id from simple_feature s where s.contig_id in ($cid_list)");
my $sql = "select s.contig_id,s.contig_start,s.contig_end,s.contig_strand,s.display_label,s.analysis_id from simple_feature s where s.contig_id in ($cid_list)";
if($constraint){
$sql .= " AND $constraint";
}
my $sth = $self->prepare($sql);
$sth->execute();
my ($contig_id,$start,$end,$strand,$display,$analysis_id);
......@@ -302,15 +207,15 @@ sub fetch_by_assembly_location{
#$counter++;
next;
}
if(!($coord_list[0]->start >= $chr_start) ||
!($coord_list[0]->end <= $chr_end)) {
next;
}
# ok, ready to build a sequence feature: do we want this relative or not?
my $out = Bio::EnsEMBL::SimpleFeature->new();
$out->start($coord_list[0]->start);
$out->end($coord_list[0]->end);
$out->seqname($coord_list[0]->id);
$out->strand($coord_list[0]->strand);
$out->analysis($ana{$analysis_id});
$out->display_label($display);
my $slice_a = $self->db->get_SliceAdaptor;
my $slice = $slice_a->new_slice($chr, $chr_start, $chr_end, $type);
my $out = $self->_new_feature($coord_list[0]->start, $coord_list[0]->end, $coord_list[0]->strand, $display, $ana{$analysis_id}, $coord_list[0]->id, undef);
push(@f,$out);
}
......@@ -319,6 +224,51 @@ sub fetch_by_assembly_location{
}
sub fetch_by_assembly_location{
my ($self,$chr_start,$chr_end,$chr,$type, $logic_name) = @_;
my $constraint;
my $analysis;
if($logic_name){
my $aa = $self->db->get_AnalysisAdaptor();
$analysis = $aa->fetch_by_logic_name($logic_name);
$constraint .= " s.analysis_id = ".$analysis->dbID();
}
my @results = $self->fetch_by_assembly_location_constraint($chr_start,$chr_end,$chr,$type, $constraint);
return @results;
}
sub fetch_by_Slice{
my($self, $slice, $logic_name) = @_;
my $constraint;
my $analysis;
if($logic_name){
my $aa = $self->db->get_AnalysisAdaptor();
$analysis = $aa->fetch_by_logic_name($logic_name);
$constraint .= " s.analysis_id = ".$analysis->dbID();
}
my @results = $self->fetch_by_assembly_location_constraint($slice->chr_start,$slice->chr_end,$slice->chr_name,$slice->assembly_type, $constraint);
my @out;
foreach my $s(@results){
my $start = ($s->start - ($slice->chr_start - 1));
my $end = ($s->end - ($slice->chr_start - 1));
my $simple = $self->_new_feature($start, $end, $s->strand, $s->display_label, $s->analysis, $s->seqname, undef);
push(@out, $simple);
}
return @out;
}
=head2 store
Title : store
......@@ -365,4 +315,21 @@ sub store{
sub _new_feature{
my ($self, $start, $end, $strand, $display, $analysis, $seqname, $seq) = @_;
my $out = Bio::EnsEMBL::SimpleFeature->new();
$out->start($start);
$out->end($end);
$out->strand($strand);
$out->analysis($analysis);
$out->display_label($display);
$out->seqname($seqname);
if($seq){
$out->attach_seq($seq);
}
return $out;
}
1;
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment