diff --git a/modules/Bio/EnsEMBL/DBSQL/GeneAdaptor.pm b/modules/Bio/EnsEMBL/DBSQL/GeneAdaptor.pm index 80a6ac250a837d4982742ed961030b29b13fc0d6..dfa1504f8edcb6eaac2c98dd798eb788dc95c85a 100644 --- a/modules/Bio/EnsEMBL/DBSQL/GeneAdaptor.pm +++ b/modules/Bio/EnsEMBL/DBSQL/GeneAdaptor.pm @@ -1821,7 +1821,7 @@ sub fetch_all_by_exon_supporting_evidence { Description: Gets all the genes with transcripts with evidence for a specified hit on a particular type of feature. Optionally filter by analysis. - Returntype : Listref of Bio::EnsEMBL::Gene + Returntype : Listref of Bio::EnsEMBL::Gene. Exceptions : If feature_type is not of correct type. Caller : general Status : Stable @@ -1877,7 +1877,7 @@ sub fetch_all_by_transcript_supporting_evidence { Arg [1] : Feature object Example : $genes = $gene_adaptor->fetch_nearest_Gene_by_Feature($feat); Description: Gets the nearest gene to the feature - Returntype : Listref of Bio::EnsEMBL::Gene + Returntype : Listref of Bio::EnsEMBL::Gene, EMPTY list if no nearest Caller : general Status : UnStable @@ -1887,60 +1887,149 @@ sub fetch_nearest_Gene_by_Feature{ my $self = shift; my $feat = shift; - my $stranded = shift; + my $stranded = shift; + my $stream = shift; # 1 up stream -1 downstream + my @genes; - my $min_dist = 0; + + my $strand = $feat->strand; + if(defined($stream) and !$strand){ + warn("stream specified but feature has no strand so +ve strand will be used"); + $strand = 1; + } + my $min_dist = 999; my $gene_id = 0; my $overlapping = $feat->get_overlapping_Genes(); - return @{$overlapping}[0] if(defined(@{$overlapping}[0])); -# print "found overlapping\n"; - -# } -# else{ -# print "No overlapping trying nearest\n"; -# } + return $overlapping if(defined(@{$overlapping}[0])); my $seq_region_id = $feat->slice->adaptor->get_seq_region_id($feat->slice); my $start = ($feat->start + $feat->slice->start) -1; my $end = ($feat->end + $feat->slice->start) -1; - my $sql1 = "select g.gene_id, (? - g.seq_region_end) as 'dist' from gene g where "; - if($stranded){ - $sql1 .= "g.seq_region_strand = ".$feat->strand." and "; - } - $sql1 .= "seq_region_id = ? and g.seq_region_end < ? order by dist limit 1"; + my @gene_ids; + if(!defined($stream) or $stream == 0){ + + my $sql1 = "select g.gene_id, (? - g.seq_region_end) as 'dist' from gene g where "; + if($stranded){ + $sql1 .= "g.seq_region_strand = ".$strand." and "; + } + $sql1 .= "seq_region_id = ? and g.seq_region_end < ? order by dist limit 10"; + + # + # MAYBE set the result of prepare to be static in case lots of calls. + # + my $sql1_sth = $self->prepare($sql1) || die "Could not prepare $sql1"; + $sql1_sth->execute($start, $seq_region_id, $start) || die "Could not execute sql"; + $sql1_sth->bind_columns(\$gene_id, \$min_dist) || die "Could mot bin columns"; + + my $last_dist = 99999999999999999; + while($sql1_sth->fetch()){ + if($min_dist <= $last_dist){ + push @gene_ids, $gene_id; + $last_dist = $min_dist; + } + } + $sql1_sth->finish(); + + + + my $sql2 = "select g.gene_id, (g.seq_region_start - ?) as 'dist' from gene g where "; + if($stranded){ + $sql2 .= "g.seq_region_strand = ".$feat->strand." and "; + } + $sql2 .= "seq_region_id = ? and g.seq_region_start > ? order by dist limit 10"; + + my $sql2_sth = $self->prepare($sql2) || die "could not prepare $sql2"; + + my ($tmp_min_dist, $tmp_gene_id); + $sql2_sth->execute($end, $seq_region_id, $end) || die "Could not execute sql"; + $sql2_sth->bind_columns(\$tmp_gene_id, \$tmp_min_dist) || die "Could mot bin columns"; + my $first =1; + while($sql2_sth->fetch()){ + if( $tmp_min_dist <= $last_dist){ + if($first){ + $first = 0; + if($tmp_min_dist < $last_dist){ + @gene_ids = (); #reset + } + } + push @gene_ids, $tmp_gene_id; + $last_dist = $tmp_min_dist; + } + } + $sql2_sth->finish(); - my $sql1_sth = $self->prepare($sql1) || die "Could not prepare $sql1"; - $sql1_sth->execute($start, $seq_region_id, $start) || die "Could not execute sql"; - $sql1_sth->bind_columns(\$gene_id, \$min_dist) || die "Could mot bin columns"; - $sql1_sth->fetch() || die "Could not fetch result"; - $sql1_sth->finish(); - my $sql2 = "select g.gene_id, (g.seq_region_start - ?) as 'dist' from gene g where "; - if($stranded){ - $sql2 .= "g.seq_region_strand = ".$feat->strand." and "; + } + elsif(($stream*$strand) == 1){ + my $sql1 = "select g.gene_id, (? - g.seq_region_end) as 'dist' from gene g where "; + if($stranded){ + $sql1 .= "g.seq_region_strand = ".$strand." and "; + } + $sql1 .= "seq_region_id = ? and g.seq_region_end < ? order by dist limit 10"; + + # + # MAYBE set the result of prepare to be static in case lots of calls. + # + my $sql1_sth = $self->prepare($sql1) || die "Could not prepare $sql1"; + $sql1_sth->execute($start, $seq_region_id, $start) || die "Could not execute sql"; + $sql1_sth->bind_columns(\$gene_id, \$min_dist) || die "Could mot bin columns"; + + my $last_dist; + my $first = 1; + while($sql1_sth->fetch()){ + if($first){ + $first = 0; + } + else{ + next if ($min_dist > $last_dist); + } + push @gene_ids, $gene_id; + $last_dist = $min_dist; + } + $sql1_sth->finish(); + } + elsif(($stream * $strand) == -1){ + + my $sql2 = "select g.gene_id, (g.seq_region_start - ?) as 'dist' from gene g where "; + if($stranded){ + $sql2 .= "g.seq_region_strand = ".$feat->strand." and "; + } + $sql2 .= "seq_region_id = ? and g.seq_region_start > ? order by dist limit 10"; + + my $sql2_sth = $self->prepare($sql2) || die "could not prepare $sql2"; + + my ($tmp_min_dist, $tmp_gene_id); + $sql2_sth->execute($end, $seq_region_id, $end) || die "Could not execute sql"; + $sql2_sth->bind_columns(\$tmp_gene_id, \$tmp_min_dist) || die "Could mot bin columns"; + my $first =1; + my $last_dist; + while($sql2_sth->fetch()){ + if($first){ + $first = 0; + } + else{ + next if ($tmp_min_dist > $last_dist); + } + push @gene_ids, $tmp_gene_id; + $last_dist = $tmp_min_dist; + } + $sql2_sth->finish(); } - $sql2 .= "seq_region_id = ? and g.seq_region_start > ? order by dist limit 1"; - my $sql2_sth = $self->prepare($sql2) || die "could not prepare $sql2"; - - my ($tmp_min_dist, $tmp_gene_id); - $sql2_sth->execute($end, $seq_region_id, $end) || die "Could not execute sql"; - $sql2_sth->bind_columns(\$tmp_gene_id, \$tmp_min_dist) || die "Could mot bin columns"; - $sql2_sth->fetch() || die "Could not fetch result"; - $sql2_sth->finish(); - - if($tmp_min_dist < $min_dist || !$gene_id){ - $gene_id = $tmp_gene_id + else{ + die "Invalid stream or strand must be -1, 0 or 1\n"; } - - if($gene_id){ - return $self->fetch_by_dbID($gene_id); + + + + foreach my $gene_id (@gene_ids){ + push @genes, $self->fetch_by_dbID($gene_id); } + return \@genes; - return undef; } ########################## diff --git a/modules/Bio/EnsEMBL/Feature.pm b/modules/Bio/EnsEMBL/Feature.pm index b8de50b9712ed739b5db708086e4c9ba39a80fcd..49d9138c89bb1e024a9d70b6036d48047c80d6b8 100644 --- a/modules/Bio/EnsEMBL/Feature.pm +++ b/modules/Bio/EnsEMBL/Feature.pm @@ -1234,10 +1234,11 @@ sub get_overlapping_Genes{ sub get_nearest_Gene { my $self = shift; my $stranded = shift; + my $stream = shift; my $ga = Bio::EnsEMBL::Registry->get_adaptor($self->adaptor->db->species,"core","Gene"); - return $ga->fetch_nearest_Gene_by_Feature($self, $stranded); + return $ga->fetch_nearest_Gene_by_Feature($self, $stranded, $stream); }