From 4c9ef6521077c7eb4c62fa8f16e9cae735933782 Mon Sep 17 00:00:00 2001
From: Ian Longden <ianl@sanger.ac.uk>
Date: Fri, 7 May 2010 09:53:03 +0000
Subject: [PATCH] more changes to nearest gene to feature code

---
 modules/Bio/EnsEMBL/DBSQL/GeneAdaptor.pm | 167 +++++++++++++++++------
 modules/Bio/EnsEMBL/Feature.pm           |   3 +-
 2 files changed, 130 insertions(+), 40 deletions(-)

diff --git a/modules/Bio/EnsEMBL/DBSQL/GeneAdaptor.pm b/modules/Bio/EnsEMBL/DBSQL/GeneAdaptor.pm
index 80a6ac250a..dfa1504f8e 100644
--- a/modules/Bio/EnsEMBL/DBSQL/GeneAdaptor.pm
+++ b/modules/Bio/EnsEMBL/DBSQL/GeneAdaptor.pm
@@ -1821,7 +1821,7 @@ sub fetch_all_by_exon_supporting_evidence {
   Description: Gets all the genes with transcripts with evidence for a
                specified hit on a particular type of feature. Optionally filter
                by analysis.
-  Returntype : Listref of Bio::EnsEMBL::Gene
+  Returntype : Listref of Bio::EnsEMBL::Gene.
   Exceptions : If feature_type is not of correct type.
   Caller     : general
   Status     : Stable
@@ -1877,7 +1877,7 @@ sub fetch_all_by_transcript_supporting_evidence {
   Arg [1]    : Feature object
   Example    : $genes = $gene_adaptor->fetch_nearest_Gene_by_Feature($feat);
   Description: Gets the nearest gene to the feature 
-  Returntype : Listref of Bio::EnsEMBL::Gene
+  Returntype : Listref of Bio::EnsEMBL::Gene, EMPTY list if no nearest
   Caller     : general
   Status     : UnStable
 
@@ -1887,60 +1887,149 @@ sub fetch_nearest_Gene_by_Feature{
   my $self = shift;
   my $feat = shift;
 
-  my $stranded = shift;
+  my $stranded = shift; 
+  my $stream = shift;  # 1 up stream -1 downstream
+  my @genes;
 
-  my $min_dist = 0;
+
+  my $strand = $feat->strand;
+  if(defined($stream) and !$strand){
+    warn("stream specified but feature has no strand so +ve strand will be used");
+    $strand = 1;
+  }
+  my $min_dist = 999;
   my $gene_id = 0;
 
   my $overlapping = $feat->get_overlapping_Genes();
 
-  return @{$overlapping}[0]  if(defined(@{$overlapping}[0]));
-#    print "found overlapping\n";
-
-#  }
-#  else{
-#    print "No overlapping trying nearest\n";
-#  }
+  return $overlapping if(defined(@{$overlapping}[0]));
 
   my $seq_region_id = $feat->slice->adaptor->get_seq_region_id($feat->slice);
   my $start = ($feat->start + $feat->slice->start) -1;
   my $end   = ($feat->end   + $feat->slice->start) -1;
 
-  my $sql1 = "select g.gene_id, (? - g.seq_region_end)  as 'dist' from gene g where ";
-  if($stranded){
-    $sql1 .= "g.seq_region_strand = ".$feat->strand." and ";
-  }
-  $sql1 .= "seq_region_id = ? and g.seq_region_end < ? order by dist limit 1";
 
+  my @gene_ids;
+  if(!defined($stream) or $stream == 0){
+
+    my $sql1 = "select g.gene_id, (? - g.seq_region_end)  as 'dist' from gene g where ";
+    if($stranded){
+      $sql1 .= "g.seq_region_strand = ".$strand." and ";
+    }
+    $sql1 .= "seq_region_id = ? and g.seq_region_end < ? order by dist limit 10";
+    
+    #
+    # MAYBE set the result of prepare to be static in case lots of calls.
+    #
+    my $sql1_sth = $self->prepare($sql1) || die "Could not prepare $sql1";
+    $sql1_sth->execute($start, $seq_region_id, $start) || die "Could not execute sql";
+    $sql1_sth->bind_columns(\$gene_id, \$min_dist)     || die "Could mot bin columns";
+ 
+    my $last_dist = 99999999999999999;
+    while($sql1_sth->fetch()){
+      if($min_dist <= $last_dist){
+	push @gene_ids, $gene_id;
+	$last_dist = $min_dist;
+      }
+    }	
+    $sql1_sth->finish();
+    
+
+
+    my $sql2 = "select g.gene_id, (g.seq_region_start - ?)  as 'dist' from gene g  where ";
+    if($stranded){
+      $sql2 .= "g.seq_region_strand = ".$feat->strand." and ";
+    }
+    $sql2 .= "seq_region_id = ? and g.seq_region_start > ? order by dist limit 10";
+
+    my $sql2_sth = $self->prepare($sql2) || die "could not prepare $sql2";
+    
+    my ($tmp_min_dist, $tmp_gene_id);
+    $sql2_sth->execute($end, $seq_region_id, $end)             || die "Could not execute sql";
+    $sql2_sth->bind_columns(\$tmp_gene_id, \$tmp_min_dist)     || die "Could mot bin columns";
+    my $first =1;
+    while($sql2_sth->fetch()){
+      if( $tmp_min_dist <= $last_dist){
+	if($first){
+	  $first = 0;
+	  if($tmp_min_dist < $last_dist){
+	    @gene_ids = (); #reset
+	  }
+	}	
+	push @gene_ids, $tmp_gene_id;
+	$last_dist = $tmp_min_dist;
+      }
+    }
+    $sql2_sth->finish();
 
-  my $sql1_sth = $self->prepare($sql1) || die "Could not prepare $sql1";
-  $sql1_sth->execute($start, $seq_region_id, $start) || die "Could not execute sql";
-  $sql1_sth->bind_columns(\$gene_id, \$min_dist)     || die "Could mot bin columns";
-  $sql1_sth->fetch()                                 || die "Could not fetch result";
-  $sql1_sth->finish();
 
-  my $sql2 = "select g.gene_id, (g.seq_region_start - ?)  as 'dist' from gene g  where ";
-  if($stranded){
-    $sql2 .= "g.seq_region_strand = ".$feat->strand." and ";
+  }	
+  elsif(($stream*$strand) == 1){
+    my $sql1 = "select g.gene_id, (? - g.seq_region_end)  as 'dist' from gene g where ";
+    if($stranded){
+      $sql1 .= "g.seq_region_strand = ".$strand." and ";
+    }
+    $sql1 .= "seq_region_id = ? and g.seq_region_end < ? order by dist limit 10";
+    
+    #
+    # MAYBE set the result of prepare to be static in case lots of calls.
+    #
+    my $sql1_sth = $self->prepare($sql1) || die "Could not prepare $sql1";
+    $sql1_sth->execute($start, $seq_region_id, $start) || die "Could not execute sql";
+    $sql1_sth->bind_columns(\$gene_id, \$min_dist)     || die "Could mot bin columns";
+    
+    my $last_dist;
+    my $first = 1;
+    while($sql1_sth->fetch()){
+      if($first){
+	$first = 0;
+      }
+      else{
+	next if ($min_dist > $last_dist);
+      } 
+      push @gene_ids, $gene_id;
+      $last_dist = $min_dist;
+    }	
+    $sql1_sth->finish();
+  }	
+  elsif(($stream * $strand) == -1){
+
+    my $sql2 = "select g.gene_id, (g.seq_region_start - ?)  as 'dist' from gene g  where ";
+    if($stranded){
+      $sql2 .= "g.seq_region_strand = ".$feat->strand." and ";
+    }
+    $sql2 .= "seq_region_id = ? and g.seq_region_start > ? order by dist limit 10";
+    
+    my $sql2_sth = $self->prepare($sql2) || die "could not prepare $sql2";
+    
+    my ($tmp_min_dist, $tmp_gene_id);
+    $sql2_sth->execute($end, $seq_region_id, $end)             || die "Could not execute sql";
+    $sql2_sth->bind_columns(\$tmp_gene_id, \$tmp_min_dist)     || die "Could mot bin columns";
+    my $first =1;
+    my $last_dist;
+    while($sql2_sth->fetch()){
+      if($first){
+	$first = 0;	
+      }	
+      else{
+	next if ($tmp_min_dist > $last_dist);
+      }
+      push @gene_ids, $tmp_gene_id;
+      $last_dist = $tmp_min_dist;
+    }
+    $sql2_sth->finish();
   }
-  $sql2 .= "seq_region_id = ? and g.seq_region_start > ? order by dist limit 1";
-  my $sql2_sth = $self->prepare($sql2) || die "could not prepare $sql2";
-
-  my ($tmp_min_dist, $tmp_gene_id);
-  $sql2_sth->execute($end, $seq_region_id, $end)             || die "Could not execute sql";
-  $sql2_sth->bind_columns(\$tmp_gene_id, \$tmp_min_dist)     || die "Could mot bin columns";
-  $sql2_sth->fetch()                                         || die "Could not fetch result";
-  $sql2_sth->finish();
- 
-  if($tmp_min_dist < $min_dist || !$gene_id){
-    $gene_id = $tmp_gene_id
+  else{
+    die "Invalid stream or strand must be -1, 0 or 1\n";
   }
-  
-  if($gene_id){
-    return $self->fetch_by_dbID($gene_id);
+
+
+
+  foreach my $gene_id (@gene_ids){
+    push @genes, $self->fetch_by_dbID($gene_id);
   }
+  return \@genes;
 
-  return undef;
 }
 
 ##########################
diff --git a/modules/Bio/EnsEMBL/Feature.pm b/modules/Bio/EnsEMBL/Feature.pm
index b8de50b971..49d9138c89 100644
--- a/modules/Bio/EnsEMBL/Feature.pm
+++ b/modules/Bio/EnsEMBL/Feature.pm
@@ -1234,10 +1234,11 @@ sub get_overlapping_Genes{
 sub get_nearest_Gene {
   my $self = shift;
   my $stranded = shift;
+  my $stream = shift;
 
   my $ga = Bio::EnsEMBL::Registry->get_adaptor($self->adaptor->db->species,"core","Gene");
 
-  return $ga->fetch_nearest_Gene_by_Feature($self, $stranded);
+  return $ga->fetch_nearest_Gene_by_Feature($self, $stranded, $stream);
 
 }
 
-- 
GitLab