From 260001a7dc60ef5e3aa4d18ba3f9903a7ce7c86f Mon Sep 17 00:00:00 2001
From: Nathan Johnson <njohnson@ebi.ac.uk>
Date: Fri, 13 May 2011 08:52:13 +0000
Subject: [PATCH] added fetch_Iterator_by_Slice_method

---
 .../Bio/EnsEMBL/DBSQL/BaseFeatureAdaptor.pm   | 117 +++++++++++++++++-
 1 file changed, 116 insertions(+), 1 deletion(-)

diff --git a/modules/Bio/EnsEMBL/DBSQL/BaseFeatureAdaptor.pm b/modules/Bio/EnsEMBL/DBSQL/BaseFeatureAdaptor.pm
index aef74058f8..5ec5323b41 100644
--- a/modules/Bio/EnsEMBL/DBSQL/BaseFeatureAdaptor.pm
+++ b/modules/Bio/EnsEMBL/DBSQL/BaseFeatureAdaptor.pm
@@ -151,6 +151,114 @@ sub fetch_all_by_Slice {
 }
 
 
+
+=head2 fetch_Iterator_by_Slice_method
+
+  Arg [1]    : CODE ref of Slice fetch method
+  Arg [2]    : ARRAY ref of parameters for Slice fetch method
+  Arg [3]    : Optional int: Slice index in parameters array
+  Arg [4]    : Optional int: Slice chunk size. Default=500000
+  Example    : my $slice_iter = $feature_adaptor->fetch_Iterator_by_Slice_method
+                               	      ($feature_adaptor->can('fetch_all_by_Slice_Arrays'),
+	                                   \@fetch_method_params,
+	                                   0,#Slice idx
+	                                   #500 #chunk length
+	                                  );
+
+               while(my $feature = $slice_iter->next && defined $feature){
+                 #Do something here
+               }
+
+  Description: Creates an Iterator which chunks the query Slice to facilitate
+               large Slice queries which would have previously run out of memory
+  Returntype : Bio::EnsEMBL::Utils::Iterator
+  Exceptions : Throws if mandatory params not valid
+  Caller     : general
+  Status     : at risk
+
+=cut
+
+sub fetch_Iterator_by_Slice_method{
+  my ($self, $slice_method_ref, $params_ref, $slice_idx, $chunk_size) = @_;
+
+  if(! ( defined $slice_method_ref &&
+		 ref($slice_method_ref) eq 'CODE')
+	){
+	throw('Must pass a valid Slice fetch method CODE ref');
+  }
+
+  if (! ($params_ref && 
+		 ref($params_ref) eq 'ARRAY')) {
+	#Don't need to check size here so long as we have valid Slice
+	throw('You must pass a method params ARRAYREF');
+  }
+  
+  $slice_idx    = 0 if(! defined $slice_idx);
+  my $slice     = $params_ref->[$slice_idx];
+  $chunk_size ||= 1000000;
+		
+  my @feat_cache;
+  my $finished     = 0;
+  my $start        = 1;	#local coord for sub slice
+  my $end          = $slice->length;
+  my $num_overlaps = 0;
+  
+  my $coderef = 
+	sub {
+	  
+	  while (scalar(@feat_cache) == 0 &&
+			 ! $finished) {
+		
+		my $new_end = $start + $chunk_size;
+		
+		if ($new_end >= $end) {
+		  # this is our last chunk
+		  $new_end = $end;
+		  $finished = 1;  
+		}
+		
+		#Chunk by sub slicing
+		my $sub_slice             = $slice->sub_Slice($start, $new_end);
+		$params_ref->[$slice_idx] = $sub_slice;
+		@feat_cache = @{ $slice_method_ref->($self, @$params_ref)};
+		
+		
+		#Remove & count overlapping features
+		splice(@feat_cache, 0, $num_overlaps) if($num_overlaps);
+		my $i;
+		
+		if (scalar(@feat_cache) > 0) {
+		  
+		  my $feat_end  = $feat_cache[$#feat_cache]->end;
+		  my $slice_end = $sub_slice->end;
+		  $num_overlaps = 0;
+		  
+		  for ($i = $#feat_cache; $i >=0; $i--) {
+			
+			if ($feat_end > $slice_end) {
+			  $feat_end  = $feat_cache[$i]->end;
+			  $num_overlaps ++;
+			} else {
+			  last;
+			}
+			
+		  }
+		}
+		
+		# update the start coordinate
+		$start = $new_end + 1;
+			}
+	  
+	  #this maybe returning from an undef cache
+	  #Need to sub this out even more?
+	  
+	  return shift @feat_cache;
+	};
+
+  return Bio::EnsEMBL::Utils::Iterator->new($coderef);
+}
+
+
 =head2 fetch_all_by_Slice_and_score
 
   Arg [1]    : Bio::EnsEMBL::Slice $slice
@@ -216,6 +324,7 @@ sub fetch_all_by_Slice_and_score {
 sub fetch_all_by_Slice_constraint {
   my ( $self, $slice, $constraint, $logic_name ) = @_;
 
+
   my @result = ();
 
   if ( !ref($slice)
@@ -275,6 +384,8 @@ sub fetch_all_by_Slice_constraint {
   # Hap/PAR support: retrieve normalized 'non-symlinked' slices.
   my @proj = @{ $sa->fetch_normalized_slice_projection($slice) };
 
+
+ 
   if ( !@proj ) {
     throw( 'Could not retrieve normalized Slices. '
          . 'Database contains incorrect assembly_exception information.'
@@ -309,12 +420,15 @@ sub fetch_all_by_Slice_constraint {
 
   @bounds = map { $_->from_start() - $slice->start() + 1 } @ent_proj;
 
+
   # fetch features for the primary slice AND all symlinked slices
   foreach my $seg (@proj) {
+
+
     my $offset    = $seg->from_start();
     my $seg_slice = $seg->to_Slice();
     my $features =
-      $self->_slice_fetch( $seg_slice, $constraint );    ## NO RESULTS
+      $self->_slice_fetch( $seg_slice, $constraint );
 
     # If this was a symlinked slice offset the feature coordinates as
     # needed.
@@ -492,6 +606,7 @@ sub _slice_fetch {
           " AND ${tab_syn}.seq_region_start >= $min_start";
       }
 
+	  
       my $fs = $self->generic_fetch($constraint,undef,$slice);
 
       # features may still have to have coordinates made relative to slice
-- 
GitLab