From f7805e02580755f646fa1d678c20f8cb4d720cf3 Mon Sep 17 00:00:00 2001
From: Andrew Yates <ayates@ebi.ac.uk>
Date: Wed, 20 Mar 2013 17:18:57 +0000
Subject: [PATCH] removing the adding of species if they are in the
 force_species list. unintended side effect of limiting it to just those force
 species

---
 modules/Bio/EnsEMBL/DBSQL/SequenceAdaptor.pm  | 77 +++++++++++--------
 .../Pipeline/FASTA/ReuseSpeciesFactory.pm     | 14 +---
 modules/t/sequenceAdaptor.t                   | 12 ++-
 3 files changed, 56 insertions(+), 47 deletions(-)

diff --git a/modules/Bio/EnsEMBL/DBSQL/SequenceAdaptor.pm b/modules/Bio/EnsEMBL/DBSQL/SequenceAdaptor.pm
index 8c9c192b6e..f59f61ceb6 100644
--- a/modules/Bio/EnsEMBL/DBSQL/SequenceAdaptor.pm
+++ b/modules/Bio/EnsEMBL/DBSQL/SequenceAdaptor.pm
@@ -39,7 +39,7 @@ An adaptor for the retrieval of DNA sequence from the EnsEMBL database
 
 package Bio::EnsEMBL::DBSQL::SequenceAdaptor;
 
-use vars qw(@ISA @EXPORT);
+# use vars qw(@ISA @EXPORT);
 use strict;
 use warnings;
 
@@ -49,13 +49,14 @@ use Bio::EnsEMBL::Utils::Sequence  qw(reverse_comp);
 use Bio::EnsEMBL::Utils::Cache;
 use Bio::EnsEMBL::Utils::Scalar qw( assert_ref );
 
-@ISA = qw(Bio::EnsEMBL::DBSQL::BaseAdaptor);
+# @ISA = qw(Bio::EnsEMBL::DBSQL::BaseAdaptor Bio::EnsEMBL::DBSQL::BaseSequenceAdaptor);
+use base qw(Bio::EnsEMBL::DBSQL::BaseAdaptor Bio::EnsEMBL::DBSQL::BaseSequenceAdaptor);
+# 
+# our $SEQ_CHUNK_PWR   = 18; # 2^18 = approx. 250KB
+# our $SEQ_CACHE_SZ    = 5;
+# our $SEQ_CACHE_MAX   = (2 ** $SEQ_CHUNK_PWR) * $SEQ_CACHE_SZ;
 
-our $SEQ_CHUNK_PWR   = 18; # 2^18 = approx. 250KB
-our $SEQ_CACHE_SZ    = 5;
-our $SEQ_CACHE_MAX   = (2 ** $SEQ_CHUNK_PWR) * $SEQ_CACHE_SZ;
-
-@EXPORT = (@{$DBI::EXPORT_TAGS{'sql_types'}});
+our @EXPORT = (@{$DBI::EXPORT_TAGS{'sql_types'}});
 
 =head2 new
 
@@ -72,16 +73,15 @@ our $SEQ_CACHE_MAX   = (2 ** $SEQ_CHUNK_PWR) * $SEQ_CACHE_SZ;
 
 sub new {
   my $caller = shift;
-
   my $class = ref($caller) || $caller;
-
   my $self = $class->SUPER::new(@_);
+  $self->_init_seq_instance();
 
-  # use an LRU cache to limit the size
-  my %seq_cache;
-  tie(%seq_cache, 'Bio::EnsEMBL::Utils::Cache', $SEQ_CACHE_SZ);
-
-  $self->{'seq_cache'} = \%seq_cache;
+  # # use an LRU cache to limit the size
+  # my %seq_cache;
+  # tie(%seq_cache, 'Bio::EnsEMBL::Utils::Cache', $SEQ_CACHE_SZ);
+  # 
+  # $self->{'seq_cache'} = \%seq_cache;
 
 
 #
@@ -107,22 +107,22 @@ sub new {
   
   return $self;
 }
-
-=head2 clear_cache
-
-  Example			: $sa->clear_cache();
-  Description	: Removes all entries from the associcated sequence cache
-  Returntype 	: None
-  Exceptions 	: None
-
-=cut
-
-sub clear_cache {
-  my ($self) = @_;
-  %{$self->{seq_cache}} = ();
-  return;
-}
-
+# 
+# =head2 clear_cache
+# 
+#   Example     : $sa->clear_cache();
+#   Description : Removes all entries from the associated sequence cache
+#   Returntype  : None
+#   Exceptions  : None
+# 
+# =cut
+# 
+# sub clear_cache {
+#   my ($self) = @_;
+#   %{$self->{seq_cache}} = ();
+#   return;
+# }
+# 
 
 =head2 fetch_by_Slice_start_end_strand
 
@@ -442,7 +442,7 @@ sub _rna_edit {
   return;
 }
 
-
+=cut
 sub _fetch_seq {
   my $self          = shift;
   my $seq_region_id = shift;
@@ -519,6 +519,21 @@ sub _fetch_seq {
     return \$tmp_seq;
   }
 }
+=cut
+sub _fetch_raw_seq { # Fetches an upper-cased substring of one seq_region's row in the dna table; returns a scalar ref.
+  my ($self, $id, $start, $length) = @_; # $id is matched against seq_region_id; $start and $length are passed to SUBSTRING
+  my $sql = <<'SQL'; # non-interpolating heredoc: all values are supplied as bind parameters below
+SELECT UPPER(SUBSTRING(d.sequence, ?, ?))
+FROM dna d
+WHERE d.seq_region_id =?
+SQL
+  my $seq = $self->dbc()->sql_helper()->execute_single_result(
+    -SQL => $sql, 
+    -PARAMS => [[$start, SQL_INTEGER], [$length, SQL_INTEGER], [$id, SQL_INTEGER]], # listed in the same order as the ? placeholders
+    -NO_ERROR => 1 # NOTE(review): presumably stops the helper from throwing when no row matches -- confirm against the SQL helper docs
+  );
+  return \$seq; # a reference to the result, not the string itself; NOTE(review): referent is presumably undef when nothing matched -- confirm
+}
 
 
 =head2 store
diff --git a/modules/Bio/EnsEMBL/Pipeline/FASTA/ReuseSpeciesFactory.pm b/modules/Bio/EnsEMBL/Pipeline/FASTA/ReuseSpeciesFactory.pm
index 2dd69c4f90..2302627832 100644
--- a/modules/Bio/EnsEMBL/Pipeline/FASTA/ReuseSpeciesFactory.pm
+++ b/modules/Bio/EnsEMBL/Pipeline/FASTA/ReuseSpeciesFactory.pm
@@ -39,7 +39,7 @@ Allowed parameters are:
 =item force_species - Specify species we want to redump even though 
                       our queries of production could say otherwise
 
-=item run_all - Do not check a thing. Override and run every dump
+=item run_all - Do not check a thing. Schedule every species
 
 =back
 
@@ -72,7 +72,7 @@ sub param_defaults {
     %{$self->SUPER::param_defaults()},
     
     force_species => [],
-    force_all_species => 1,
+    run_all => 1,
   };
   return $p;
 }
@@ -94,16 +94,6 @@ sub fetch_input {
   return;
 }
 
-#Allows a user to use '-force_species human' rather than '-force_species human -species human'
-sub _add_force_species_to_species {
-  my ($self) = @_;
-  my $force_species = $self->param('force_species');
-  my $species = $self->param('species');
-  my %final_species = map { $_ => 1 } (@{$force_species}, @{$species});
-  $self->param('species', [keys %final_species]);
-  return;
-}
-
 sub dna_flow {
   my ($self, $dba) = @_;
   my $parent_flow = $self->SUPER::dna_flow($dba);
diff --git a/modules/t/sequenceAdaptor.t b/modules/t/sequenceAdaptor.t
index d72abfaabc..67868944c0 100644
--- a/modules/t/sequenceAdaptor.t
+++ b/modules/t/sequenceAdaptor.t
@@ -30,6 +30,10 @@ my $seq_adaptor = $db->get_SequenceAdaptor();
 my $slice = $slice_adaptor->fetch_by_region('chromosome', $CHR, $START, $END);
 compare_compliments($slice, $seq_adaptor);
 
+#Bigger than 1Mb
+$slice = $slice_adaptor->fetch_by_region('chromosome', $CHR, $START, $START+2_000_000);
+compare_compliments($slice, $seq_adaptor);
+
 $slice = $slice_adaptor->fetch_by_region('clone','AL031658.11');
 compare_compliments($slice, $seq_adaptor);
 
@@ -47,20 +51,20 @@ sub compare_compliments {
   my $seq = ${$seq_adaptor->fetch_by_Slice_start_end_strand($slice,1,undef,1)};
 
   debug('FORWARD STRAND SLICE SEQ for ' . $slice->name());
-  debug($seq);
+  debug($slice->length);
 
   my $invert_seq = 
     ${$seq_adaptor->fetch_by_Slice_start_end_strand($slice->invert,1,undef,1)};
 
   debug('REVERSE STRAND SLICE SEQ for ' . $slice->name());
-  debug($invert_seq);
+  
 
-  ok(length($seq) == $slice->length); #sequence is correct length
+  is(length($seq), $slice->length, 'sequence is correct length');
 
   $seq = reverse $seq;  #reverse complement seq
   $seq =~ tr/ACTG/TGAC/;
 
-  ok($seq eq $invert_seq); #revcom same as seq on inverted slice
+  ok($seq eq $invert_seq, 'revcom same as seq on inverted slice');
 }
 
 done_testing();
-- 
GitLab