From f7805e02580755f646fa1d678c20f8cb4d720cf3 Mon Sep 17 00:00:00 2001 From: Andrew Yates <ayates@ebi.ac.uk> Date: Wed, 20 Mar 2013 17:18:57 +0000 Subject: [PATCH] removing the adding of species if they are in the force_species list. unintended side effect of limiting it to just those force species --- modules/Bio/EnsEMBL/DBSQL/SequenceAdaptor.pm | 77 +++++++++++-------- .../Pipeline/FASTA/ReuseSpeciesFactory.pm | 14 +--- modules/t/sequenceAdaptor.t | 12 ++- 3 files changed, 56 insertions(+), 47 deletions(-) diff --git a/modules/Bio/EnsEMBL/DBSQL/SequenceAdaptor.pm b/modules/Bio/EnsEMBL/DBSQL/SequenceAdaptor.pm index 8c9c192b6e..f59f61ceb6 100644 --- a/modules/Bio/EnsEMBL/DBSQL/SequenceAdaptor.pm +++ b/modules/Bio/EnsEMBL/DBSQL/SequenceAdaptor.pm @@ -39,7 +39,7 @@ An adaptor for the retrieval of DNA sequence from the EnsEMBL database package Bio::EnsEMBL::DBSQL::SequenceAdaptor; -use vars qw(@ISA @EXPORT); +# use vars qw(@ISA @EXPORT); use strict; use warnings; @@ -49,13 +49,14 @@ use Bio::EnsEMBL::Utils::Sequence qw(reverse_comp); use Bio::EnsEMBL::Utils::Cache; use Bio::EnsEMBL::Utils::Scalar qw( assert_ref ); -@ISA = qw(Bio::EnsEMBL::DBSQL::BaseAdaptor); +# @ISA = qw(Bio::EnsEMBL::DBSQL::BaseAdaptor Bio::EnsEMBL::DBSQL::BaseSequenceAdaptor); +use base qw(Bio::EnsEMBL::DBSQL::BaseAdaptor Bio::EnsEMBL::DBSQL::BaseSequenceAdaptor); +# +# our $SEQ_CHUNK_PWR = 18; # 2^18 = approx. 
250KB -our $SEQ_CACHE_SZ = 5; -our $SEQ_CACHE_MAX = (2 ** $SEQ_CHUNK_PWR) * $SEQ_CACHE_SZ; - -@EXPORT = (@{$DBI::EXPORT_TAGS{'sql_types'}}); +our @EXPORT = (@{$DBI::EXPORT_TAGS{'sql_types'}}); =head2 new @@ -72,16 +73,15 @@ our $SEQ_CACHE_MAX = (2 ** $SEQ_CHUNK_PWR) * $SEQ_CACHE_SZ; sub new { my $caller = shift; - my $class = ref($caller) || $caller; - my $self = $class->SUPER::new(@_); + $self->_init_seq_instance(); - # use an LRU cache to limit the size - my %seq_cache; - tie(%seq_cache, 'Bio::EnsEMBL::Utils::Cache', $SEQ_CACHE_SZ); - - $self->{'seq_cache'} = \%seq_cache; + # # use an LRU cache to limit the size + # my %seq_cache; + # tie(%seq_cache, 'Bio::EnsEMBL::Utils::Cache', $SEQ_CACHE_SZ); + # + # $self->{'seq_cache'} = \%seq_cache; # @@ -107,22 +107,22 @@ sub new { return $self; } - -=head2 clear_cache - - Example : $sa->clear_cache(); - Description : Removes all entries from the associcated sequence cache - Returntype : None - Exceptions : None - -=cut - -sub clear_cache { - my ($self) = @_; - %{$self->{seq_cache}} = (); - return; -} - +# +# =head2 clear_cache +# +# Example : $sa->clear_cache(); +# Description : Removes all entries from the associcated sequence cache +# Returntype : None +# Exceptions : None +# +# =cut +# +# sub clear_cache { +# my ($self) = @_; +# %{$self->{seq_cache}} = (); +# return; +# } +# =head2 fetch_by_Slice_start_end_strand @@ -442,7 +442,7 @@ sub _rna_edit { return; } - +=cut sub _fetch_seq { my $self = shift; my $seq_region_id = shift; @@ -519,6 +519,21 @@ sub _fetch_seq { return \$tmp_seq; } } +=cut +sub _fetch_raw_seq { + my ($self, $id, $start, $length) = @_; + my $sql = <<'SQL'; +SELECT UPPER(SUBSTRING(d.sequence, ?, ?)) +FROM dna d +WHERE d.seq_region_id =? 
+SQL + my $seq = $self->dbc()->sql_helper()->execute_single_result( + -SQL => $sql, + -PARAMS => [[$start, SQL_INTEGER], [$length, SQL_INTEGER], [$id, SQL_INTEGER]], + -NO_ERROR => 1 + ); + return \$seq; +} =head2 store diff --git a/modules/Bio/EnsEMBL/Pipeline/FASTA/ReuseSpeciesFactory.pm b/modules/Bio/EnsEMBL/Pipeline/FASTA/ReuseSpeciesFactory.pm index 2dd69c4f90..2302627832 100644 --- a/modules/Bio/EnsEMBL/Pipeline/FASTA/ReuseSpeciesFactory.pm +++ b/modules/Bio/EnsEMBL/Pipeline/FASTA/ReuseSpeciesFactory.pm @@ -39,7 +39,7 @@ Allowed parameters are: =item force_species - Specify species we want to redump even though our queries of production could say otherwise -=item run_all - Do not check a thing. Override and run every dump +=item run_all - Do not check a thing. Schedule every species =back @@ -72,7 +72,7 @@ sub param_defaults { %{$self->SUPER::param_defaults()}, force_species => [], - force_all_species => 1, + run_all => 1, }; return $p; } @@ -94,16 +94,6 @@ sub fetch_input { return; } -#Allows a user to use '-force_species human' rather than '-force_species human -species human' -sub _add_force_species_to_species { - my ($self) = @_; - my $force_species = $self->param('force_species'); - my $species = $self->param('species'); - my %final_species = map { $_ => 1 } (@{$force_species}, @{$species}); - $self->param('species', [keys %final_species]); - return; -} - sub dna_flow { my ($self, $dba) = @_; my $parent_flow = $self->SUPER::dna_flow($dba); diff --git a/modules/t/sequenceAdaptor.t b/modules/t/sequenceAdaptor.t index d72abfaabc..67868944c0 100644 --- a/modules/t/sequenceAdaptor.t +++ b/modules/t/sequenceAdaptor.t @@ -30,6 +30,10 @@ my $seq_adaptor = $db->get_SequenceAdaptor(); my $slice = $slice_adaptor->fetch_by_region('chromosome', $CHR, $START, $END); compare_compliments($slice, $seq_adaptor); +#Bigger than 1Mb +$slice = $slice_adaptor->fetch_by_region('chromosome', $CHR, $START, $START+2_000_000); +compare_compliments($slice, $seq_adaptor); + $slice = 
$slice_adaptor->fetch_by_region('clone','AL031658.11'); compare_compliments($slice, $seq_adaptor); @@ -47,20 +51,20 @@ sub compare_compliments { my $seq = ${$seq_adaptor->fetch_by_Slice_start_end_strand($slice,1,undef,1)}; debug('FORWARD STRAND SLICE SEQ for ' . $slice->name()); - debug($seq); + debug($slice->length); my $invert_seq = ${$seq_adaptor->fetch_by_Slice_start_end_strand($slice->invert,1,undef,1)}; debug('REVERSE STRAND SLICE SEQ for ' . $slice->name()); - debug($invert_seq); + - ok(length($seq) == $slice->length); #sequence is correct length + is(length($seq), $slice->length, 'sequence is correct length'); $seq = reverse $seq; #reverse complement seq $seq =~ tr/ACTG/TGAC/; - ok($seq eq $invert_seq); #revcom same as seq on inverted slice + ok($seq eq $invert_seq, 'revcom same as seq on inverted slice'); } done_testing(); -- GitLab