Commit 0bf77c68 authored by Graham McVicker's avatar Graham McVicker
Browse files

added a caching algorithm to somewhat improve efficiency of certain usage...

added a caching algorithm to somewhat improve efficiency of certain usage patterns (such as transcript/prediction transcript dumps
parent ce3ff41c
# #
# EnsEMBL module for Bio::EnsEMBL::DBSQL::SequenceAdaptor # Ensembl module for Bio::EnsEMBL::DBSQL::SequenceAdaptor
# #
# Cared for by Arne Stabenau <stabenau@ebi.ac.uk> # Cared for by Arne Stabenau <stabenau@ebi.ac.uk>
# #
# Copyright EMBL/EBI # Copyright Ensembl
# #
# You may distribute this module under the same terms as perl itself # You may distribute this module under the same terms as perl itself
...@@ -37,9 +37,46 @@ use strict; ...@@ -37,9 +37,46 @@ use strict;
use Bio::EnsEMBL::DBSQL::BaseAdaptor; use Bio::EnsEMBL::DBSQL::BaseAdaptor;
use Bio::EnsEMBL::Utils::Exception qw(throw deprecate); use Bio::EnsEMBL::Utils::Exception qw(throw deprecate);
use Bio::EnsEMBL::Utils::Sequence qw(reverse_comp); use Bio::EnsEMBL::Utils::Sequence qw(reverse_comp);
use Bio::EnsEMBL::Utils::Cache;
@ISA = qw(Bio::EnsEMBL::DBSQL::BaseAdaptor); @ISA = qw(Bio::EnsEMBL::DBSQL::BaseAdaptor);
our $SEQ_CHUNK_PWR = 18; # 2^18 = approx. 250KB
our $SEQ_CACHE_SZ = 5;
#our $SEQ_CACHE_MAX = (2 ** $SEQ_CHUNK_PWR) * $SEQ_CACHE_SZ;
our $SEQ_CACHE_MAX = 0;
=head2 new
Arg [1] : none
Example : my $sa = $db_adaptor->get_SequenceAdaptor();
Description: Constructor. Calls superclass constructor and initialises
internal cache structure.
Returntype : Bio::EnsEMBL:DBSQL::SequenceAdaptor
Exceptions : none
Caller : DBAdaptor::get_SequenceAdaptor
=cut
sub new {
my $caller = shift;
my $class = ref($caller) || $caller;
my $self = $class->SUPER::new(@_);
# use an LRU cache to limit the size
my %seq_cache;
tie(%seq_cache, 'Bio::EnsEMBL::Utils::Cache', $SEQ_CACHE_SZ);
$self->{'seq_cache'} = \%seq_cache;
return $self;
}
=head2 fetch_by_Slice_start_end_strand =head2 fetch_by_Slice_start_end_strand
Arg [1] : Bio::EnsEMBL::Slice slice Arg [1] : Bio::EnsEMBL::Slice slice
...@@ -180,18 +217,62 @@ sub _fetch_seq { ...@@ -180,18 +217,62 @@ sub _fetch_seq {
my $start = shift; my $start = shift;
my $length = shift; my $length = shift;
my $tmp_seq; my $cache = $self->{'seq_cache'};
if($length < $SEQ_CACHE_MAX) {
my $chunk_min = ($start-1) >> $SEQ_CHUNK_PWR;
my $chunk_max = ($start + $length - 1) >> $SEQ_CHUNK_PWR;
# piece together sequence from cached component parts
my $entire_seq = undef;
for(my $i = $chunk_min; $i <= $chunk_max; $i++) {
if($cache->{"$seq_region_id:$i"}) {
$entire_seq .= $cache->{"$seq_region_id:$i"};
} else {
# retrieve uncached portions of the sequence
my $sth = $self->prepare
("SELECT SUBSTRING( d.sequence, ?, ?)
FROM dna d
WHERE d.seq_region_id = ?");
my $tmp_seq;
my $min = ($i << $SEQ_CHUNK_PWR) + 1;
my $sth = $self->prepare( $sth->execute($min, 1 << $SEQ_CHUNK_PWR, $seq_region_id);
"SELECT SUBSTRING( d.sequence, ?, ?) $sth->bind_columns(\$tmp_seq);
FROM dna d $sth->fetch();
WHERE d.seq_region_id = ?"); $sth->finish();
$sth->execute($start, $length, $seq_region_id);
$sth->bind_columns(\$tmp_seq);
$sth->fetch();
$sth->finish();
return \$tmp_seq; $entire_seq .= $tmp_seq;
$cache->{"$seq_region_id:$i"} = $tmp_seq;
}
}
# return only the requested portion of the entire sequence
my $min = ($chunk_min << $SEQ_CHUNK_PWR) + 1;
my $max = ($chunk_max+1) << $SEQ_CHUNK_PWR;
my $seq = substr($entire_seq, $start-$min, $length);
return \$seq;
} else {
# do not do any caching for requests of very large sequences
my $sth = $self->prepare
("SELECT SUBSTRING( d.sequence, ?, ?)
FROM dna d
WHERE d.seq_region_id = ?");
my $tmp_seq;
$sth->execute($start, $length, $seq_region_id);
$sth->bind_columns(\$tmp_seq);
$sth->fetch();
$sth->finish();
return \$tmp_seq;
}
} }
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment