From 0cd9f5b03018f055712b1b4596d9fb4845290258 Mon Sep 17 00:00:00 2001 From: Daniel Rios <dr2@sanger.ac.uk> Date: Wed, 13 Aug 2008 09:58:26 +0000 Subject: [PATCH] added functionality to allow turning off caching of most recent features --- .../Bio/EnsEMBL/DBSQL/BaseFeatureAdaptor.pm | 32 +++++---- modules/Bio/EnsEMBL/DBSQL/DBAdaptor.pm | 48 ++++++++++++- modules/Bio/EnsEMBL/Registry.pm | 72 ++++++++++++++----- 3 files changed, 121 insertions(+), 31 deletions(-) diff --git a/modules/Bio/EnsEMBL/DBSQL/BaseFeatureAdaptor.pm b/modules/Bio/EnsEMBL/DBSQL/BaseFeatureAdaptor.pm index 4cc56046e6..309ad6ab7b 100644 --- a/modules/Bio/EnsEMBL/DBSQL/BaseFeatureAdaptor.pm +++ b/modules/Bio/EnsEMBL/DBSQL/BaseFeatureAdaptor.pm @@ -65,11 +65,15 @@ sub new { my $self = $class->SUPER::new(@_); - #initialize an LRU cache - my %cache; - tie( %cache, 'Bio::EnsEMBL::Utils::Cache', - { Debug => 0, MaxCount => $SLICE_FEATURE_CACHE_SIZE } ); - $self->{'_slice_feature_cache'} = \%cache; + if (defined $self->db->no_cache && $self->db->no_cache){ + warning("You are using the API without caching most recent features. Performance might be affected."); + } + else{ + #initialize an LRU cache + my %cache; + tie(%cache, 'Bio::EnsEMBL::Utils::Cache', $SLICE_FEATURE_CACHE_SIZE); + $self->{'_slice_feature_cache'} = \%cache; + } return $self; } @@ -211,10 +215,12 @@ sub fetch_all_by_Slice_constraint { #check the cache and return if we have already done this query my $key = uc(join(':', $slice->name, $constraint)); - if(exists($self->{'_slice_feature_cache'}->{$key})) { - return $self->{'_slice_feature_cache'}->{$key}; + #will only use feature_cache if hasn't been no_cache attribute set + if (!defined $self->db->no_cache || !$self->db->no_cache){ + if(exists($self->{'_slice_feature_cache'}->{$key})) { + return $self->{'_slice_feature_cache'}->{$key}; + } } - my $sa = $slice->adaptor(); # Hap/PAR support: retrieve normalized 'non-symlinked' slices @@ -281,11 +287,13 @@ sub fetch_all_by_Slice_constraint { else { push @result, @$features; } - } - - $self->{'_slice_feature_cache'}->{$key} = \@result; - +} + #will only use feature_cache when set attribute no_cache in DBAdaptor + if (!defined $self->db->no_cache || !$self->db->no_cache){ + $self->{'_slice_feature_cache'}->{$key} = \@result; + } + return \@result; } diff --git a/modules/Bio/EnsEMBL/DBSQL/DBAdaptor.pm b/modules/Bio/EnsEMBL/DBSQL/DBAdaptor.pm index fcc84cd23e..fd0b5b3f22 100755 --- a/modules/Bio/EnsEMBL/DBSQL/DBAdaptor.pm +++ b/modules/Bio/EnsEMBL/DBSQL/DBAdaptor.pm @@ -58,6 +58,13 @@ my $reg = "Bio::EnsEMBL::Registry"; Arg [-DNADB]: (optional) Bio::EnsEMBL::DBSQL::DBAdaptor DNADB All sequence, assembly, contig information etc, will be retrieved from this database instead. + Arg [-NO_CACHE]: (optional) int 1 + This option will turn off caching for slice features, so, + every time a set of features is retrieved, they will come from + the database instead of the cache. This option is only recommended + for advanced users, specially if you need to store and retrieve + features. It might reduce performance when querying the database if + not used properly. If in doubt, do not use it or ask in ensembl-dev Arg [..] : Other args are passed to superclass Bio::EnsEMBL::DBSQL::DBConnection Example : $db = new Bio::EnsEMBL::DBSQL::DBAdaptor( @@ -85,9 +92,9 @@ sub new { my $self = bless {}, $class; - my ( $is_multispecies, $species, $species_id, $group, $con, $dnadb ) = + my ( $is_multispecies, $species, $species_id, $group, $con, $dnadb, $no_cache ) = rearrange( - [qw(MULTISPECIES_DB SPECIES SPECIES_ID GROUP DBCONN DNADB)], + [qw(MULTISPECIES_DB SPECIES SPECIES_ID GROUP DBCONN DNADB NO_CACHE)], @args ); if ( defined($con) ) { $self->dbc($con) } @@ -106,7 +113,13 @@ sub new { $self = Bio::EnsEMBL::Utils::ConfigRegistry::gen_load($self); - if ( defined $dnadb ) { $self->dnadb($dnadb) } + if(defined $dnadb) { + $self->dnadb($dnadb); + } + + if (defined $no_cache){ + $self->no_cache($no_cache); + } return $self; } @@ -707,6 +720,35 @@ sub species_id { } +=head2 no_cache + + Arg [1] : (optional) int $arg + The new value of the no cache attribute used by this DBAdaptor. + Example : $no_cache = $dba->no_cache(); + Description: Getter/Setter for the no_cache to use for + this connection. There is currently no point in setting + this value after the connection has already been established + by the constructor. + Returntype : int + Exceptions : none + Caller : new + Status : Stable + +=cut + +sub no_cache { + my ($self, $arg ) = @_; + + if ( defined $arg ){ + if ($arg != 1 && $arg != 0){ + throw("$arg is not allowed for this attribute. Only value 1|0 is allowed"); + } + $self->{_no_cache} = $arg; + } + $self->{_no_cache}; +} + + =head2 group Arg [1] : (optional) string $arg diff --git a/modules/Bio/EnsEMBL/Registry.pm b/modules/Bio/EnsEMBL/Registry.pm index 74e582d2e5..cdb6c98e1d 100644 --- a/modules/Bio/EnsEMBL/Registry.pm +++ b/modules/Bio/EnsEMBL/Registry.pm @@ -143,6 +143,13 @@ my $API_VERSION = 51; If not 0, the db connection will not be cleared, if 0 or if not set the db connections will be cleared (this is the default). + Arg [4]: (optional) int 1 + This option will turn off caching for slice features, so, + every time a set of features is retrieved, they will come from + the database instead of the cache. This option is only recommended + for advanced users, specially if you need to store and retrieve + features. It might reduce performance when querying the database if + not used properly. If in doubt, do not use it or ask in ensembl-dev Example : Bio::EnsEMBL::Registry->load_all(); Returntype : none Exceptions : none @@ -152,13 +159,14 @@ my $API_VERSION = 51; sub load_all { my $class = shift; - my ( $config_file, $verbose, $no_clear ) = @_; + my ( $config_file, $verbose, $no_clear, $no_cache ) = @_; $config_file ||= $ENV{ENSEMBL_REGISTRY} || $ENV{HOME} . "/.ensembl_init"; $verbose ||= 0; $no_clear ||= 0; + $no_cache ||= 0; if ( !defined($config_file) ) { if ($verbose) { @@ -302,6 +310,10 @@ sub load_all { foreach my $parameter ( $cfg->Parameters($section) ) { $adaptor_args{ '-' . $parameter } = $cfg->val( $section, $parameter ); + #when set, do not use the feature cache in the different adaptors + if ($no_cache){ + $adaptor_args{'-no_cache'} = 1; + } } if ($verbose) { @@ -1080,6 +1092,15 @@ my $self = shift; =head2 load_registry_from_url Arg [1] : string $url + Arg [2] : (optional) integer + If not 0, will print out all information. + Arg [3] : (optional) integer + This option will turn off caching for slice features, so, + every time a set of features is retrieved, they will come from + the database instead of the cache. This option is only recommended + for advanced users, specially if you need to store and retrieve + features. It might reduce performance when querying the database if + not used properly. If in doubt, do not use it or ask in ensembl-dev Example : load_registry_from_url(mysql://anonymous@ensembldb.ensembl.org:3306); Description: Will load the correct versions of the ensembl databases for the software release it can find on a database instance into the @@ -1094,7 +1115,7 @@ my $self = shift; =cut sub load_registry_from_url { - my ($self, $url, $verbose) = @_; + my ($self, $url, $verbose, $no_cache) = @_; if ($url =~ /mysql\:\/\/([^\@]+\@)?([^\:\/]+)(\:\d+)?(\/\d+)?/) { my $user_pass = $1; @@ -1114,7 +1135,8 @@ sub load_registry_from_url { -pass => $pass, -port => $port, -db_version => $version, - -verbose => $verbose); + -verbose => $verbose, + -no_cache => $no_cache); } else { throw("Only MySQL URLs are accepted at the moment"); } @@ -1143,6 +1165,13 @@ sub load_registry_from_url { the connection is deleted if not used. By default this is 28800 (8 hours) So set this to greater than this if your connection are getting deleted. Only set this if you are having problems and know what you are doing. + Arg [-NO_CACHE]: (optional) int 1 + This option will turn off caching for slice features, so, + every time a set of features is retrieved, they will come from + the database instead of the cache. This option is only recommended + for advanced users, specially if you need to store and retrieve + features. It might reduce performance when querying the database if + not used properly. If in doubt, do not use it or ask in ensembl-dev Example : load_registry_from_db( -host => 'ensembldb.ensembl.org', -user => 'anonymous', @@ -1161,9 +1190,9 @@ sub load_registry_from_db { my ( $self, @args ) = @_; my ( $host, $port, $user, $pass, $verbose, $db_version, - $wait_timeout ) = + $wait_timeout, $no_cache ) = rearrange( - [qw(HOST PORT USER PASS VERBOSE DB_VERSION WAIT_TIMEOUT )], + [qw(HOST PORT USER PASS VERBOSE DB_VERSION WAIT_TIMEOUT NO_CACHE)], @args ); my $go_version = 0; @@ -1250,7 +1279,8 @@ sub load_registry_from_db { -pass => $pass, -port => $port, -dbname => $coredb, - -wait_timeout => $wait_timeout + -wait_timeout => $wait_timeout, + -no_cache => $no_cache ); ( my $sp = $species ) =~ s/_/ /g; @@ -1289,7 +1319,8 @@ sub load_registry_from_db { -pass => $pass, -port => $port, -dbname => $multidb, - -wait_timeout => $wait_timeout + -wait_timeout => $wait_timeout, + -no_cache => $no_cache ); ( my $sp = $species ) =~ s/_/ /g; @@ -1317,7 +1348,8 @@ sub load_registry_from_db { -pass => $pass, -port => $port, -dbname => $cdnadb, - -wait_timeout => $wait_timeout + -wait_timeout => $wait_timeout, + -no_cache => $no_cache ); ( my $sp = $species ) =~ s/_/ /g; $self->add_alias( $species, $sp ); @@ -1338,7 +1370,8 @@ sub load_registry_from_db { -pass => $pass, -port => $port, -wait_timeout => $wait_timeout, - -dbname => $vegadb ); + -dbname => $vegadb, + -no_cache => $no_cache); ( my $sp = $species ) =~ s/_/ /g; $self->add_alias( $species, $sp ); print $vegadb. " loaded\n" if ($verbose); @@ -1360,7 +1393,8 @@ sub load_registry_from_db { -pass => $pass, -port => $port, -wait_timeout => $wait_timeout, - -dbname => $other_db ); + -dbname => $other_db, + -no_cache => $no_cache ); ( my $sp = $species ) =~ s/_/ /g; $self->add_alias( $species, $sp ); print $other_db. " loaded\n" if ($verbose); @@ -1379,7 +1413,8 @@ sub load_registry_from_db { -pass => $pass, -port => $port, -wait_timeout => $wait_timeout, - -dbname => $userupload_db + -dbname => $userupload_db, + -no_cache => $no_cache ); (my $sp = $species ) =~ s/_/ /g; $self->add_alias( $species, $sp ); @@ -1410,7 +1445,8 @@ sub load_registry_from_db { -pass => $pass, -port => $port, -wait_timeout => $wait_timeout, - -dbname => $variation_db + -dbname => $variation_db, + -no_cache => $no_cache ); print $variation_db. " loaded\n" if ($verbose); } @@ -1437,7 +1473,8 @@ sub load_registry_from_db { -pass => $pass, -port => $port, -wait_timeout => $wait_timeout, - -dbname => $funcgen_db ); + -dbname => $funcgen_db, + -no_cache => $no_cache ); print $funcgen_db. " loaded\n" if ($verbose); } } @@ -1466,7 +1503,8 @@ sub load_registry_from_db { -pass => $pass, -port => $port, -wait_timeout => $wait_timeout, - -dbname => $compara_db ); + -dbname => $compara_db, + -no_cache => $no_cache ); if ($verbose) { printf( "%s loaded\n", $compara_db ); } @@ -1488,7 +1526,8 @@ sub load_registry_from_db { -pass => $pass, -port => $port, -wait_timeout => $wait_timeout, - -dbname => $ancestral_db ); + -dbname => $ancestral_db, + -no_cache => $no_cache); print $ancestral_db. " loaded\n" if ($verbose); } else { print "No Ancestral database found" if ($verbose); @@ -1515,7 +1554,8 @@ sub load_registry_from_db { -user => $user, -pass => $pass, -port => $port, - -dbname => $go_db + -dbname => $go_db, + -no_cache => $no_cache ); print $go_db. " loaded\n" if ($verbose); } -- GitLab