From 0cd9f5b03018f055712b1b4596d9fb4845290258 Mon Sep 17 00:00:00 2001
From: Daniel Rios <dr2@sanger.ac.uk>
Date: Wed, 13 Aug 2008 09:58:26 +0000
Subject: [PATCH] added functionality to allow turning off caching of most
 recent features

---
 .../Bio/EnsEMBL/DBSQL/BaseFeatureAdaptor.pm   | 32 +++++----
 modules/Bio/EnsEMBL/DBSQL/DBAdaptor.pm        | 48 ++++++++++++-
 modules/Bio/EnsEMBL/Registry.pm               | 72 ++++++++++++++-----
 3 files changed, 121 insertions(+), 31 deletions(-)

diff --git a/modules/Bio/EnsEMBL/DBSQL/BaseFeatureAdaptor.pm b/modules/Bio/EnsEMBL/DBSQL/BaseFeatureAdaptor.pm
index 4cc56046e6..309ad6ab7b 100644
--- a/modules/Bio/EnsEMBL/DBSQL/BaseFeatureAdaptor.pm
+++ b/modules/Bio/EnsEMBL/DBSQL/BaseFeatureAdaptor.pm
@@ -65,11 +65,15 @@ sub new {
 
   my $self = $class->SUPER::new(@_);
 
-  #initialize an LRU cache
-  my %cache;
-  tie( %cache, 'Bio::EnsEMBL::Utils::Cache',
-       { Debug => 0, MaxCount => $SLICE_FEATURE_CACHE_SIZE } );
-  $self->{'_slice_feature_cache'} = \%cache;
+  if (defined $self->db->no_cache && $self->db->no_cache){
+      warning("You are using the API without caching most recent features. Performance might be affected.");
+  }
+  else{
+      #initialize an LRU cache
+      my %cache;
+      tie(%cache, 'Bio::EnsEMBL::Utils::Cache', $SLICE_FEATURE_CACHE_SIZE);
+      $self->{'_slice_feature_cache'} = \%cache;
+  }
 
   return $self;
 }
@@ -211,10 +215,12 @@ sub fetch_all_by_Slice_constraint {
   #check the cache and return if we have already done this query
   my $key = uc(join(':', $slice->name, $constraint));
 
-  if(exists($self->{'_slice_feature_cache'}->{$key})) {
-    return $self->{'_slice_feature_cache'}->{$key};
+  #only consult the feature cache when the no_cache attribute is not set
+  if (!defined $self->db->no_cache || !$self->db->no_cache){
+      if(exists($self->{'_slice_feature_cache'}->{$key})) {
+	  return $self->{'_slice_feature_cache'}->{$key};
+      }
   }
-
   my $sa = $slice->adaptor();
 
   # Hap/PAR support: retrieve normalized 'non-symlinked' slices
@@ -281,11 +287,13 @@ sub fetch_all_by_Slice_constraint {
     else {
       push @result, @$features;
     }
-  }
-
-  $self->{'_slice_feature_cache'}->{$key} = \@result;
-
+}
 
+  #only store into the feature cache when no_cache is not set in the DBAdaptor
+  if (!defined $self->db->no_cache || !$self->db->no_cache){
+      $self->{'_slice_feature_cache'}->{$key} = \@result;
+  }
+  
   return \@result;
 }
 
diff --git a/modules/Bio/EnsEMBL/DBSQL/DBAdaptor.pm b/modules/Bio/EnsEMBL/DBSQL/DBAdaptor.pm
index fcc84cd23e..fd0b5b3f22 100755
--- a/modules/Bio/EnsEMBL/DBSQL/DBAdaptor.pm
+++ b/modules/Bio/EnsEMBL/DBSQL/DBAdaptor.pm
@@ -58,6 +58,13 @@ my $reg = "Bio::EnsEMBL::Registry";
   Arg [-DNADB]: (optional) Bio::EnsEMBL::DBSQL::DBAdaptor DNADB 
                All sequence, assembly, contig information etc, will be
                retrieved from this database instead.
+  Arg [-NO_CACHE]: (optional) int 1
+               This option turns off caching for slice features, so every
+               time a set of features is retrieved, they come from the
+               database instead of the cache. It is only recommended for
+               advanced users, especially if you need to store and retrieve
+               features. It might reduce performance when querying the database
+               if not used properly. If in doubt, do not use it, or ask on ensembl-dev.
   Arg [..]   : Other args are passed to superclass
                Bio::EnsEMBL::DBSQL::DBConnection
   Example    : $db = new Bio::EnsEMBL::DBSQL::DBAdaptor(
@@ -85,9 +92,9 @@ sub new {
 
   my $self = bless {}, $class;
 
-  my ( $is_multispecies, $species, $species_id, $group, $con, $dnadb ) =
+  my ( $is_multispecies, $species, $species_id, $group, $con, $dnadb, $no_cache ) =
     rearrange(
-            [qw(MULTISPECIES_DB SPECIES SPECIES_ID GROUP DBCONN DNADB)],
+            [qw(MULTISPECIES_DB SPECIES SPECIES_ID GROUP DBCONN DNADB NO_CACHE)],
             @args );
 
   if ( defined($con) ) { $self->dbc($con) }
@@ -106,7 +113,13 @@ sub new {
 
   $self = Bio::EnsEMBL::Utils::ConfigRegistry::gen_load($self);
 
-  if ( defined $dnadb ) { $self->dnadb($dnadb) }
+  if(defined $dnadb) {
+    $self->dnadb($dnadb);
+  }
+ 
+  if (defined $no_cache){
+      $self->no_cache($no_cache);
+  }
 
   return $self;
 }
@@ -707,6 +720,35 @@ sub species_id {
 }
 
 
+=head2 no_cache
+
+  Arg [1]    : (optional) int $arg
+               New value: 1 turns the slice feature cache off, 0 leaves it on.
+  Example    : $no_cache = $dba->no_cache();
+  Description: Getter/Setter for the no_cache attribute of this DBAdaptor.
+               Feature adaptors read it when they are constructed, so set
+               it through the constructor (-NO_CACHE) rather than later.
+  Returntype : int (undef if the attribute was never set)
+  Exceptions : thrown if the argument is anything other than 0 or 1
+  Caller     : new
+  Status     : Stable
+
+=cut
+
+sub no_cache {
+  my ( $self, $arg ) = @_;
+
+  if ( defined $arg ) {
+    # Compare as strings: a numeric test lets 'abc' (which == 0) slip through.
+    if ( $arg ne '0' && $arg ne '1' ) {
+      throw("$arg is not allowed for this attribute. Only value 1|0 is allowed");
+    }
+    $self->{_no_cache} = $arg;
+  }
+  return $self->{_no_cache};
+}
+
+
 =head2 group
 
   Arg [1]    : (optional) string $arg
diff --git a/modules/Bio/EnsEMBL/Registry.pm b/modules/Bio/EnsEMBL/Registry.pm
index 74e582d2e5..cdb6c98e1d 100644
--- a/modules/Bio/EnsEMBL/Registry.pm
+++ b/modules/Bio/EnsEMBL/Registry.pm
@@ -143,6 +143,13 @@ my $API_VERSION = 51;
                If not 0, the db connection will not be cleared, if 0 or
                if not set the db connections will be cleared (this is
                the default).
+  Arg [4]:     (optional) int 1
+               This option turns off caching for slice features, so every
+               time a set of features is retrieved, they come from the
+               database instead of the cache. It is only recommended for
+               advanced users, especially if you need to store and retrieve
+               features. It might reduce performance when querying the database
+               if not used properly. If in doubt, do not use it, or ask on ensembl-dev.
   Example    : Bio::EnsEMBL::Registry->load_all();
   Returntype : none
   Exceptions : none
@@ -152,13 +159,14 @@ my $API_VERSION = 51;
 
 sub load_all {
     my $class = shift;
-    my ( $config_file, $verbose, $no_clear ) = @_;
+    my ( $config_file, $verbose, $no_clear, $no_cache ) = @_;
 
     $config_file ||= $ENV{ENSEMBL_REGISTRY}
       || $ENV{HOME} . "/.ensembl_init";
 
     $verbose  ||= 0;
     $no_clear ||= 0;
+    $no_cache ||= 0;
 
     if ( !defined($config_file) ) {
         if ($verbose) {
@@ -302,6 +310,10 @@ sub load_all {
                 foreach my $parameter ( $cfg->Parameters($section) ) {
                     $adaptor_args{ '-' . $parameter } =
                       $cfg->val( $section, $parameter );
+		    #when set, do not use the feature cache in the different adaptors
+		    if ($no_cache){
+			$adaptor_args{'-no_cache'} = 1;
+		    }
                 }
 
                 if ($verbose) {
@@ -1080,6 +1092,15 @@ my $self = shift;
 =head2 load_registry_from_url
 
   Arg [1] : string $url
+  Arg [2] : (optional) integer
+               If not 0, will print out all information.
+  Arg [3] : (optional) integer
+               This option turns off caching for slice features, so every
+               time a set of features is retrieved, they come from the
+               database instead of the cache. It is only recommended for
+               advanced users, especially if you need to store and retrieve
+               features. It might reduce performance when querying the database
+               if not used properly. If in doubt, do not use it, or ask on ensembl-dev.
   Example : load_registry_from_url(mysql://anonymous@ensembldb.ensembl.org:3306);
   Description: Will load the correct versions of the ensembl databases for the
                software release it can find on a database instance into the 
@@ -1094,7 +1115,7 @@ my $self = shift;
 =cut
 
 sub load_registry_from_url {
-  my ($self, $url, $verbose) = @_;
+  my ($self, $url, $verbose, $no_cache) = @_;
 
   if ($url =~ /mysql\:\/\/([^\@]+\@)?([^\:\/]+)(\:\d+)?(\/\d+)?/) {
     my $user_pass = $1;
@@ -1114,7 +1135,8 @@ sub load_registry_from_url {
         -pass => $pass,
         -port => $port,
         -db_version => $version,
-        -verbose => $verbose);
+        -verbose => $verbose,
+	-no_cache => $no_cache);
   } else {
     throw("Only MySQL URLs are accepted at the moment");
   }
@@ -1143,6 +1165,13 @@ sub load_registry_from_url {
                  the connection is deleted if not used. By default this is 28800 (8 hours)
                  So set this to greater than this if your connection are getting deleted.
                  Only set this if you are having problems and know what you are doing.
+   Arg [-NO_CACHE]: (optional) int 1
+                 This option turns off caching for slice features, so every
+                 time a set of features is retrieved, they come from the
+                 database instead of the cache. It is only recommended for
+                 advanced users, especially if you need to store and retrieve
+                 features. It might reduce performance when querying the database
+                 if not used properly. If in doubt, do not use it, or ask on ensembl-dev.
 
   Example : load_registry_from_db( -host => 'ensembldb.ensembl.org',
 				   -user => 'anonymous',
@@ -1161,9 +1190,9 @@ sub load_registry_from_db {
   my ( $self, @args ) = @_;
 
   my ( $host, $port, $user, $pass, $verbose, $db_version,
-       $wait_timeout ) =
+       $wait_timeout, $no_cache ) =
     rearrange(
-             [qw(HOST PORT USER PASS VERBOSE DB_VERSION WAIT_TIMEOUT )],
+             [qw(HOST PORT USER PASS VERBOSE DB_VERSION WAIT_TIMEOUT NO_CACHE)],
              @args );
 
   my $go_version = 0;
@@ -1250,7 +1279,8 @@ sub load_registry_from_db {
                                           -pass         => $pass,
                                           -port         => $port,
                                           -dbname       => $coredb,
-                                          -wait_timeout => $wait_timeout
+                                          -wait_timeout => $wait_timeout,
+					  -no_cache     => $no_cache
       );
 
     ( my $sp = $species ) =~ s/_/ /g;
@@ -1289,7 +1319,8 @@ sub load_registry_from_db {
                                           -pass            => $pass,
                                           -port            => $port,
                                           -dbname          => $multidb,
-                                          -wait_timeout => $wait_timeout
+                                          -wait_timeout => $wait_timeout,
+					  -no_cache     => $no_cache
         );
 
       ( my $sp = $species ) =~ s/_/ /g;
@@ -1317,7 +1348,8 @@ sub load_registry_from_db {
                                           -pass         => $pass,
                                           -port         => $port,
                                           -dbname       => $cdnadb,
-                                          -wait_timeout => $wait_timeout
+                                          -wait_timeout => $wait_timeout,
+					  -no_cache     => $no_cache
       );
     ( my $sp = $species ) =~ s/_/ /g;
     $self->add_alias( $species, $sp );
@@ -1338,7 +1370,8 @@ sub load_registry_from_db {
                                          -pass         => $pass,
                                          -port         => $port,
                                          -wait_timeout => $wait_timeout,
-                                         -dbname       => $vegadb );
+                                         -dbname       => $vegadb,
+					 -no_cache     => $no_cache);
     ( my $sp = $species ) =~ s/_/ /g;
     $self->add_alias( $species, $sp );
     print $vegadb. " loaded\n" if ($verbose);
@@ -1360,7 +1393,8 @@ sub load_registry_from_db {
                                          -pass    => $pass,
                                          -port    => $port,
                                          -wait_timeout => $wait_timeout,
-                                         -dbname       => $other_db );
+                                         -dbname       => $other_db,
+					 -no_cache     => $no_cache );
     ( my $sp = $species ) =~ s/_/ /g;
     $self->add_alias( $species, $sp );
     print $other_db. " loaded\n" if ($verbose);
@@ -1379,7 +1413,8 @@ sub load_registry_from_db {
 	-pass => $pass,
 	-port => $port,
         -wait_timeout => $wait_timeout,
-	-dbname => $userupload_db
+	-dbname => $userupload_db,
+	-no_cache     => $no_cache
       );
       (my $sp = $species ) =~ s/_/ /g;
       $self->add_alias( $species, $sp );
@@ -1410,7 +1445,8 @@ sub load_registry_from_db {
                                          -pass         => $pass,
                                          -port         => $port,
                                          -wait_timeout => $wait_timeout,
-                                         -dbname       => $variation_db
+                                         -dbname       => $variation_db,
+					 -no_cache     => $no_cache
         );
       print $variation_db. " loaded\n" if ($verbose);
     }
@@ -1437,7 +1473,8 @@ sub load_registry_from_db {
                                          -pass         => $pass,
                                          -port         => $port,
                                          -wait_timeout => $wait_timeout,
-                                         -dbname       => $funcgen_db );
+                                         -dbname       => $funcgen_db,
+					 -no_cache     => $no_cache );
       print $funcgen_db. " loaded\n" if ($verbose);
     }
   }
@@ -1466,7 +1503,8 @@ sub load_registry_from_db {
                                          -pass         => $pass,
                                          -port         => $port,
                                          -wait_timeout => $wait_timeout,
-                                         -dbname       => $compara_db );
+                                         -dbname       => $compara_db,
+					 -no_cache     => $no_cache );
       if ($verbose) {
         printf( "%s loaded\n", $compara_db );
       }
@@ -1488,7 +1526,8 @@ sub load_registry_from_db {
                                       -pass    => $pass,
                                       -port    => $port,
                                       -wait_timeout => $wait_timeout,
-                                      -dbname       => $ancestral_db );
+                                      -dbname       => $ancestral_db,
+				      -no_cache     => $no_cache);
     print $ancestral_db. " loaded\n" if ($verbose);
   } else {
     print "No Ancestral database found" if ($verbose);
@@ -1515,7 +1554,8 @@ sub load_registry_from_db {
                                                     -user    => $user,
                                                     -pass    => $pass,
                                                     -port    => $port,
-                                                    -dbname  => $go_db
+                                                    -dbname  => $go_db,
+						    -no_cache     => $no_cache
         );
       print $go_db. " loaded\n" if ($verbose);
     }
-- 
GitLab