From cc88db72dca0a0ba5b01f8e8630ebfaf6f16ba9f Mon Sep 17 00:00:00 2001
From: Patrick Meidl <pm2@sanger.ac.uk>
Date: Wed, 19 Jul 2006 10:05:38 +0000
Subject: [PATCH] committing intermediate state of module before rewriting it
 to use lightweight feature object for caching

---
 modules/Bio/EnsEMBL/IdMapping/Cache.pm | 149 ++++++++++++++-----------
 1 file changed, 83 insertions(+), 66 deletions(-)

diff --git a/modules/Bio/EnsEMBL/IdMapping/Cache.pm b/modules/Bio/EnsEMBL/IdMapping/Cache.pm
index 6c45749100..82ea0d28da 100644
--- a/modules/Bio/EnsEMBL/IdMapping/Cache.pm
+++ b/modules/Bio/EnsEMBL/IdMapping/Cache.pm
@@ -89,14 +89,17 @@ sub new {
 
 sub cache_file_exists {
   my $self = shift;
+  my $type = shift;
+
+  throw("You must provide a cache type (old|new).") unless $type;
 
-  my $cache_path = $self->cache_path;
+  my $cache_path = $self->cache_path($type);
 
   if (-s $cache_path) {
-    $self->logger->log("Cache file exists. Will read cache from $cache_path.\n");
+    $self->logger->log("Cache file found. Will read from $cache_path.\n", 1);
     return(1);
   } else {
-    $self->logger->log("No cache file found. Will have to build cache from db.\n");
+    $self->logger->log("No cache file found. Will build cache from db.\n", 1);
     return(0);
   }
 }
@@ -104,98 +107,71 @@ sub cache_file_exists {
 
 sub cache_path {
   my $self = shift;
+  my $type = shift;
 
-  unless ($self->{'_cache_path'}) {
-    $self->{'_cache_path'} = ($self->conf->param('dumppath') || '.').'/'.
-      ($self->conf->param('cachefile') || 'object_cache.ser');
-  }
+  throw("You must provide a cache type (old|new).") unless $type;
 
-  return $self->{'_cache_path'};
+  my $cache_path = ($self->conf->param('dumppath') || '.')."/$type.".
+    ($self->conf->param('cachefile') || 'object_cache.ser');
+
+  return $cache_path;
 }
 
 
 sub write_to_file {
   my $self = shift;
+  my $type = shift;
+
+  throw("You must provide a cache type (old|new).") unless $type;
 
   # create dump directory if it doesn't exist
   if (my $dump_path = $self->conf->param('dumppath')) {
     unless (-d $dump_path) {
-      $self->logger->log("Cache directory $dump_path doesn't exist. Will create it.\n");
+      $self->logger->log("Cache directory $dump_path doesn't exist. Will create it.\n", 1);
       system("mkdir -p $dump_path") == 0 or
         throw("Unable to create directory $dump_path.\n");
     }
   }
   
-  my $cache_path = $self->cache_path;
+  my $cache_path = $self->cache_path($type);
 
-  $self->logger->log("Will dump to $cache_path.\n");
-  $self->logger->log_stamped("Dumping cache...\n");
+  $self->logger->log("Will dump to $cache_path.\n", 1);
+  $self->logger->log_stamped("Dumping cache...\n", 1);
 
-  eval { nstore($self->{'_cache'}, $cache_path) };
+  eval { nstore($self->{'_cache'}->{$type}, $cache_path) };
   if ($@) {
     throw("Unable to store $cache_path: $@\n");
   }
   my $size = -s $cache_path;
   $size = parse_bytes($size);
   
-  $self->logger->log_stamped("Done (cache file is $size).\n");
+  $self->logger->log_stamped("Done (cache file is $size).\n", 1);
 }
 
 
 sub read_from_file {
   my $self = shift;
+  my $type = shift;
+
+  throw("You must provide a cache type (old|new).") unless $type;
 
-  my $cache_path = $self->cache_path;
+  my $cache_path = $self->cache_path($type);
 
   unless (-s $cache_path) {
     throw("No valid cache file found at $cache_path.");
   }
 
-  eval { $self->{'_cache'} = retrieve($cache_path); };
+  $self->logger->log("Reading cache from $cache_path...\n");
+  eval { $self->{'_cache'}->{$type} = retrieve($cache_path); };
   if ($@) {
     throw("Unable to retrieve cache: $@");
   }
+  $self->logger->log_stamped("Done.\n");
 
-  return $self->{'_cache'};
+  return $self->{'_cache'}->{$type};
 }
 
 
-=head2 
-
-  Arg[1]      : 
-  Example     : 
-  Description : 
-  Return type : 
-  Exceptions  : 
-  Caller      : 
-  Status      :
-
-=cut
-
-sub build_cache {
-  my $self = shift;
-
-  # connect to db
-  my $old_dba = $self->get_DBAdaptor('old');
-  my $new_dba = $self->get_DBAdaptor('new');
-
-  #
-  # old database
-  #
-  $self->logger->log_stamped("Loading from old database...\n");
-
-  # fetch genes
-  my $old_genes = $self->fetch_genes($old_dba);
-
-  # fetch transcripts, translations and exons and build caches
-  $self->build_cache_from_genes('old', $old_genes);
-
-  #
-  # new database
-  #
-
-}
-
 sub fetch_genes {
   my $self = shift;
   my $dba = shift;
@@ -213,11 +189,11 @@ sub fetch_genes {
   # fetch genes, depending on filters to apply
   #
   my @all_genes = ();
-  my @genes = ();
+  my $genes = [];
 
   if ($self->conf->param('chromosomes')) {
     # filter by chromosome
-    $self->logger->log("Filtering by chromosome: ", 2);
+    $self->logger->log("Filtering by chromosome: ", 3);
     
     foreach my $chr ($self->conf->param('chromosomes')) {
       $self->logger->log("$chr ");
@@ -228,7 +204,7 @@ sub fetch_genes {
     
   } elsif ($self->conf->param('region')) {
     # filter by region (specific slice)
-    $self->logger->log("Filtering by region: ".$self->conf->param('region')."\n", 2);
+    $self->logger->log("Filtering by region: ".$self->conf->param('region')."\n", 3);
 
     my $slice = $sa->fetch_by_name($self->conf->param('region'));
     @all_genes = @{ $slice->get_all_Genes(undef, undef, 1) };
@@ -240,22 +216,22 @@ sub fetch_genes {
 
   # filter by biotype
   if ($self->conf->param('biotypes')) {
-    $self->logger->log("Filtering by biotype: ", 2);
+    $self->logger->log("Filtering by biotype: ", 3);
 
     foreach my $biotype ($self->conf->param('biotypes')) {
       $self->logger->log("$biotype ");
-      push @genes, grep { $_->biotype eq $biotype } @all_genes;
+      push @$genes, grep { $_->biotype eq $biotype } @all_genes;
     }
 
     $self->logger->log("\n");
 
   } else {
-    @genes = @all_genes;
+    $genes = \@all_genes;
   }
 
-  $self->logger->log_stamped("Done loading ".scalar(@genes)." genes.\n\n", 1);
+  $self->logger->log_stamped("Done loading ".scalar(@$genes)." genes.\n\n", 1);
 
-  return \@genes;
+  return $genes;
 }
 
 
@@ -273,10 +249,13 @@ sub build_cache_from_genes {
   my $num_genes = scalar(@$genes);
   $self->logger->init_progressbar('index_genes', $num_genes);
 
+#use Data::Dumper;
+#$Data::Dumper::Indent = 1;
+
   foreach my $gene (@$genes) {
     #$self->logger->log_progressbar('index_genes', ++$i, 2);
-    $self->logger->log_progress($num_genes, ++$i, 20, 2, 1);
-
+    $self->logger->log_progress($num_genes, ++$i, 20, 3, 1);
+    
     # build gene caches
     $self->add($type, 'genes_by_id', $gene->dbID, $gene);
     $self->add($type, 'genes_by_stable_id', $gene->stable_id, $gene);
@@ -308,10 +287,38 @@ sub build_cache_from_genes {
     }
   }
 
-  $self->logger->log_stamped("Done building the index:\n", 1);
+  foreach my $gene (@$genes) {
+    #
+    # delete all unwanted data from objects
+    #
+    $gene->adaptor(undef);
+    $gene->analysis(undef);
+    $gene->display_xref(undef);
+    $gene->slice->adaptor(undef);
+    $gene->slice->coord_system->adaptor(undef);
+
+    foreach my $tr (@{ $gene->get_all_Transcripts }) {
+      $tr->adaptor(undef);
+      $tr->display_xref(undef);
+      $tr->analysis(undef);
+      
+      if (my $tl = $tr->translation) {
+        $tl->adaptor(undef);
+      }
+
+      foreach my $exon (@{ $tr->get_all_Exons }) {
+        $exon->adaptor(undef);
+        $exon->analysis(undef);
+      }
+    }
+
+    #warn Data::Dumper::Dumper($gene) if ($i < 2);;
+  }
+
+  $self->logger->log_stamped("Done building the index:\n", 2);
   foreach my $name (qw(genes transcripts translations exons)) {
-    my $num = scalar(keys %{ $self->get_by_name('old', "${name}_by_id") });
-    $self->logger->log(sprintf("%12.0f %-20s\n", $num, $name), 2);
+    my $num = scalar(keys %{ $self->get_by_name($type, "${name}_by_id") });
+    $self->logger->log(sprintf("%12.0f %-20s\n", $num, $name), 3);
   }
   $self->logger->log("\n");
 }
@@ -374,6 +381,16 @@ sub get_by_name {
 }
 
 
+sub flush_by_type {
+  my $self = shift;
+  my $type = shift;
+
+  throw("You must provide a cache type (old|new).") unless $type;
+
+  undef $self->{'_cache'}->{$type};
+}
+
+
 sub get_DBAdaptor {
   my $self = shift;
   my $prefix = shift;
-- 
GitLab