From 4603dc5b81af06b7cbffb6ee108b9343cb81350d Mon Sep 17 00:00:00 2001
From: Patrick Meidl <pm2@sanger.ac.uk>
Date: Fri, 29 Jun 2007 14:35:43 +0000
Subject: [PATCH] latest state of the code (bugfixes and adaptations due to
 changes in ConfParser and Logger)

---
 modules/Bio/EnsEMBL/IdMapping/Cache.pm        |  66 +++++----
 .../Bio/EnsEMBL/IdMapping/ExonScoreBuilder.pm | 140 +++++++++---------
 .../Bio/EnsEMBL/IdMapping/GeneScoreBuilder.pm |  60 ++++----
 modules/Bio/EnsEMBL/IdMapping/ScoreBuilder.pm |   8 +-
 .../EnsEMBL/IdMapping/ScoredMappingMatrix.pm  |  21 ++-
 .../IdMapping/TranscriptScoreBuilder.pm       |  53 +++----
 6 files changed, 188 insertions(+), 160 deletions(-)

diff --git a/modules/Bio/EnsEMBL/IdMapping/Cache.pm b/modules/Bio/EnsEMBL/IdMapping/Cache.pm
index d4e8234161..2162d943a0 100644
--- a/modules/Bio/EnsEMBL/IdMapping/Cache.pm
+++ b/modules/Bio/EnsEMBL/IdMapping/Cache.pm
@@ -47,6 +47,10 @@ use Storable qw(nfreeze thaw nstore retrieve);
 # define available cache names here
 my @cache_names = qw(
     exons_by_id
+    transcripts_by_id
+    transcripts_by_exon_id
+    genes_by_id
+    genes_by_transcript_id
 );
 
 
@@ -101,12 +105,17 @@ sub build_cache {
   my $i = scalar(@$genes);
 
   # find common coord_system
-  $self->find_common_coord_systems;
+  my $common_cs_found = $self->find_common_coord_systems;
 
-  # find out whether native and common coord_system are identical
+  # find out whether native coord_system is a common coord_system.
+  # if so, you don't need to project.
+  # also don't project if no common coord_system present
   my $need_project = 1;
-  my $csid = join(':', $slice->coord_system_name, $slice->coord_system->version);
-  $need_project = 0 if ($self->is_common_cs($csid));
+  my $csid = join(':', $slice->coord_system_name,
+                       $slice->coord_system->version);
+  if ($self->is_common_cs($csid) or !$self->highest_common_cs) {
+    $need_project = 0;
+  }
   
   # build cache
   my $type = "$dbtype.$slice_name";
@@ -163,8 +172,8 @@ sub build_cache_from_genes {
     ]);
 
     # build gene caches
-    #$self->add($type, 'genes_by_id', $gene->dbID, $lgene);
-    #$self->add($type, 'genes_by_stable_id', $gene->stable_id, $lgene);
+    $self->add('genes_by_id', $type, $gene->dbID, $lgene);
+    #$self->add('genes_by_stable_id', $type, $gene->stable_id, $lgene);
     
     # transcripts
     foreach my $tr (@{ $gene->get_all_Transcripts }) {
@@ -180,9 +189,9 @@ sub build_cache_from_genes {
       #$lgene->add_Transcript($ltr);
 
       # build transcript caches
-      $self->add($type, 'transcripts_by_id', $tr->dbID, $ltr);
-      #$self->add($type, 'transcripts_by_stable_id', $tr->stable_id, $ltr);
-      $self->add($type, 'genes_by_transcript_id', $tr->dbID, $lgene);
+      $self->add('transcripts_by_id', $type, $tr->dbID, $ltr);
+      #$self->add('transcripts_by_stable_id', $type, $tr->stable_id, $ltr);
+      $self->add('genes_by_transcript_id', $type, $tr->dbID, $lgene);
 
       # translation (if there is one)
       if (my $tl = $tr->translation) {
@@ -193,9 +202,9 @@ sub build_cache_from_genes {
 
         #$ltr->add_Translation($ltl);
 
-        #$self->add($type, 'translations_by_id', $tl->dbID, $ltl);
-        #$self->add($type, 'translations_by_stable_id', $tl->stable_id, $ltl);
-        #$self->add($type, 'translations_by_transcript_id', $tr->dbID, $ltl);
+        #$self->add('translations_by_id', $type, $tl->dbID, $ltl);
+        #$self->add('translations_by_stable_id', $type, $tl->stable_id, $ltl);
+        #$self->add('translations_by_transcript_id', $type, $tr->dbID, $ltl);
 
         undef $tl;
       }
@@ -233,8 +242,8 @@ sub build_cache_from_genes {
         $ltr->add_Exon($lexon);
 
         $self->add('exons_by_id', $type, $exon->dbID, $lexon);
-        #$self->add($type, 'genes_by_exon_id', $exon->dbID, $lgene);
-        $self->add_list($type, 'transcripts_by_exon_id', $exon->dbID, $ltr);
+        #$self->add('genes_by_exon_id', $type, $exon->dbID, $lgene);
+        $self->add_list('transcripts_by_exon_id', $type, $exon->dbID, $ltr);
 
         undef $exon;
       }
@@ -295,7 +304,7 @@ sub get_by_key {
 
   # transparently load cache from file unless already loaded
   unless ($self->{'instance'}->{'loaded'}->{"$name:$type"}) {
-    $self->load_and_merge($name, $type);
+    $self->read_and_merge($name, $type);
   }
 
   return $self->{'cache'}->{$name}->{$type}->{$key};
@@ -311,7 +320,7 @@ sub get_by_name {
   
   # transparently load cache from file unless already loaded
   unless ($self->{'instance'}->{'loaded'}->{"$name:$type"}) {
-    $self->load_and_merge($name, $type);
+    $self->read_and_merge($name, $type);
   }
 
   return $self->{'cache'}->{$name}->{$type} || {};
@@ -328,7 +337,7 @@ sub get_count_by_name {
   
   # transparently load cache from file unless already loaded
   unless ($self->{'instance'}->{'loaded'}->{"$name:$type"}) {
-    $self->load_and_merge($name, $type);
+    $self->read_and_merge($name, $type);
   }
 
   return scalar(keys %{ $self->get_by_name($name, $type) });
@@ -382,6 +391,7 @@ sub find_common_coord_systems {
     }
   }
   
+  return $found_highest;
 }
 
 
@@ -427,7 +437,7 @@ sub seq_regions_compatible {
   if ($equal/$s_count > 0.5 and $equal/$t_count > 0.5) {
     return(1);
   } else {
-    $self->logger->log("Only $equal seq_regions identical for ".$cs->name." ".$cs->version."\n");
+    $self->logger->info("Only $equal seq_regions identical for ".$cs->name." ".$cs->version."\n");
     return(0);
   }
   
@@ -491,10 +501,10 @@ sub cache_file_exists {
   my $cache_file = $self->cache_file($name, $type);
 
   if (-s $cache_file) {
-    $self->logger->log("Cache file found. Will read from $cache_file.\n", 3);
+    $self->logger->info("Cache file found. Will read from $cache_file.\n", 3);
     return 1;
   } else {
-    $self->logger->log("No cache file found. Will build cache from db.\n", 3);
+    $self->logger->info("No cache file found for $name/$type. Will build cache from db.\n", 3);
     return 0;
   }
 }
@@ -573,7 +583,7 @@ sub write_to_file {
   throw("You must provide a cache type.") unless $type;
 
   unless ($self->{'cache'}->{$name}->{$type}) {
-    $self->logger->log_warning("No features found in $name/$type. Won't write cache file.\n");
+    $self->logger->warning("No features found in $name/$type. Won't write cache file.\n");
     return;
   }
 
@@ -639,13 +649,13 @@ sub read_from_file {
     throw("No valid cache file found at $cache_file.");
   }
 
-  #$self->logger->log_stamped("Reading cache from file...\n");
-  #$self->logger->log("Cache file $cache_file.\n", 1);
+  #$self->logger->info("Reading cache from file...\n", 0, 'stamped');
+  #$self->logger->info("Cache file $cache_file.\n", 1);
   eval { $self->{'cache'}->{$name}->{$type} = retrieve($cache_file); };
   if ($@) {
     throw("Unable to retrieve cache: $@");
   }
-  #$self->logger->log_stamped("Done.\n");
+  #$self->logger->info("Done.\n", 0, 'stamped');
 
   return $self->{'cache'}->{$name}->{$type};
 }
@@ -662,7 +672,7 @@ sub merge {
     
     foreach my $key (keys %{ $self->{'cache'}->{$name}->{$type} || {} }) {
       if (defined $self->{'cache'}->{$name}->{$merged_type}->{$key}) {
-        warning("Duplicate key in cache: $name|$merged_type|$key. Skipping.\n");
+        # warning("Duplicate key in cache: $name|$merged_type|$key. Skipping.\n");
       } else {
         $self->{'cache'}->{$name}->{$merged_type}->{$key} =
           $self->{'cache'}->{$name}->{$type}->{$key};
@@ -715,7 +725,11 @@ sub slice_names {
     
   } elsif ($self->conf->param('region')) {
     # filter by region (specific slice)
-    my $slice = $sa->fetch_by_name($self->conf->param('region'));
+    # don't use SliceAdaptor->fetch_by_name() since this will fail if assembly
+    # versions are different for source and target db
+    my ($cs, $version, $name, $start, $end, $strand) =
+      split(/:/, $self->conf->param('region'));
+    my $slice = $sa->fetch_by_region($cs, $name, $start, $end);
     push @slice_names, $slice->name;
 
   } else {
diff --git a/modules/Bio/EnsEMBL/IdMapping/ExonScoreBuilder.pm b/modules/Bio/EnsEMBL/IdMapping/ExonScoreBuilder.pm
index 1e3e415167..8eee9a73bb 100644
--- a/modules/Bio/EnsEMBL/IdMapping/ExonScoreBuilder.pm
+++ b/modules/Bio/EnsEMBL/IdMapping/ExonScoreBuilder.pm
@@ -52,28 +52,28 @@ use Bio::EnsEMBL::IdMapping::ScoredMappingMatrix;
 sub score_exons {
   my $self = shift;
 
-  $self->logger->log_stamped("Starting exon scoring...\n\n");
+  $self->logger->info("Starting exon scoring...\n\n", 0, 'stamped');
 
   # score using overlaps, then exonerate
   my $matrix = $self->overlap_score;
   my $exonerate_matrix = $self->exonerate_score($matrix);
 
   # log stats before matrix merging
-  $self->logger->log("\nOverlap scoring matrix:\n");
+  $self->logger->info("\nOverlap scoring matrix:\n");
   $self->log_matrix_stats($matrix);
-  $self->logger->log("\nExonerate scoring matrix:\n");
+  $self->logger->info("\nExonerate scoring matrix:\n");
   $self->log_matrix_stats($exonerate_matrix);
   
   # merge matrices
-  $self->logger->log_stamped("\nMerging scoring matrices...\n");
+  $self->logger->info("\nMerging scoring matrices...\n", 0, 'stamped');
   $matrix->merge($exonerate_matrix);
-  $self->logger->log_stamped("Done.\n\n");
+  $self->logger->info("Done.\n\n", 0, 'stamped');
 
   # log stats of combined matrix
-  $self->logger->log("Combined scoring matrix:\n");
+  $self->logger->info("Combined scoring matrix:\n");
   $self->log_matrix_stats($matrix);
   
-  $self->logger->log_stamped("\nDone with exon scoring.\n\n");
+  $self->logger->info("\nDone with exon scoring.\n\n", 0, 'stamped');
 
   return $matrix;
 }
@@ -95,20 +95,20 @@ sub overlap_score {
   if (-s $overlap_cache) {
     
     # read from file
-    $self->logger->log_stamped("Reading exon overlap scoring matrix from file...\n");
-    $self->logger->log("Cache file $overlap_cache.\n", 1);
+    $self->logger->info("Reading exon overlap scoring matrix from file...\n", 0, 'stamped');
+    $self->logger->info("Cache file $overlap_cache.\n", 1);
     $matrix->read_from_file;
-    $self->logger->log_stamped("Done.\n");
+    $self->logger->info("Done.\n", 0, 'stamped');
     
   } else {
     
     # build scoring matrix
-    $self->logger->log("No exon overlap scoring matrix found. Will build new one.\n");
+    $self->logger->info("No exon overlap scoring matrix found. Will build new one.\n");
 
     if ($self->cache->highest_common_cs) {
-      $self->logger->log_stamped("Overlap scoring...\n");
+      $self->logger->info("Overlap scoring...\n", 0, 'stamped');
       $matrix = $self->build_overlap_scores($matrix);
-      $self->logger->log_stamped("Done.\n");
+      $self->logger->info("Done.\n", 0, 'stamped');
     }
 
     # write scoring matrix to file
@@ -142,15 +142,15 @@ sub exonerate_score {
   if (-s $exonerate_cache) {
 
     # read from file
-    $self->logger->log_stamped("Reading exonerate matrix from file...\n");
-    $self->logger->log("Cache file $exonerate_cache.\n", 1);
+    $self->logger->info("Reading exonerate matrix from file...\n", 0, 'stamped');
+    $self->logger->info("Cache file $exonerate_cache.\n", 1);
     $exonerate_matrix->read_from_file;
-    $self->logger->log_stamped("Done.\n");
+    $self->logger->info("Done.\n", 0, 'stamped');
 
   } else {
 
     # build scoring matrix
-    $self->logger->log("No exonerate matrix found. Will build new one.\n");
+    $self->logger->info("No exonerate matrix found. Will build new one.\n");
 
     # dump exons to fasta files
     my $dump_count = $self->dump_filtered_exons($matrix);
@@ -164,7 +164,7 @@ sub exonerate_score {
 
     } else {
 
-      $self->logger->log("No source and/or target exons dumped, so don't need to run exonerate.\n");
+      $self->logger->info("No source and/or target exons dumped, so don't need to run exonerate.\n");
 
     }
     
@@ -193,7 +193,7 @@ sub build_overlap_scores {
   }
 
   # get sorted list of exon containers
-  $self->logger->log_stamped("Reading sorted exons from cache...\n", 1);
+  $self->logger->info("Reading sorted exons from cache...\n", 1, 'stamped');
 
   my @source_exons = $self->sort_exons(
     [values %{ $self->cache->get_by_name('exons_by_id', 'source') }]
@@ -202,7 +202,7 @@ sub build_overlap_scores {
     [values %{ $self->cache->get_by_name('exons_by_id', 'target') }]
   );
 
-  $self->logger->log_stamped("Done.\n", 1);
+  $self->logger->info("Done.\n", 1, 'stamped');
 
   # get first source and target exon container
   my $source_ec = shift(@source_exons);
@@ -211,7 +211,7 @@ sub build_overlap_scores {
   my %source_overlap = ();
   my %target_overlap = ();
   
-  $self->logger->log_stamped("Scoring...\n", 1);
+  $self->logger->info("Scoring...\n", 1, 'stamped');
 
   while ($source_ec or $target_ec) {
 
@@ -243,7 +243,8 @@ sub build_overlap_scores {
           next if (defined($matrix->get_score(
             $source_ec->[0]->id, $target_exon->id)));
 
-          $self->overlap_score($source_ec->[0], $target_exon, $matrix);
+          $self->calc_overlap_score($source_ec->[0], $target_exon,
+            $matrix);
         }
       }
 
@@ -265,7 +266,7 @@ sub build_overlap_scores {
           next if (defined($matrix->get_score(
             $source_exon->id, $target_ec->[0]->id)));
 
-          $self->overlap_score($source_exon, $target_ec->[0], $matrix);
+          $self->calc_overlap_score($source_exon, $target_ec->[0], $matrix);
         }
       }
 
@@ -274,7 +275,7 @@ sub build_overlap_scores {
     }
   }
 
-  $self->logger->log_stamped("Done.\n", 1);
+  $self->logger->info("Done.\n", 1, 'stamped');
 
   return $matrix;
 }
@@ -312,7 +313,7 @@ sub compare_exon_containers {
 # region by exons sizes. 1.0 is full overlap on both exons. Score of at least
 # 0,5 are added to the exon scoring matrix.
 #
-sub overlap_score {
+sub calc_overlap_score {
   my $self = shift;
   my $source_exon = shift;
   my $target_exon = shift;
@@ -361,8 +362,8 @@ sub run_exonerate {
 
   my $source_file = $self->exon_fasta_file('source');
   my $target_file = $self->exon_fasta_file('target');
-  my $source_size = -s $source_size;
-  my $target_size = -s $target_size;
+  my $source_size = -s $source_file;
+  my $target_size = -s $target_file;
 
   # check if fasta files exist and are not empty
   unless ($source_size and $target_size) {
@@ -373,21 +374,21 @@ sub run_exonerate {
   my $logpath = ($self->conf->param('logpath')||$self->cache->dump_path).
     '/lsf_exonerate';
   system("rm -rf $logpath") == 0 or
-    $self->logger->log_error("Unable to delete lsf log dir $logpath: $!\n");
+    $self->logger->error("Unable to delete lsf log dir $logpath: $!\n");
   system("mkdir -p $logpath") == 0 or
-    $self->logger->log_error("Can't create lsf log dir $logpath: $!\n");
+    $self->logger->error("Can't create lsf log dir $logpath: $!\n");
 
   # delete exonerate output from previous runs
   my $dumppath = $self->cache->dump_path;
 
   opendir(DUMPDIR, $dumppath) or
-    $self->logger->log_error("Can't open $dumppath for reading: $!");
+    $self->logger->error("Can't open $dumppath for reading: $!");
 
   while (defined(my $file = readdir(DUMPDIR))) {
     next unless /exonerate_map\.\d+/;
 
     unlink("$dumppath/$file") or
-      $self->logger->log_error("Can't delete $dumppath/$file: $!");
+      $self->logger->error("Can't delete $dumppath/$file: $!");
   }
   
   closedir(DUMPDIR);
@@ -404,29 +405,37 @@ sub run_exonerate {
   #
   # run exonerate jobs using lsf
   #
-  my $exonerate_job = "bsub -J $lsf_name[1-$num_jobs] " .
-    " -o $logpath/exonerate.%I.out -e $logpath/exonerate.%I.err" .
-    " -q normal -m bc_hosts " .
-    " $exonerate_path $source_file $target_file" .
-    " --querychunkid \$LSB_JOBINDEX --querychunktotal $num_jobs" .
-    " --model affine:local -M 900 --showalignment FALSE --subopt no" .
-    " --percent $percent --ryo \"myinfo: \%qi \%ti \%et \%ql \%tl\\n\"" .
-    " | grep '^myinfo:' > $dumppath/exonerate_map.\$LSB_JOBINDEX";
-
-  $self->logger->log("Submitting $num_jobs exonerate jobs to lsf:\n\n");
-  $self->logger->log("$exonerate_job\n\n");
+  local *BSUB;
+  open BSUB, "|bsub -J$lsf_name\[1-$num_jobs\] -o $logpath/exonerate.\%I.out"
+    or $self->logger->error("Could not open open pipe to bsub: $!\n");
+
+  my $exonerate_job = qq{$exonerate_path } .
+    qq{--query $source_file --target $target_file } .
+    q{--querychunkid $LSB_JOBINDEX } .
+    qq{--querychunktotal $num_jobs } .
+    q{--model affine:local -M 900 --showalignment FALSE --subopt no } .
+    qq{--percent $percent } .
+    q{--ryo 'myinfo: %qi %ti %et %ql %tl\n' } .
+    qq{| grep '^myinfo:' > $dumppath/exonerate_map.\$LSB_JOBINDEX} . "\n";
+  
+  $self->logger->info("Submitting $num_jobs exonerate jobs to lsf:\n\n");
+  $self->logger->info("$exonerate_job\n\n");
 
-  system("$exonerate_job") == 0
-    or $self->logger->log_error("Error submitting exonerate jobs: $!");
+  print BSUB $exonerate_job;
+  $self->logger->error("Error submitting exonerate jobs: $!\n")
+    unless ($? == 0); 
+  close BSUB;
 
   # submit depended job to monitor finishing of exonerate jobs
-  $self->logger->log_stamped("Waiting for exonerate jobs to finish...\n");
+  $self->logger->info("Waiting for exonerate jobs to finish...\n", 0, 'stamped');
 
-  my $depended_job = "bsub -K -w ended($lsf_name) -q small " .
-    " -o $logpath/exonerate_depend.out -e $logpath/exonerate_depend.err" .
-    " /bin/true";
+  my $dependent_job = qq{bsub -K -w "ended($lsf_name)" -q small } .
+    qq{-o $logpath/exonerate_depend.out /bin/true};
 
-  $self->logger->log_stamepd("All exonerate jobs finished.\n");
+  system($dependent_job) == 0 or
+    $self->logger->error("Error submitting dependent job: $!\n");
+
+  $self->logger->info("All exonerate jobs finished.\n", 0, 'stamped');
 
   #
   # check results
@@ -446,18 +455,18 @@ sub run_exonerate {
   }
 
   if (@missing) {
-    $self->logger->log("Couldn't find all exonerate output files. These are missing:\n");
+    $self->logger->info("Couldn't find all exonerate output files. These are missing:\n");
     foreach (@missing) {
-      $self->logger->log("$_\n", 1);
+      $self->logger->info("$_\n", 1);
     }
 
     exit(1);
   }
 
   if (@error) {
-    $self->logger->log("One or more exonerate jobs failed. Check these error files:\n");
+    $self->logger->info("One or more exonerate jobs failed. Check these error files:\n");
     foreach (@error) {
-      $self->logger->log("$_\n", 1);
+      $self->logger->info("$_\n", 1);
     }
 
     exit(1);
@@ -506,7 +515,7 @@ sub write_filtered_exons {
     throw('You must provide a ScoredMappingMatrix.');
   }
 
-  $self->logger->log_stamped("\nDumping $type exons to fasta file...\n");
+  $self->logger->info("\nDumping $type exons to fasta file...\n", 0, 'stamped');
 
   # don't dump exons shorter than this
   my $min_exon_length = $self->conf->param('min_exon_length') || 15;
@@ -552,11 +561,11 @@ sub write_filtered_exons {
 
   # log
   my $fmt = "%-30s%10s\n";
-  my $size = -e $file;
-  $self->logger->log(sprintf($fmt, 'Total exons:', $total_exons), 1);
-  $self->logger->log(sprintf($fmt, 'Dumped exons:', $dumped_exons), 1);
-  $self->logger->log(sprintf($fmt, 'Dump file size:', parse_bytes($size)), 1);
-  $self->logger->log_stamped("Done.\n\n");
+  my $size = -s $file;
+  $self->logger->info(sprintf($fmt, 'Total exons:', $total_exons), 1);
+  $self->logger->info(sprintf($fmt, 'Dumped exons:', $dumped_exons), 1);
+  $self->logger->info(sprintf($fmt, 'Dump file size:', parse_bytes($size)), 1);
+  $self->logger->info("Done.\n\n", 0, 'stamped');
 
   return $dumped_exons;
 }
@@ -571,21 +580,20 @@ sub parse_exonerate_results {
     throw('You must provide a ScoredMappingMatrix.');
   }
 
-  $self->logger->log_stamped("Parsing exonerate results...\n");
+  $self->logger->info("Parsing exonerate results...\n", 0, 'stamped');
 
   # loop over all result files
   my $dumppath = $self->cache->dump_path;
   my $num_files = 0;
+  my $num_lines = 0;
 
   opendir(DUMPDIR, $dumppath) or
-    $self->logger->log_error("Can't open $dumppath for reading: $!");
+    $self->logger->error("Can't open $dumppath for reading: $!");
 
   while (defined(my $file = readdir(DUMPDIR))) {
-    next unless /exonerate_map\.\d+/;
+    next unless $file =~ /exonerate_map\.\d+/;
 
-    # counters
     $num_files++;
-    my $num_lines = 0;
 
     open(F, '<', "$dumppath/$file");
 
@@ -601,7 +609,7 @@ sub parse_exonerate_results {
       my $score = 0;
 
       if ($source_length == 0 or $target_length == 0) {
-        $self->logger->log_warning("Alignment length is 0 for $source_id/$target_id.\n");
+        $self->logger->warning("Alignment length is 0 for $source_id/$target_id.\n");
       } else {
         $score = 2 * $match_length / ($source_length + $target_length);
       }
@@ -615,7 +623,7 @@ sub parse_exonerate_results {
   
   closedir(DUMPDIR);
 
-  $self->logger->log_stamped("Done parsing $num_lines lines from $num_files result files.\n");
+  $self->logger->info("Done parsing $num_lines lines from $num_files result files.\n", 0, 'stamped');
 
   return $exonerate_matrix;
 }
diff --git a/modules/Bio/EnsEMBL/IdMapping/GeneScoreBuilder.pm b/modules/Bio/EnsEMBL/IdMapping/GeneScoreBuilder.pm
index dec16faba6..f06213822d 100644
--- a/modules/Bio/EnsEMBL/IdMapping/GeneScoreBuilder.pm
+++ b/modules/Bio/EnsEMBL/IdMapping/GeneScoreBuilder.pm
@@ -52,7 +52,7 @@ sub score_genes {
     throw('Need a Bio::EnsEMBL::IdMapping::ScoredMappingMatrix.');
   }
 
-  $self->logger->log_stamped("Starting gene scoring...\n\n");
+  $self->logger->info("Starting gene scoring...\n\n", 0, 'stamped');
 
   # build scores based on transcript scores
   my $matrix = $self->scores_from_transcript_scores($transcript_matrix);
@@ -60,23 +60,23 @@ sub score_genes {
   # log stats of combined matrix
   my $fmt = "%-40s%10.0f\n";
 
-  $self->logger->log("Scoring matrix:\n");
+  $self->logger->info("Scoring matrix:\n");
 
-  $self->logger->log(sprintf($fmt, "Total source genes:",
-    $self->cache->get_count_by_name('genes_by_id', 'source'), 1);
+  $self->logger->info(sprintf($fmt, "Total source genes:",
+    $self->cache->get_count_by_name('genes_by_id', 'source')), 1);
 
-  $self->logger->log(sprintf($fmt, "Scored source genes:",
+  $self->logger->info(sprintf($fmt, "Scored source genes:",
     $matrix->get_source_count), 1);
 
-  $self->logger->log(sprintf($fmt, "Total target genes:",
-    $self->cache->get_count_by_name('genes_by_id', 'target'), 1);
+  $self->logger->info(sprintf($fmt, "Total target genes:",
+    $self->cache->get_count_by_name('genes_by_id', 'target')), 1);
 
-  $self->logger->log(sprintf($fmt, "Scored target genes:",
+  $self->logger->info(sprintf($fmt, "Scored target genes:",
     $matrix->get_target_count), 1);
 
   $self->log_matrix_stats($matrix);
   
-  $self->logger->log("\nDone with transcript scoring.\n\n");
+  $self->logger->info("\nDone with transcript scoring.\n\n");
 
   return $matrix;
 }
@@ -101,19 +101,19 @@ sub scores_from_transcript_scores {
   if (-s $gene_cache) {
     
     # read from file
-    $self->logger->log_stamped("Reading gene scoring matrix from file...\n");
-    $self->logger->log("Cache file $gene_cache.\n", 1);
+    $self->logger->info("Reading gene scoring matrix from file...\n", 0, 'stamped');
+    $self->logger->info("Cache file $gene_cache.\n", 1);
     $matrix->read_from_file;
-    $self->logger->log_stamped("Done.\n");
+    $self->logger->info("Done.\n", 0, 'stamped');
     
   } else {
     
     # build scoring matrix
-    $self->logger->log("No gene scoring matrix found. Will build new one.\n");
+    $self->logger->info("No gene scoring matrix found. Will build new one.\n");
 
-    $self->logger->log_stamped("Transcript scoring...\n");
+    $self->logger->info("Gene scoring...\n", 0, 'stamped');
     $matrix = $self->build_scores($matrix, $transcript_matrix);
-    $self->logger->log_stamped("Done.\n");
+    $self->logger->info("Done.\n", 0, 'stamped');
 
     # write scoring matrix to file
     $matrix->write_to_file;
@@ -144,7 +144,7 @@ sub build_scores {
   $self->flag_matrix_from_transcript_scores($matrix, $transcript_matrix);
 
   # now calculate the actual scores for the genes in the flag matrix
-  $final_matrix = $self->score_matrix_from_flag_matrix($matrix,
+  my $final_matrix = $self->score_matrix_from_flag_matrix($matrix,
     $transcript_matrix);
   
   return $final_matrix;
@@ -161,7 +161,7 @@ sub flag_matrix_from_transcript_scores {
   my $num_genes =
     scalar(keys %{ $self->cache->get_by_name('genes_by_id', 'source') });
   
-  $self->logger->log("Creating flag matrix...\n", 1);
+  $self->logger->info("Creating flag matrix...\n", 1);
 
   # for every transcript scoring matrix entry, make an entry in the gene flag
   # matrix.
@@ -178,7 +178,7 @@ sub flag_matrix_from_transcript_scores {
     $matrix->add_score($source_gene->id, $target_gene->id, 1);
   }
 
-  $self->logger->log("\n\n");
+  $self->logger->info("\n\n");
 
   return $matrix;
 }
@@ -201,7 +201,7 @@ sub score_matrix_from_flag_matrix {
   my $num_genes =
     scalar(keys %{ $self->cache->get_by_name('genes_by_id', 'source') });
   
-  $self->logger->log("Creating score matrix from flag matrix...\n", 1);
+  $self->logger->info("Creating score matrix from flag matrix...\n", 1);
 
   # loop over flag matrix and do proper scoring for each entry
   foreach my $entry (@{ $flag_matrix->get_all_Entries }) {
@@ -230,7 +230,7 @@ sub score_matrix_from_flag_matrix {
     $matrix->add($entry->source, $entry->target, $score);
   }
 
-  $self->logger->log("\n\n");
+  $self->logger->info("\n\n");
 
   return $matrix;
 }
@@ -254,7 +254,7 @@ sub complex_gene_gene_score {
     @{ $target_gene->get_all_Transcripts };
     
   # loop over source transcripts
-  foreach my $source_transcript (@{ $source_gene->get_all_Transcripts ) {
+  foreach my $source_transcript (@{ $source_gene->get_all_Transcripts }) {
 
     # now loop over target transcripts and find the highest scoring target
     # transcript belonging to the target gene
@@ -262,7 +262,7 @@ sub complex_gene_gene_score {
     
     foreach my $target_transcript_id (@{ $transcript_matrix->get_targets_for_source($source_transcript->id) }) {
 
-      next unless (%target_transcripts{$target_transcript_id});
+      next unless ($target_transcripts{$target_transcript_id});
 
       my $score = $transcript_matrix->get_score(
         $source_transcript->id, $target_transcript_id);
@@ -281,7 +281,7 @@ sub complex_gene_gene_score {
     @{ $source_gene->get_all_Transcripts };
     
   # loop over target transcripts
-  foreach my $target_transcript (@{ $target_gene->get_all_Transcripts ) {
+  foreach my $target_transcript (@{ $target_gene->get_all_Transcripts }) {
 
     # now loop over source transcripts and find the highest scoring source
     # transcript belonging to the source gene
@@ -289,7 +289,7 @@ sub complex_gene_gene_score {
     
     foreach my $source_transcript_id (@{ $transcript_matrix->get_sources_for_target($target_transcript->id) }) {
 
-      next unless (%source_transcripts{$source_transcript_id});
+      next unless ($source_transcripts{$source_transcript_id});
 
       my $score = $transcript_matrix->get_score(
         $source_transcript_id, $target_transcript->id);
@@ -306,14 +306,14 @@ sub complex_gene_gene_score {
   # calculate overall score for this gene
   my $gene_score = 0;
 
-  if (($source_gene_length + $target_gene_length) > 0) {
+  if (($source_gene->length + $target_gene->length) > 0) {
 
     $gene_score = ($source_gene_score + $target_gene_score) /
-                  ($source_gene_length + $target_gene_length);
+                  ($source_gene->length + $target_gene->length);
 
   } else {
   
-    $self->logger->log_warning("Combined length of source (".$source_gene->id.") and target (".$target_gene->id.") gene is zero!\n", 1);
+    $self->logger->warning("Combined length of source (".$source_gene->id.") and target (".$target_gene->id.") gene is zero!\n", 1);
   
   }
 
@@ -326,7 +326,7 @@ sub complex_gene_gene_score {
 # This is used when the more elaborate gene representing score does not
 # distinguish very well.
 #
-sub complex_gene_gene_score {
+sub simple_gene_gene_score {
   my $self = shift;
   my $source_gene = shift;
   my $target_gene = shift;
@@ -334,8 +334,8 @@ sub complex_gene_gene_score {
 
   my $gene_score = 0;
 
-  foreach my $source_transcript (@{ $source_gene->get_all_Transcripts ) {
-    foreach my $target_transcript (@{ $target_gene->get_all_Transcripts ) {
+  foreach my $source_transcript (@{ $source_gene->get_all_Transcripts }) {
+    foreach my $target_transcript (@{ $target_gene->get_all_Transcripts }) {
       
       my $score = $transcript_matrix->get_score($source_transcript->id,
         $target_transcript->id);
diff --git a/modules/Bio/EnsEMBL/IdMapping/ScoreBuilder.pm b/modules/Bio/EnsEMBL/IdMapping/ScoreBuilder.pm
index 15aaf86661..4f6403e68e 100644
--- a/modules/Bio/EnsEMBL/IdMapping/ScoreBuilder.pm
+++ b/modules/Bio/EnsEMBL/IdMapping/ScoreBuilder.pm
@@ -111,15 +111,15 @@ sub log_matrix_stats {
   my $fmt1 = "%-40s%10.0f\n";
   my $fmt2 = "%-40s%10.2f\n";
   
-  $self->logger->log(sprintf($fmt1, "Scoring matrix entries:",
+  $self->logger->info(sprintf($fmt1, "Scoring matrix entries:",
     $matrix->get_entry_count), 1);
   
-  $self->logger->log(sprintf($fmt2, "Average score:",
+  $self->logger->info(sprintf($fmt2, "Average score:",
     $matrix->get_average_score), 1);
   
   my ($min, $max) = @{ $matrix->get_min_max_scores };
-  $self->logger->log(sprintf($fmt2, "Min. score:", $min), 1);
-  $self->logger->log(sprintf($fmt2, "Max. score:", $max), 1);
+  $self->logger->info(sprintf($fmt2, "Min. score:", $min), 1);
+  $self->logger->info(sprintf($fmt2, "Max. score:", $max), 1);
 }
 
 
diff --git a/modules/Bio/EnsEMBL/IdMapping/ScoredMappingMatrix.pm b/modules/Bio/EnsEMBL/IdMapping/ScoredMappingMatrix.pm
index e06371e7aa..4ff78b6ad3 100644
--- a/modules/Bio/EnsEMBL/IdMapping/ScoredMappingMatrix.pm
+++ b/modules/Bio/EnsEMBL/IdMapping/ScoredMappingMatrix.pm
@@ -156,27 +156,32 @@ sub get_all_Entries {
 
 
 sub get_all_sources {
-  return [keys %{ $_->{'source_list'} }];
+  my $self = shift;
+  return [keys %{ $self->{'cache'}->{'source_list'} }];
 }
 
 
 sub get_all_targets {
-  return [keys %{ $_->{'target_list'} }];
+  my $self = shift;
+  return [keys %{ $self->{'cache'}->{'target_list'} }];
 }
 
 
 sub get_entry_count {
-  return scalar(keys %{ $_->{'matrix'} });
+  my $self = shift;
+  return scalar(keys %{ $self->{'cache'}->{'matrix'} });
 }
 
 
 sub get_source_count {
-  return scalar(keys %{ $_->{'source_list'} });
+  my $self = shift;
+  return scalar(keys %{ $self->{'cache'}->{'source_list'} });
 }
 
 
 sub get_target_count {
-  return scalar(keys %{ $_->{'target_list'} });
+  my $self = shift;
+  return scalar(keys %{ $self->{'cache'}->{'target_list'} });
 }
 
 
@@ -228,10 +233,10 @@ sub merge {
 
   my $c = 0;
 
-  foreach my $key (keys %{ $matrix->{'matrix'} }) {
+  foreach my $key (keys %{ $matrix->{'cache'}->{'matrix'} }) {
     if (!defined($self->{'cache'}->{'matrix'}->{$key}) or
-        $self->{'cache'}->{'matrix'}->{$key} < $matrix->{'matrix'}->{$key}) {
-      $self->{'cache'}->{'matrix'}->{$key} = $matrix->{'matrix'}->{$key};
+        $self->{'cache'}->{'matrix'}->{$key} < $matrix->{'cache'}->{'matrix'}->{$key}) {
+      $self->{'cache'}->{'matrix'}->{$key} = $matrix->{'cache'}->{'matrix'}->{$key};
       $c++;
     }
   }
diff --git a/modules/Bio/EnsEMBL/IdMapping/TranscriptScoreBuilder.pm b/modules/Bio/EnsEMBL/IdMapping/TranscriptScoreBuilder.pm
index 815a8c64b1..354c07a988 100644
--- a/modules/Bio/EnsEMBL/IdMapping/TranscriptScoreBuilder.pm
+++ b/modules/Bio/EnsEMBL/IdMapping/TranscriptScoreBuilder.pm
@@ -52,7 +52,7 @@ sub score_transcripts {
     throw('Need a Bio::EnsEMBL::IdMapping::ScoredMappingMatrix.');
   }
 
-  $self->logger->log_stamped("Starting transcript scoring...\n\n");
+  $self->logger->info("Starting transcript scoring...\n\n", 0, 'stamped');
 
   # build scores based on exon scores
   my $matrix = $self->scores_from_exon_scores($exon_matrix);
@@ -60,23 +60,23 @@ sub score_transcripts {
   # log stats of combined matrix
   my $fmt = "%-40s%10.0f\n";
 
-  $self->logger->log("Scoring matrix:\n");
+  $self->logger->info("Scoring matrix:\n");
 
-  $self->logger->log(sprintf($fmt, "Total source transcripts:",
-    $self->cache->get_count_by_name('transcripts_by_id', 'source'), 1);
+  $self->logger->info(sprintf($fmt, "Total source transcripts:",
+    $self->cache->get_count_by_name('transcripts_by_id', 'source')), 1);
 
-  $self->logger->log(sprintf($fmt, "Scored source transcripts:",
+  $self->logger->info(sprintf($fmt, "Scored source transcripts:",
     $matrix->get_source_count), 1);
 
-  $self->logger->log(sprintf($fmt, "Total target transcripts:",
-    $self->cache->get_count_by_name('transcripts_by_id', 'target'), 1);
+  $self->logger->info(sprintf($fmt, "Total target transcripts:",
+    $self->cache->get_count_by_name('transcripts_by_id', 'target')), 1);
 
-  $self->logger->log(sprintf($fmt, "Scored target transcripts:",
+  $self->logger->info(sprintf($fmt, "Scored target transcripts:",
     $matrix->get_target_count), 1);
 
   $self->log_matrix_stats($matrix);
   
-  $self->logger->log("\nDone with transcript scoring.\n\n");
+  $self->logger->info("\nDone with transcript scoring.\n\n");
 
   return $matrix;
 }
@@ -101,19 +101,19 @@ sub scores_from_exon_scores {
   if (-s $transcript_cache) {
     
     # read from file
-    $self->logger->log_stamped("Reading transcript scoring matrix from file...\n");
-    $self->logger->log("Cache file $transcript_cache.\n", 1);
+    $self->logger->info("Reading transcript scoring matrix from file...\n", 0, 'stamped');
+    $self->logger->info("Cache file $transcript_cache.\n", 1);
     $matrix->read_from_file;
-    $self->logger->log_stamped("Done.\n");
+    $self->logger->info("Done.\n", 0, 'stamped');
     
   } else {
     
     # build scoring matrix
-    $self->logger->log("No transcript scoring matrix found. Will build new one.\n");
+    $self->logger->info("No transcript scoring matrix found. Will build new one.\n");
 
-    $self->logger->log_stamped("Transcript scoring...\n");
+    $self->logger->info("Transcript scoring...\n", 0, 'stamped');
     $matrix = $self->build_scores($matrix, $exon_matrix);
-    $self->logger->log_stamped("Done.\n");
+    $self->logger->info("Done.\n", 0, 'stamped');
 
     # write scoring matrix to file
     $matrix->write_to_file;
@@ -144,7 +144,8 @@ sub build_scores {
   $self->flag_matrix_from_exon_scores($matrix, $exon_matrix);
 
   # now calculate the actual scores for the transcripts in the flag matrix
-  $final_matrix = $self->score_matrix_from_flag_matrix($matrix, $exon_matrix);
+  my $final_matrix =
+    $self->score_matrix_from_flag_matrix($matrix, $exon_matrix);
   
   return $final_matrix;
 }
@@ -160,7 +161,7 @@ sub flag_matrix_from_exon_scores {
   my $num_transcripts =
     scalar(keys %{ $self->cache->get_by_name('transcripts_by_id', 'source') });
 
-  $self->logger->log("Creating flag matrix...\n", 1);
+  $self->logger->info("Creating flag matrix...\n", 1);
 
   # loop over source transcripts
   foreach my $source_transcript (values %{ $self->cache->get_by_name('transcripts_by_id', 'source') }) {
@@ -169,7 +170,7 @@ sub flag_matrix_from_exon_scores {
     $self->logger->log_progress($num_transcripts, ++$i, 20, 1, 0);
 
     # get all exons for the source transcript
-    foreach my $source_exon (@{ $souce_transcript->get_all_Exons }) {
+    foreach my $source_exon (@{ $source_transcript->get_all_Exons }) {
 
       # get target exons for this source exon from scoring matrix
       foreach my $target_exon_id (@{ $exon_matrix->get_targets_for_source($source_exon->id) }) {
@@ -178,14 +179,14 @@ sub flag_matrix_from_exon_scores {
         foreach my $target_transcript (@{ $self->cache->get_by_key('transcripts_by_exon_id', 'target', $target_exon_id) }) {
           
           # add scoring flag for these two transcripts
-          $matrix->add_score($source_transcript->id, $target_transcript->id, 1)Ã;
+          $matrix->add_score($source_transcript->id, $target_transcript->id, 1);
           
         }
       }
     }
   }
 
-  $self->logger->log("\n\n");
+  $self->logger->info("\n\n");
 
   return $matrix;
 }
@@ -210,7 +211,7 @@ sub score_matrix_from_flag_matrix {
   my $num_transcripts =
     scalar(keys %{ $self->cache->get_by_name('transcripts_by_id', 'source') });
 
-  $self->logger->log("Creating score matrix from flag matrix...\n", 1);
+  $self->logger->info("Creating score matrix from flag matrix...\n", 1);
   
   # loop over source transcripts
   foreach my $source_transcript (values %{ $self->cache->get_by_name('transcripts_by_id', 'source') }) {
@@ -247,7 +248,7 @@ sub score_matrix_from_flag_matrix {
 
         foreach my $target_exon_id (@{ $exon_matrix->get_targets_for_source($source_exon->id) }) {
 
-          next unless (%target_exon{$target_exon_id});
+          next unless ($target_exons{$target_exon_id});
 
           my $score = $exon_matrix->get_score(
             $source_exon->id, $target_exon_id);
@@ -266,7 +267,7 @@ sub score_matrix_from_flag_matrix {
 
         foreach my $source_exon_id (@{ $exon_matrix->get_sources_for_target($target_exon->id) }) {
 
-          next unless (%source_exon{$source_exon_id});
+          next unless ($source_exons{$source_exon_id});
 
           my $score = $exon_matrix->get_score(
             $source_exon_id, $target_exon->id);
@@ -287,7 +288,7 @@ sub score_matrix_from_flag_matrix {
         if (($source_transcript_score > $source_transcript_length) or
             ($target_transcript_score > $target_transcript_length)) {
 
-          $self->logger->log_warning("Score > length for source ($source_target_score <> $source_transcript_length) or target ($target_transcript_score <> $target_transcript_length).\n", 1);
+          $self->logger->warning("Score > length for source ($source_transcript_score <> $source_transcript_length) or target ($target_transcript_score <> $target_transcript_length).\n", 1);
 
         } else {
           
@@ -305,14 +306,14 @@ sub score_matrix_from_flag_matrix {
       
       } else {
       
-        $self->logger->log_warning("Combined length of source (".$source_transcript->id.") and target (".$target_transcript->id.") transcript is zero!\n", 1);
+        $self->logger->warning("Combined length of source (".$source_transcript->id.") and target (".$target_transcript->id.") transcript is zero!\n", 1);
       
       }
 
     }
   }
 
-  $self->logger->log("\n\n");
+  $self->logger->info("\n\n");
 
   return $matrix;
     
-- 
GitLab