From 2a60002d1f934b895a73f7979b3e851d5b707e2f Mon Sep 17 00:00:00 2001
From: Patrick Meidl <pm2@sanger.ac.uk>
Date: Tue, 22 Apr 2008 15:05:51 +0000
Subject: [PATCH] added --cache_method=build_cache_auto implementation

---
 misc-scripts/id_mapping/default.conf  |  9 +++---
 misc-scripts/id_mapping/dump_cache.pl | 40 +++++++++++++++++++++++++--
 misc-scripts/id_mapping/run.pl        |  2 ++
 3 files changed, 45 insertions(+), 6 deletions(-)

diff --git a/misc-scripts/id_mapping/default.conf b/misc-scripts/id_mapping/default.conf
index 50e043ee4d..4770785edf 100644
--- a/misc-scripts/id_mapping/default.conf
+++ b/misc-scripts/id_mapping/default.conf
@@ -5,7 +5,7 @@ dry_run = 0
 loglevel = DEBUG
 
 ; paths
-basedir = /lustre/work1/ensembl/pm2/idmapping/perl/2008-04-17
+basedir = /lustre/work1/ensembl/pm2/idmapping/perl/2008-04-22c
 
 ; prepend this path to your 'log' parameter
 ; will default to "$basedir/log" if not set
@@ -26,14 +26,15 @@ targetdbname                = pm2_pan_troglodytes_core_41_21
 
 ; caching
 ;cache_method                = build_cache_all
-dump_cache_concurrent_jobs  = 200
+build_cache_auto_threshold  = 100
+build_cache_concurrent_jobs = 200
 
 ; limit
 ;region                     = chromosome:CHIMP1A:1:1:2000000:1
-;chromosomes                = 22
+;chromosomes                = 21,22
 
 ; LSF parameters
-lsf_opt_run                 = "-M3500000 -R'select[type==X86_64 && mem>3500],rusage[mem=3500]'"
+lsf_opt_run                 = "-M7500000 -R'select[type==X86_64 && mem>7500],rusage[mem=7500]'"
 ;lsf_opt_dump_cache          = "-M1700000 -R'select[type==X86_64 && mem>1700],rusage[mem=1700]'"
 
 ; ScoreBuilder
diff --git a/misc-scripts/id_mapping/dump_cache.pl b/misc-scripts/id_mapping/dump_cache.pl
index a66a9e5a6f..17e0cf24a2 100755
--- a/misc-scripts/id_mapping/dump_cache.pl
+++ b/misc-scripts/id_mapping/dump_cache.pl
@@ -91,6 +91,8 @@ $conf->parse_options(
   'biotypes=s@' => 0,
   'lsf_opt_dump_cache|lsfoptdumpcache=s' => 0,
   'cache_method=s' => 0,
+  'build_cache_auto_threshold=n' => 0,
+  'build_cache_concurrent_jobs=n' => 0,
 );
 
 # set default logpath
@@ -116,7 +118,7 @@ $logger->init_log($conf->list_param_values);
 # determin cache method to use.
 # this can be used to support different caching strategies or access to old
 # database schemas.
-my $cache_method = $conf->param('cache_method') || 'build_cache_by_seq_region';
+my $cache_method = $conf->param('cache_method') || 'build_cache_auto';
 no strict 'refs';
 my $retval = &$cache_method;
 
@@ -129,6 +131,40 @@ exit($retval);
 ### END main ###
 
 
+# Choose the cache building strategy from the number of slice names the cache
+# reports for the source and target dbs. Returns whatever the selected builder
+# (build_cache_all or build_cache_by_seq_region) returns, for use by exit().
+sub build_cache_auto {
+  # load the cache implementation
+  my $cache_impl = 'Bio::EnsEMBL::IdMapping::Cache';
+  dynamic_use($cache_impl);
+
+  my $cache = $cache_impl->new(
+    -LOGGER       => $logger,
+    -CONF         => $conf,
+  );
+
+  # the larger of the source and target counts drives the decision
+  $logger->debug("\nChecking number of toplevel seq_regions...\n");
+  my $max = 0;
+
+  foreach my $dbtype (qw(source target)) {
+    my $num = scalar(@{ $cache->slice_names($dbtype) });
+    $max = $num if ($num > $max);
+    $logger->debug("$dbtype: $num.\n", 1);
+  }
+
+  # call the builders with (): a bare "&name;" would forward our @_ to them
+  if ($max > ($conf->param('build_cache_auto_threshold') || 100)) {
+    $logger->debug("\nWill use build_cache_all.\n");
+    return build_cache_all();
+  }
+
+  $logger->debug("\nWill use build_cache_by_seq_region.\n");
+  return build_cache_by_seq_region();
+}
+
+
 sub build_cache_by_seq_region {
 
   my %jobs = ();
@@ -178,7 +214,7 @@ sub build_cache_by_seq_region {
 
     # build lsf command
     my $lsf_name = 'dump_by_seq_region_'.time;
-    my $concurrent = $conf->param('dump_cache_concurrent_jobs') || 200;
+    my $concurrent = $conf->param('build_cache_concurrent_jobs') || 200;
 
     my $options = $conf->create_commandline_options(
         logauto       => 1,
diff --git a/misc-scripts/id_mapping/run.pl b/misc-scripts/id_mapping/run.pl
index 8eb5198ffe..09eae8903d 100755
--- a/misc-scripts/id_mapping/run.pl
+++ b/misc-scripts/id_mapping/run.pl
@@ -88,6 +88,8 @@ $conf->parse_options(
   'region=s' => 0,
   'biotypes=s@' => 0,
   'cache_method=s' => 0,
+  'build_cache_auto_threshold=n' => 0,
+  'build_cache_concurrent_jobs=n' => 0,
   'min_exon_length|minexonlength=i' => 0,
   'exonerate_path|exoneratepath=s' => 1,
   'exonerate_threshold|exoneratethreshold=f' => 0,
-- 
GitLab