From 2a60002d1f934b895a73f7979b3e851d5b707e2f Mon Sep 17 00:00:00 2001 From: Patrick Meidl <pm2@sanger.ac.uk> Date: Tue, 22 Apr 2008 15:05:51 +0000 Subject: [PATCH] added --cache_method=build_cache_auto implementation --- misc-scripts/id_mapping/default.conf | 9 +++--- misc-scripts/id_mapping/dump_cache.pl | 40 +++++++++++++++++++++++++-- misc-scripts/id_mapping/run.pl | 2 ++ 3 files changed, 45 insertions(+), 6 deletions(-) diff --git a/misc-scripts/id_mapping/default.conf b/misc-scripts/id_mapping/default.conf index 50e043ee4d..4770785edf 100644 --- a/misc-scripts/id_mapping/default.conf +++ b/misc-scripts/id_mapping/default.conf @@ -5,7 +5,7 @@ dry_run = 0 loglevel = DEBUG ; paths -basedir = /lustre/work1/ensembl/pm2/idmapping/perl/2008-04-17 +basedir = /lustre/work1/ensembl/pm2/idmapping/perl/2008-04-22c ; prepend this path to your 'log' parameter ; will default to "$basedir/log" if not set @@ -26,14 +26,15 @@ targetdbname = pm2_pan_troglodytes_core_41_21 ; caching ;cache_method = build_cache_all -dump_cache_concurrent_jobs = 200 +build_cache_auto_threshold = 100 +build_cache_concurrent_jobs = 200 ; limit ;region = chromosome:CHIMP1A:1:1:2000000:1 -;chromosomes = 22 +;chromosomes = 21,22 ; LSF parameters -lsf_opt_run = "-M3500000 -R'select[type==X86_64 && mem>3500],rusage[mem=3500]'" +lsf_opt_run = "-M7500000 -R'select[type==X86_64 && mem>7500],rusage[mem=7500]'" ;lsf_opt_dump_cache = "-M1700000 -R'select[type==X86_64 && mem>1700],rusage[mem=1700]'" ; ScoreBuilder diff --git a/misc-scripts/id_mapping/dump_cache.pl b/misc-scripts/id_mapping/dump_cache.pl index a66a9e5a6f..17e0cf24a2 100755 --- a/misc-scripts/id_mapping/dump_cache.pl +++ b/misc-scripts/id_mapping/dump_cache.pl @@ -91,6 +91,8 @@ $conf->parse_options( 'biotypes=s@' => 0, 'lsf_opt_dump_cache|lsfoptdumpcache=s' => 0, 'cache_method=s' => 0, + 'build_cache_auto_threshold=n' => 0, + 'build_cache_concurrent_jobs=n' => 0, ); # set default logpath @@ -116,7 +118,7 @@ $logger->init_log($conf->list_param_values); # determin cache method to use. # this can be used to support different caching strategies or access to old # database schemas. -my $cache_method = $conf->param('cache_method') || 'build_cache_by_seq_region'; +my $cache_method = $conf->param('cache_method') || 'build_cache_auto'; no strict 'refs'; my $retval = &$cache_method; @@ -129,6 +131,40 @@ exit($retval); ### END main ### +sub build_cache_auto { + # load the cache implementation + my $cache_impl = 'Bio::EnsEMBL::IdMapping::Cache'; + dynamic_use($cache_impl); + + my $cache = $cache_impl->new( + -LOGGER => $logger, + -CONF => $conf, + ); + + $logger->debug("\nChecking number of toplevel seq_regions...\n"); + my $max = 0; + + foreach my $dbtype (qw(source target)) { + my $num = scalar(@{ $cache->slice_names($dbtype) }); + $max = $num if ($num > $max); + $logger->debug("$dbtype: $num.\n", 1); + } + + my $threshold = $conf->param('build_cache_auto_threshold') || 100; + my $retval; + + if ($max > $threshold) { + $logger->debug("\nWill use build_cache_all.\n"); + $retval = &build_cache_all; + } else { + $logger->debug("\nWill use build_cache_by_seq_region.\n"); + $retval = &build_cache_by_seq_region; + } + + return $retval; +} + + sub build_cache_by_seq_region { my %jobs = (); @@ -178,7 +214,7 @@ sub build_cache_by_seq_region { # build lsf command my $lsf_name = 'dump_by_seq_region_'.time; - my $concurrent = $conf->param('dump_cache_concurrent_jobs') || 200; + my $concurrent = $conf->param('build_cache_concurrent_jobs') || 200; my $options = $conf->create_commandline_options( logauto => 1, diff --git a/misc-scripts/id_mapping/run.pl b/misc-scripts/id_mapping/run.pl index 8eb5198ffe..09eae8903d 100755 --- a/misc-scripts/id_mapping/run.pl +++ b/misc-scripts/id_mapping/run.pl @@ -88,6 +88,8 @@ $conf->parse_options( 'region=s' => 0, 'biotypes=s@' => 0, 'cache_method=s' => 0, + 'build_cache_auto_threshold=n' => 0, + 'build_cache_concurrent_jobs=n' => 0, 'min_exon_length|minexonlength=i' => 0, 'exonerate_path|exoneratepath=s' => 1, 'exonerate_threshold|exoneratethreshold=f' => 0, -- GitLab