Skip to content
Snippets Groups Projects
Commit b17d8adb authored by Patrick Meidl
Browse files

score building, first full draft

parent d2affc48
No related branches found
No related tags found
No related merge requests found
......@@ -5,22 +5,30 @@ dry_run = 1
verbose = 1
; prepend this path to your 'log' parameter
logpath = /ecs2/work2/pm2/logs/test
logpath = /ecs2/work2/pm2/logs/test_20060926
; old db settings
oldhost = ecs4
oldport = 3350
olduser = ensro
olddbname = pm2_homo_sapiens_core_39_36a
sourcehost = ecs2
sourceport = 3364
sourceuser = ensro
sourcedbname = homo_sapiens_core_41_36c
; new db settings
newhost = ecs4
newport = 3350
newuser = ensadmin
newpass = ensembl
newdbname = pm2_homo_sapiens_core_39_36a
targethost = ecs2
targetport = 3364
targetuser = ensro
targetdbname = homo_sapiens_core_41_36c
; cache
dumppath = /ecs2/scratch3/pm2/data/test
;region = chromosome:NCBI36:X:1000000:2000000:1
dumppath = /ecs2/scratch3/pm2/data/test_20060926
;region = chromosome:NCBI36:X:1:2000000:1
; ScoreBuilder
min_exon_length = 15
exonerate_path = /usr/local/ensembl/bin/exonerate-0.8.2
exonerate_threshold = 0.5
exonerate_jobs = 0
exonerate_bytes_per_job = 250000
transcript_score_threshold = 0
......@@ -149,6 +149,11 @@ my $i = 0;
my $size = 0;
($i, $size) = $cache->build_cache($dbtype, $slice_name);

# set flag to indicate everything went fine
#
# The monitoring script recreates and inspects the 'lsf_dump_cache' directory
# below logpath (falling back to dumppath) and looks there for
# dump_by_seq_region.<job>.success, so the marker has to be written to that
# same directory or a successful job is never detected.
my $success_file = ($conf->param('logpath') || $conf->param('dumppath'))
  ."/lsf_dump_cache/dump_by_seq_region.$dbtype.$slice_name.success";

# Creating the empty file is all that is needed. open and close are checked
# separately so that the error message names the step that actually failed.
open(my $success_fh, '>', $success_file)
  or die "Can't open $success_file for writing: $!";
close($success_fh)
  or die "Can't close $success_file: $!";

# log success
$logger->log("Done with $dbtype $slice_name (genes: $i, filesize: $size, runtime: ".$logger->runtime." ".$logger->date_and_mem."\n");
......
......@@ -31,11 +31,11 @@ Optional arguments:
=head1 DESCRIPTION
Use --sourceschema and --targetschema to specify a schema version (default: latest).
This will be used to determine the subroutine to build the cache. By default,
&build_cache_latest() is run which uses Bio::EnsEMBL::IdMapping::Cache to read
from the database and write the cache. An alternative subroutine can use a
different module for that, which will usually inherit from the former and
Use --sourceschema and --targetschema to specify a schema version (default:
latest). This will be used to determine the subroutine to build the cache. By
default, &build_cache_latest() is run which uses Bio::EnsEMBL::IdMapping::Cache
to read from the database and write the cache. An alternative subroutine can
use a different module for that, which will usually inherit from the former and
override Cache->build_cache(). This is useful for backwards compatibility with
older schema versions. Once the cache is built, no API access is needed,
therefore the ID mapping application is independent of the underlying database
......@@ -144,10 +144,10 @@ $conf->check_required_params(
my %jobs;

# create empty directory for logs
#
# The directory lives below logpath (or dumppath when no logpath is set) and
# is wiped on every run so that log and marker files left over from an
# earlier run cannot be mistaken for results of this one.
my $logpath = ($conf->param('logpath')||$conf->param('dumppath')).'/lsf_dump_cache';

# List-form system() bypasses the shell, so a path containing whitespace or
# shell metacharacters is passed to rm/mkdir verbatim. On failure the exit
# status is in $?; $! is only meaningful when the command could not be
# started at all ($? == -1).
system('rm', '-rf', $logpath) == 0 or
  $logger->log_error("Unable to delete lsf log dir $logpath: ".
    ($? == -1 ? $! : 'exit status '.($? >> 8))."\n");

# -p also creates missing parent directories.
system('mkdir', '-p', $logpath) == 0 or
  $logger->log_error("Can't create lsf log dir $logpath: ".
    ($? == -1 ? $! : 'exit status '.($? >> 8))."\n");
# submit jobs to lsf
......@@ -164,6 +164,7 @@ foreach my $dbtype (qw(source target)) {
# monitor progress
my $err;
my $total = scalar(keys %jobs);
my @types;
while (keys %jobs) {
foreach my $type (keys %jobs) {
......@@ -174,6 +175,7 @@ while (keys %jobs) {
# the job has finished if you find the error logfile
delete($jobs{$type}) if (-e $err_log);
push @types, $type;
}
$logger->log("Jobs waiting: ".scalar(keys %jobs)."/$total.\r");
......@@ -184,10 +186,14 @@ while (keys %jobs) {
$logger->log("\n\n");
# check if anything went wrong
#
# A job counts as failed when its success marker file is missing from the lsf
# log directory. @types can hold the same job name many times (it is appended
# to while polling), so each name is checked only once.
my %type_checked;
foreach my $type (grep { !$type_checked{$_}++ } @types) {
  $err++ unless (-e "$logpath/dump_by_seq_region.$type.success");
}

# $retval is set to 1 as soon as at least one job failed.
my $retval = 0;
if ($err) {
  $logger->log("At least one of your jobs failed.\n");
  $logger->log("Please check $logpath and ".$conf->param('logpath')."/dump_by_seq_region.log for errors.\n");
  $retval = 1;
}
......@@ -243,6 +249,7 @@ sub bsubmit {
dbtype => $dbtype,
slice_name => $slice_name,
cache_impl => ref($cache),
log_append => 1,
},
-EXCLUDE => [qw(region chromosomes)]
);
......
......@@ -67,6 +67,7 @@ use Bio::EnsEMBL::Utils::ConfParser;
use Bio::EnsEMBL::Utils::Logger;
use Bio::EnsEMBL::IdMapping::Cache;
use Bio::EnsEMBL::IdMapping::ExonScoreBuilder;
use Bio::EnsEMBL::IdMapping::TranscriptScoreBuilder;
#use Devel::Size qw(size total_size);
#use Data::Dumper;
......@@ -88,13 +89,19 @@ $conf->parse_extra_options(qw(
chromosomes|chr=s@
region=s
biotypes=s@
min_exon_length|minexonlength=i
exonerate_path|exoneratepath=s
exonerate_threshold|exoneratethreshold=i
exonerate_jobs|exoneratejobs=i
exonerate_bytes_per_job|exoneratebytesperjob=i
));
# Declare the parameter names this script accepts: the common parameters
# reported by the config parser plus the script-specific ones listed here.
# Every name must be spelled exactly like the corresponding option passed to
# parse_extra_options() (note: exonerate_bytes_per_job, plural "bytes").
$conf->allowed_params(
  $conf->get_common_params,
  qw(
    mode dumppath
    chromosomes region biotypes
    min_exon_length
    exonerate_path exonerate_threshold exonerate_jobs exonerate_bytes_per_job
  )
);
......@@ -122,6 +129,7 @@ $logger->init_log($conf->list_all_params);
# Hand the names of the required parameters (cache dump location and path to
# the exonerate binary) to the config parser for checking.
$conf->check_required_params('dumppath', 'exonerate_path');
......@@ -137,21 +145,14 @@ my $cache = Bio::EnsEMBL::IdMapping::Cache->new(
-LOGGER => $logger,
-CONF => $conf,
);
# Load cached data before running in the requested mode.
#
# For both database types, read the 'exons_by_id' cache file of every slice
# name reported by $cache->slice_names(), logging an empty line per slice.
foreach my $dbtype (qw(source target)) {
foreach my $slice_name (@{ $cache->slice_names($dbtype) }) {
$logger->log("\n");
# the second argument identifies the per-slice file: "<dbtype>.<slice_name>"
$cache->read_from_file('exons_by_id', "$dbtype.$slice_name");
}
}
# presumably combines the per-slice 'exons_by_id' data read above into one
# structure -- TODO confirm against Bio::EnsEMBL::IdMapping::Cache
$cache->merge('exons_by_id');
# NOTE(review): this looks like the replacement for the per-slice loading
# above (loading a previously stored cache instance in one step); confirm
# whether both strategies are meant to run, or only this one.
$cache->read_instance_from_file;
# run in requested mode
#
# The mode name (default: 'normal') selects the subroutine run_<mode> defined
# in this script. The subroutine is looked up with can() instead of being
# called through a symbolic reference, so strict 'refs' stays enabled for the
# rest of the file and an unknown mode is reported explicitly.
my $mode = $conf->param('mode') || 'normal';
my $run = "run_$mode";
my $run_handler = __PACKAGE__->can($run)
  or die "Unknown mode '$mode': no subroutine $run() defined.\n";
$run_handler->();
# finish logfile
......@@ -173,11 +174,15 @@ sub build_scores {
-CONF => $conf,
-CACHE => $cache
);
#my $tsb = Bio::EnsEMBL::IdMapping::TranscriptScoreBuilder->new(
my $tsb = Bio::EnsEMBL::IdMapping::TranscriptScoreBuilder->new(
-LOGGER => $logger,
-CONF => $conf,
-CACHE => $cache
);
#my $gsb = Bio::EnsEMBL::IdMapping::GeneScoreBuilder->new(
$exon_scores = $esb->score_exons;
#$transcript_scores = $tsb->score_transcripts;
$transcript_scores = $tsb->score_transcripts;
#$gene_scores = $gsb->score_genes;
}
......
......@@ -92,6 +92,11 @@ $conf->parse_extra_options(qw(
chromosomes|chr=s@
region=s
biotypes=s@
min_exon_length|minexonlength=i
exonerate_path|exoneratepath=s
exonerate_threshold|exoneratethreshold=i
exonerate_jobs|exoneratejobs=i
exonerate_bytes_per_job|exoneratebytesperjob=i
));
$conf->allowed_params(
$conf->get_common_params,
......@@ -101,6 +106,8 @@ $conf->allowed_params(
mode
dumppath cachefile
chromosomes region biotypes
min_exon_length
exonerate_path exonerate_threshold exonerate_jobs exonerate_byte_per_job
)
);
......@@ -141,7 +148,8 @@ $options{'dump_cache'} = $conf->create_commandline_options(
interactive => 0,
is_component => 1,
},
-EXCLUDE => [qw(mode)]
-EXCLUDE => [qw(mode min_exon_length exonerate_path exonerate_threshold
exonerate_jobs exonerate_byte_per_job)]
);
$options{'id_mapping'} = $conf->create_commandline_options(
......@@ -161,8 +169,9 @@ $options{'id_mapping'} = $conf->create_commandline_options(
# run components, depending on mode
#
# The mode name (default: 'normal') selects the subroutine run_<mode> defined
# in this script. The subroutine is looked up with can() instead of being
# called through a symbolic reference, so strict 'refs' stays enabled for the
# rest of the file and an unknown mode is reported explicitly.
my $mode = $conf->param('mode') || 'normal';
my $sub = "run_$mode";
my $mode_handler = __PACKAGE__->can($sub)
  or die "Unknown mode '$mode': no subroutine $sub() defined.\n";
$mode_handler->();
# finish logfile
......
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment