From 63b1a507c3e76127a1b2b1eeb45023b332380cdd Mon Sep 17 00:00:00 2001 From: Ian Longden <ianl@sanger.ac.uk> Date: Fri, 6 Nov 2009 16:12:29 +0000 Subject: [PATCH] config file can now take the exonerate executable to use and the queue to be used by the farm --- .../xref_mapping/XrefMapper/BasicMapper.pm | 50 +++++++++++++++++++ .../XrefMapper/Methods/ExonerateBasic.pm | 41 +++++++++------ .../xref_mapping/XrefMapper/SubmitMapper.pm | 27 +++++----- 3 files changed, 90 insertions(+), 28 deletions(-) diff --git a/misc-scripts/xref_mapping/XrefMapper/BasicMapper.pm b/misc-scripts/xref_mapping/XrefMapper/BasicMapper.pm index 7362d3cde2..ba4579adf4 100644 --- a/misc-scripts/xref_mapping/XrefMapper/BasicMapper.pm +++ b/misc-scripts/xref_mapping/XrefMapper/BasicMapper.pm @@ -47,6 +47,42 @@ sub xref{ return $self->{_xref}; } +=head2 farm_queue + + Arg [1] : (optional) + Example : $mapper->farm_queue("long"); + Description: Getter / Setter for the farm queue. + Returntype : string + Exceptions : none + +=cut + +sub farm_queue{ + my ($self, $arg) = @_; + + (defined $arg) && + ($self->{_queue} = $arg ); + return $self->{_queue}; +} + +=head2 exonerate + + Arg [1] : (optional) + Example : $mapper->exonerate("/usr/local/exonerate1.1.1"); + Description: Getter / Setter for the exonerate executable with full path. + Returntype : string + Exceptions : none + +=cut + +sub exonerate{ + my ($self, $arg) = @_; + + (defined $arg) && + ($self->{_exonerate} = $arg ); + return $self->{_exonerate}; +} + =head2 core Arg [1] : (optional) @@ -125,6 +161,7 @@ sub process_file { my %xref_hash=(); my %species_hash=(); + my %farm_hash=(); while( my $line = <FILE> ) { @@ -141,12 +178,18 @@ sub process_file { elsif($key eq "xref"){ $type = "xref"; } + elsif($key eq "farm"){ + $type = "farm"; + } elsif($type eq "species"){ # processing species data $species_hash{lc($key)} = $value; } elsif($type eq "xref"){ # processing xref data $xref_hash{lc($key)} = $value; } + elsif($type eq "farm"){ + $farm_hash{lc($key)} = $value; + } } @@ -179,6 +222,13 @@ sub process_file { $mapper = "XrefMapper::$module"->new(); + if(defined($farm_hash{'queue'})){ + $mapper->farm_queue($farm_hash{'queue'}); + } + if(defined($farm_hash{'exonerate'})){ + $mapper->exonerate($farm_hash{'exonerate'}); + } + if(defined($xref_hash{host}) and !defined($no_xref)){ my ($host, $user, $dbname, $pass, $port); diff --git a/misc-scripts/xref_mapping/XrefMapper/Methods/ExonerateBasic.pm b/misc-scripts/xref_mapping/XrefMapper/Methods/ExonerateBasic.pm index 2e3be90721..d8bfe06464 100644 --- a/misc-scripts/xref_mapping/XrefMapper/Methods/ExonerateBasic.pm +++ b/misc-scripts/xref_mapping/XrefMapper/Methods/ExonerateBasic.pm @@ -10,20 +10,30 @@ use IPC::Open3; # Path to exonerate executable #my $exonerate_path = "/usr/local/ensembl/bin/exonerate-0.9.0"; -my $exonerate_path = "/software/ensembl/bin/exonerate-1.4.0"; +my $exonerate_path = "/rubbish/software/ensembl/bin/exonerate-1.4.0"; # remove rubbish after wards as just to test sub new { - my($class) = @_; + my($class, $mapper) = @_; my $self ={}; bless $self,$class; + $self->mapper($mapper); $self->jobcount(0); return $self; } +sub mapper{ + my ($self, $arg) = @_; + + (defined $arg) && + ($self->{_mapper} = $arg ); + return $self->{_mapper}; +} + + =head2 jobcount Arg [1] : (optional) @@ -102,8 +112,6 @@ sub resubmit_exonerate { my $unique_name = $self->get_class_name() . "_" . time(); -# my @main_bsub = ( 'bsub', '-R' .'select[linux] -Rrusage[tmp='.$disk_space_needed.']', '-J' . $unique_name . '-o', $outfile, '-e', $errfile); - my $exe_file = $root_dir."/resub_".$job_id."_".$array_number; open(RUN,">$exe_file") || die "Could not open file $exe_file"; @@ -114,8 +122,9 @@ sub resubmit_exonerate { chmod 0755, $exe_file; - - my $usage = '-R "select[linux] rusage[tmp='.$disk_space_needed.']" -J "'.$unique_name.'"'; + my $queue = $self->mapper->farm_queue || 'long'; + + my $usage = '-R "select[linux] rusage[tmp='.$disk_space_needed.']" -J "'.$unique_name.'" -q '.$queue; my $com = "bsub $usage -o $outfile -e $errfile ".$exe_file; @@ -170,15 +179,12 @@ sub resubmit_exonerate { sub submit_exonerate { -# my ($self, $query, $target, $root_dir, $nofarm, @options) = @_; my ($self, $query, $target, $mapper, @options) = @_; my $root_dir = $mapper->core->dir; -# print "query $query\n" if($mapper->verbose); my $queryfile = basename($query); -# print "target $target\n" if($mapper->verbose); my $targetfile = basename($target); my $prefix = $root_dir . "/" . basename($query); @@ -194,14 +200,17 @@ sub submit_exonerate { $disk_space_needed /= 1024000; # convert to MB $disk_space_needed = int($disk_space_needed); $disk_space_needed += 1; -# print "disk space needed = ".$disk_space_needed."\n"; my $num_jobs = calculate_num_jobs($query); + + my $exe = $self->mapper->exonerate || $exonerate_path; + + if(defined($mapper->nofarm)){ my $output = $self->get_class_name() . "_" . $ensembl_type . "_1.map"; my $cmd = <<EON; -$exonerate_path $query $target --showvulgar false --showalignment FALSE --ryo "xref:%qi:%ti:%ei:%ql:%tl:%qab:%qae:%tab:%tae:%C:%s\n" $options_str | grep '^xref' > $root_dir/$output +$exe $query $target --showvulgar false --showalignment FALSE --ryo "xref:%qi:%ti:%ei:%ql:%tl:%qab:%qae:%tab:%tae:%C:%s\n" $options_str | grep '^xref' > $root_dir/$output EON print "none farm command is $cmd\n" if($mapper->verbose); @@ -218,7 +227,7 @@ EON } for( my $i=1; $i<=1; $i++){ - my $command = "$exonerate_path $query $target --showvulgar false --showalignment FALSE --ryo ". + my $command = "$exe $query $target --showvulgar false --showalignment FALSE --ryo ". '"xref:%qi:%ti:%ei:%ql:%tl:%qab:%qae:%tab:%tae:%C:%s\\\n"'." $options_str | grep ".'"'."^xref".'"'." > $root_dir/$output"; my $insert = "insert into mapping (job_id, type, command_line, percent_query_cutoff, percent_target_cutoff, method, array_size) values($jobid, '$ensembl_type', '$command',". $self->query_identity_threshold.", ".$self->target_identity_threshold.", '".$self->get_class_name()."', $i)"; @@ -254,13 +263,13 @@ EON my $output = $self->get_class_name() . "_" . $ensembl_type . "_" . "\$LSB_JOBINDEX.map"; - my $usage = '-R "select[linux] -rusage[tmp='.$disk_space_needed.']" '.'-J "'.$unique_name.'[1-'.$num_jobs.']%200" -o '.$prefix.'.%J-%I.out -e '.$prefix.'.%J-%I.err'; + my $queue = $self->mapper->farm_queue || 'long'; + -# print "usage :- ".$usage ."\n"; + my $usage = "-q $queue ".'-R "select[linux] -rusage[tmp='.$disk_space_needed.']" '.'-J "'.$unique_name.'[1-'.$num_jobs.']%200" -o '.$prefix.'.%J-%I.out -e '.$prefix.'.%J-%I.err'; -# my @main_bsub = ( 'bsub', '-R' .'select[linux] -Rrusage[tmp='.$disk_space_needed.']', '-J' . $unique_name . "[1-$num_jobs]%200", '-o', "$prefix.%J-%I.out", '-e', "$prefix.%J-%I.err"); - my $command = $exonerate_path." ".$query." ".$target.' --querychunkid $LSB_JOBINDEX --querychunktotal '.$num_jobs.' --showvulgar false --showalignment FALSE --ryo "xref:%qi:%ti:%ei:%ql:%tl:%qab:%qae:%tab:%tae:%C:%s\n" '.$options_str; + my $command = $exe." ".$query." ".$target.' --querychunkid $LSB_JOBINDEX --querychunktotal '.$num_jobs.' --showvulgar false --showalignment FALSE --ryo "xref:%qi:%ti:%ei:%ql:%tl:%qab:%qae:%tab:%tae:%C:%s\n" '.$options_str; $command .= " | grep '^xref' > $root_dir/$output"; my $exe_file = $root_dir."/".$unique_name.".submit"; diff --git a/misc-scripts/xref_mapping/XrefMapper/SubmitMapper.pm b/misc-scripts/xref_mapping/XrefMapper/SubmitMapper.pm index c95e150004..2c20ae7e40 100644 --- a/misc-scripts/xref_mapping/XrefMapper/SubmitMapper.pm +++ b/misc-scripts/xref_mapping/XrefMapper/SubmitMapper.pm @@ -255,12 +255,6 @@ sub fetch_and_dump_seq_via_toplevel{ my $ensembl = $self->core; $self->add_meta_pair("dump_method","fetch_and_dump_seq_via_toplevel"); - if(defined($self->mapper->dumpcheck()) and -e $ensembl->protein_file() and -e $ensembl->dna_file()){ - my $sth = $self->xref->dbc->prepare("insert into process_status (status, date) values('core_fasta_dumped',now())"); - $sth->execute(); - print "Ensembl Fasta files found (no new dumping)\n" if($self->verbose()); - return; - } # # store ensembl dna file name and open it @@ -276,6 +270,13 @@ sub fetch_and_dump_seq_via_toplevel{ # $ensembl->protein_file($ensembl->dir."/".$ensembl->species."_protein.fasta"); + if(defined($self->mapper->dumpcheck()) and -e $ensembl->protein_file() and -e $ensembl->dna_file()){ + my $sth = $self->xref->dbc->prepare("insert into process_status (status, date) values('core_fasta_dumped',now())"); + $sth->execute(); + print "Ensembl Fasta files found (no new dumping)\n" if($self->verbose()); + return; + } + print "Dumping Ensembl Fasta files\n" if($self->verbose()); open(DNA,">".$ensembl->dna_file()) @@ -680,7 +681,7 @@ sub fix_mappings { } else { - my $obj = $obj_name->new(); + my $obj = $obj_name->new($self->mapper); print "DO resubmit for $array_number\n"; my $job_name = $obj->resubmit_exonerate($self->mapper, $command_line, $out_file, $err_file, $job_id, $array_number, $root_dir); @@ -767,7 +768,7 @@ sub run_mapping { } else { - my $obj = $obj_name->new(); + my $obj = $obj_name->new($self->mapper); my $job_name = $obj->run($queryfile, $targetfile, $self); push @job_names, $job_name; @@ -846,7 +847,7 @@ sub submit_depend_job { # return until everything is finished. # build up the bsub command; first part - my @depend_bsub = ('bsub', '-K'); +# my @depend_bsub = ('bsub', '-K'); # build -w 'ended(job1) && ended(job2)' clause my $ended_str = '-w "'; @@ -858,14 +859,16 @@ sub submit_depend_job { } $ended_str .= '"'; - push @depend_bsub, $ended_str; +# push @depend_bsub, $ended_str; # rest of command - push @depend_bsub, ('-q', 'small', '-o', "$root_dir/depend.out", '-e', "$root_dir/depend.err"); + + my $queue = $self->mapper->farm_queue || 'small'; +# push @depend_bsub, ('-q', $queue, '-o', "$root_dir/depend.out", '-e', "$root_dir/depend.err"); my $jobid = 0; - my $com = "bsub -K -q small -o $root_dir/depend.out -e $root_dir/depend.err $ended_str /bin/true"; + my $com = "bsub -K -q ".$queue." -o $root_dir/depend.out -e $root_dir/depend.err $ended_str /bin/true"; my $line = `$com`; -- GitLab