From 63b1a507c3e76127a1b2b1eeb45023b332380cdd Mon Sep 17 00:00:00 2001
From: Ian Longden <ianl@sanger.ac.uk>
Date: Fri, 6 Nov 2009 16:12:29 +0000
Subject: [PATCH] config file can now take the exonerate executable to use and
 the queue to be used by the farm

---
 .../xref_mapping/XrefMapper/BasicMapper.pm    | 50 +++++++++++++++++++
 .../XrefMapper/Methods/ExonerateBasic.pm      | 41 +++++++++------
 .../xref_mapping/XrefMapper/SubmitMapper.pm   | 27 +++++-----
 3 files changed, 90 insertions(+), 28 deletions(-)

diff --git a/misc-scripts/xref_mapping/XrefMapper/BasicMapper.pm b/misc-scripts/xref_mapping/XrefMapper/BasicMapper.pm
index 7362d3cde2..ba4579adf4 100644
--- a/misc-scripts/xref_mapping/XrefMapper/BasicMapper.pm
+++ b/misc-scripts/xref_mapping/XrefMapper/BasicMapper.pm
@@ -47,6 +47,42 @@ sub xref{
   return $self->{_xref};
 }
 
+=head2 farm_queue
+
+  Arg [1]    : (optional) string - name of the farm queue to use
+  Example    : $mapper->farm_queue("long");
+  Description: Getter / Setter for the farm queue.
+  Returntype : string
+  Exceptions : none
+
+=cut
+
+sub farm_queue {
+  my ($self, $queue) = @_;
+
+  # Store the queue only when the caller supplied one; otherwise act as a getter.
+  $self->{_queue} = $queue if defined $queue;
+  return $self->{_queue};
+}
+
+=head2 exonerate
+
+  Arg [1]    : (optional) string - full path of the exonerate executable
+  Example    : $mapper->exonerate("/usr/local/exonerate1.1.1");
+  Description: Getter / Setter for the exonerate executable with full path.
+  Returntype : string
+  Exceptions : none
+
+=cut
+
+sub exonerate {
+  my ($self, $path) = @_;
+
+  # A defined argument replaces the stored executable path; no argument is a plain read.
+  if (defined $path) { $self->{_exonerate} = $path; }
+  return $self->{_exonerate};
+}
+
 =head2 core
 
   Arg [1]    : (optional)
@@ -125,6 +161,7 @@ sub process_file {
   
   my %xref_hash=();
   my %species_hash=();
+  my %farm_hash=();
   
   while( my $line = <FILE> ) {
     
@@ -141,12 +178,18 @@ sub process_file {
     elsif($key eq "xref"){
       $type = "xref";
     }
+    elsif($key eq "farm"){
+      $type = "farm";
+    }
     elsif($type eq "species"){ # processing species data
       $species_hash{lc($key)} = $value;
     }
     elsif($type eq "xref"){    # processing xref data
       $xref_hash{lc($key)} = $value;
     }
+    elsif($type eq "farm"){
+      $farm_hash{lc($key)} = $value;
+    }
   }
   
 
@@ -179,6 +222,13 @@ sub process_file {
   
   $mapper = "XrefMapper::$module"->new();
 
+  if(defined($farm_hash{'queue'})){
+    $mapper->farm_queue($farm_hash{'queue'});
+  }
+  if(defined($farm_hash{'exonerate'})){
+    $mapper->exonerate($farm_hash{'exonerate'});
+  }
+  
 
   if(defined($xref_hash{host}) and !defined($no_xref)){
     my ($host, $user, $dbname, $pass, $port);
diff --git a/misc-scripts/xref_mapping/XrefMapper/Methods/ExonerateBasic.pm b/misc-scripts/xref_mapping/XrefMapper/Methods/ExonerateBasic.pm
index 2e3be90721..d8bfe06464 100644
--- a/misc-scripts/xref_mapping/XrefMapper/Methods/ExonerateBasic.pm
+++ b/misc-scripts/xref_mapping/XrefMapper/Methods/ExonerateBasic.pm
@@ -10,20 +10,30 @@ use IPC::Open3;
 
 # Path to exonerate executable
 #my $exonerate_path = "/usr/local/ensembl/bin/exonerate-0.9.0";
-my $exonerate_path = "/software/ensembl/bin/exonerate-1.4.0";
+my $exonerate_path = "/software/ensembl/bin/exonerate-1.4.0"; # fallback when the config file names no exonerate executable
 
 sub new {
 
-  my($class) = @_;
+  my($class, $mapper) = @_;
 
   my $self ={};
   bless $self,$class;
+  $self->mapper($mapper);
   $self->jobcount(0);
 
   return $self;
 
 }
 
+sub mapper {
+  my ($self, $mapper) = @_;
+
+  # Acts as a setter when given a defined value and as a getter otherwise.
+  $self->{_mapper} = $mapper if defined $mapper;
+  return $self->{_mapper};
+}
+
+
 =head2 jobcount
  
   Arg [1]    : (optional) 
@@ -102,8 +112,6 @@ sub resubmit_exonerate {
   
   my $unique_name = $self->get_class_name() . "_" . time();
   
-#  my @main_bsub = ( 'bsub', '-R' .'select[linux] -Rrusage[tmp='.$disk_space_needed.']',  '-J' . $unique_name . '-o', $outfile, '-e', $errfile);
-  
   my $exe_file = $root_dir."/resub_".$job_id."_".$array_number;
   open(RUN,">$exe_file") || die "Could not open file $exe_file";
   
@@ -114,8 +122,9 @@ sub resubmit_exonerate {
 
   chmod 0755, $exe_file;
 
-
-  my $usage = '-R "select[linux] rusage[tmp='.$disk_space_needed.']" -J "'.$unique_name.'"';
+  my $queue = $self->mapper->farm_queue || 'long';
+  
+  my $usage = '-R "select[linux] rusage[tmp='.$disk_space_needed.']" -J "'.$unique_name.'" -q '.$queue;
 
 
   my $com = "bsub $usage -o $outfile -e $errfile ".$exe_file;
@@ -170,15 +179,12 @@ sub resubmit_exonerate {
 
 sub submit_exonerate {
 
-#  my ($self, $query, $target, $root_dir, $nofarm, @options) = @_;
   my ($self, $query, $target, $mapper, @options) = @_;
 
 
   my $root_dir = $mapper->core->dir;
 
-#  print "query $query\n" if($mapper->verbose);
   my $queryfile = basename($query);
-#  print "target $target\n" if($mapper->verbose);
   my $targetfile = basename($target);
 
   my $prefix = $root_dir . "/" . basename($query);
@@ -194,14 +200,17 @@ sub submit_exonerate {
   $disk_space_needed /= 1024000; # convert to MB
   $disk_space_needed = int($disk_space_needed);
   $disk_space_needed += 1;
-#  print "disk space needed = ".$disk_space_needed."\n";
 
   my $num_jobs = calculate_num_jobs($query);
 
+
+  my $exe = $self->mapper->exonerate || $exonerate_path;
+
+
   if(defined($mapper->nofarm)){
     my $output = $self->get_class_name() . "_" . $ensembl_type . "_1.map";
     my $cmd = <<EON;
-$exonerate_path $query $target --showvulgar false --showalignment FALSE --ryo "xref:%qi:%ti:%ei:%ql:%tl:%qab:%qae:%tab:%tae:%C:%s\n" $options_str | grep '^xref' > $root_dir/$output
+$exe $query $target --showvulgar false --showalignment FALSE --ryo "xref:%qi:%ti:%ei:%ql:%tl:%qab:%qae:%tab:%tae:%C:%s\n" $options_str | grep '^xref' > $root_dir/$output
 EON
     print "none farm command is $cmd\n" if($mapper->verbose);
 
@@ -218,7 +227,7 @@ EON
     }
 
     for( my $i=1; $i<=1; $i++){
-      my $command = "$exonerate_path $query $target --showvulgar false --showalignment FALSE --ryo ".
+      my $command = "$exe $query $target --showvulgar false --showalignment FALSE --ryo ".
 	'"xref:%qi:%ti:%ei:%ql:%tl:%qab:%qae:%tab:%tae:%C:%s\\\n"'." $options_str | grep ".'"'."^xref".'"'." > $root_dir/$output";
       my $insert = "insert into mapping (job_id, type, command_line, percent_query_cutoff, percent_target_cutoff, method, array_size) values($jobid, '$ensembl_type', '$command',".
 				       $self->query_identity_threshold.", ".$self->target_identity_threshold.", '".$self->get_class_name()."', $i)";
@@ -254,13 +263,13 @@ EON
 
   my $output = $self->get_class_name() . "_" . $ensembl_type . "_" . "\$LSB_JOBINDEX.map";
 
-  my $usage = '-R "select[linux] -rusage[tmp='.$disk_space_needed.']" '.'-J "'.$unique_name.'[1-'.$num_jobs.']%200" -o '.$prefix.'.%J-%I.out -e  '.$prefix.'.%J-%I.err';
+  my $queue = $self->mapper->farm_queue || 'long';
+
 
-#  print "usage :- ".$usage ."\n";
+  my $usage = "-q $queue ".'-R "select[linux] -rusage[tmp='.$disk_space_needed.']" '.'-J "'.$unique_name.'[1-'.$num_jobs.']%200" -o '.$prefix.'.%J-%I.out -e  '.$prefix.'.%J-%I.err';
 
-#  my @main_bsub = ( 'bsub', '-R' .'select[linux] -Rrusage[tmp='.$disk_space_needed.']',  '-J' . $unique_name . "[1-$num_jobs]%200", '-o', "$prefix.%J-%I.out", '-e', "$prefix.%J-%I.err");
 
-  my $command = $exonerate_path." ".$query." ".$target.' --querychunkid $LSB_JOBINDEX --querychunktotal '.$num_jobs.' --showvulgar false --showalignment FALSE --ryo "xref:%qi:%ti:%ei:%ql:%tl:%qab:%qae:%tab:%tae:%C:%s\n" '.$options_str;
+  my $command = $exe." ".$query." ".$target.' --querychunkid $LSB_JOBINDEX --querychunktotal '.$num_jobs.' --showvulgar false --showalignment FALSE --ryo "xref:%qi:%ti:%ei:%ql:%tl:%qab:%qae:%tab:%tae:%C:%s\n" '.$options_str;
   $command .= " | grep '^xref' > $root_dir/$output";
 
   my $exe_file = $root_dir."/".$unique_name.".submit";
diff --git a/misc-scripts/xref_mapping/XrefMapper/SubmitMapper.pm b/misc-scripts/xref_mapping/XrefMapper/SubmitMapper.pm
index c95e150004..2c20ae7e40 100644
--- a/misc-scripts/xref_mapping/XrefMapper/SubmitMapper.pm
+++ b/misc-scripts/xref_mapping/XrefMapper/SubmitMapper.pm
@@ -255,12 +255,6 @@ sub fetch_and_dump_seq_via_toplevel{
   my $ensembl = $self->core;
   $self->add_meta_pair("dump_method","fetch_and_dump_seq_via_toplevel");
 
-  if(defined($self->mapper->dumpcheck()) and -e $ensembl->protein_file() and -e $ensembl->dna_file()){
-    my $sth = $self->xref->dbc->prepare("insert into process_status (status, date) values('core_fasta_dumped',now())");
-    $sth->execute();    
-    print "Ensembl Fasta files found (no new dumping)\n" if($self->verbose());
-    return;
-  }
 
   #
   # store ensembl dna file name and open it
@@ -276,6 +270,13 @@ sub fetch_and_dump_seq_via_toplevel{
   #
   $ensembl->protein_file($ensembl->dir."/".$ensembl->species."_protein.fasta");
 
+  if(defined($self->mapper->dumpcheck()) and -e $ensembl->protein_file() and -e $ensembl->dna_file()){
+    my $sth = $self->xref->dbc->prepare("insert into process_status (status, date) values('core_fasta_dumped',now())");
+    $sth->execute();    
+    print "Ensembl Fasta files found (no new dumping)\n" if($self->verbose());
+    return;
+  }
+
   print "Dumping Ensembl Fasta files\n" if($self->verbose());
 
   open(DNA,">".$ensembl->dna_file())
@@ -680,7 +681,7 @@ sub fix_mappings {
 
     } else {
 
-      my $obj = $obj_name->new();
+      my $obj = $obj_name->new($self->mapper);
  
       print "DO resubmit for $array_number\n";
       my $job_name = $obj->resubmit_exonerate($self->mapper, $command_line, $out_file, $err_file, $job_id, $array_number, $root_dir);
@@ -767,7 +768,7 @@ sub run_mapping {
 
     } else {
 
-      my $obj = $obj_name->new();
+      my $obj = $obj_name->new($self->mapper);
  
       my $job_name = $obj->run($queryfile, $targetfile, $self);
       push @job_names, $job_name;
@@ -846,7 +847,7 @@ sub submit_depend_job {
   # return until everything is finished.
 
   # build up the bsub command; first part
-  my @depend_bsub = ('bsub', '-K');
+#  my @depend_bsub = ('bsub', '-K');
 
   # build -w 'ended(job1) && ended(job2)' clause
   my $ended_str = '-w "';
@@ -858,14 +859,16 @@ sub submit_depend_job {
   }
   $ended_str .= '"';
 
-  push @depend_bsub, $ended_str;
+#  push @depend_bsub, $ended_str;
 
   # rest of command
-  push @depend_bsub, ('-q', 'small', '-o', "$root_dir/depend.out", '-e', "$root_dir/depend.err");
+  
+  my $queue = $self->mapper->farm_queue || 'small';
+#  push @depend_bsub, ('-q', $queue, '-o', "$root_dir/depend.out", '-e', "$root_dir/depend.err");
 
   my $jobid = 0;
 
-  my $com = "bsub -K -q small -o $root_dir/depend.out -e $root_dir/depend.err $ended_str /bin/true";
+  my $com = "bsub -K -q ".$queue." -o $root_dir/depend.out -e $root_dir/depend.err $ended_str /bin/true";
 
 
   my $line = `$com`;
-- 
GitLab