ExonerateBasic.pm 10.8 KB
Newer Older
1 2 3 4 5
# Base class that all other mapping methods inherit from

package XrefMapper::Methods::ExonerateBasic;

use strict;
6
use warnings;
7 8 9 10 11

use File::Basename;
use IPC::Open3;

# Path to exonerate executable
12 13
#my $exonerate_path = "/usr/local/ensembl/bin/exonerate-0.9.0";
my $exonerate_path = "/software/ensembl/bin/exonerate-1.4.0";
14 15 16 17 18 19 20

sub new {

  my($class) = @_;

  my $self ={};
  bless $self,$class;
21
  $self->jobcount(0);
22 23 24 25 26

  return $self;

}

27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45
=head2 jobcount
 
  Arg [1]    : (optional) 
  Example    : $mapper->jobcount(1004);
  Description: Getter / Setter for number of jobs submitted. 
  Returntype : scalar
  Exceptions : none

=cut

sub jobcount {
  my ($self, $arg) = @_;

  (defined $arg) &&
    ($self->{_jobcount} = $arg );
  return $self->{_jobcount};
}
 

46 47 48 49 50 51 52 53 54 55 56 57 58 59
=head2 run

  Arg[1]     : Query filename to pass to exonerate; this should be the XREF file.
  Arg[2]     : Target filename to pass to exonerate; this should be the ENSEMBL file.
  Example    : none
  Description: Run exonerate with default options.
  Returntype : none
  Exceptions : none
  Caller     : general

=cut

sub run() {

Ian Longden's avatar
Ian Longden committed
60 61 62
  my ($self, $query, $target, $mapper) = @_;

  my $name = $self->submit_exonerate($query, $target, $mapper, $self->options());
63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84

  return $name;

}

=head2 options

  Args       : none
  Example    : none
  Description: Options to pass to exonerate. Override as appropriate.
  Returntype : List of strings.
  Exceptions : none
  Caller     : general

=cut

sub options() {

  return ();

}

85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159

=head2 resubmit_exonerate

=cut

sub resubmit_exonerate {
  my ($self, $mapper, $command, $outfile, $errfile, $job_id, $array_number, $root_dir) = @_;



  my ($junk,$query, $target, @rest) = split(/\s+/,$command);

  my $disk_space_needed = (stat($query))[7]+(stat($target))[7];
  
  $disk_space_needed /= 1024000; # convert to MB
  $disk_space_needed = int($disk_space_needed);
  $disk_space_needed += 1;
  
  my $unique_name = $self->get_class_name() . "_" . time();
  
#  my @main_bsub = ( 'bsub', '-R' .'select[linux] -Rrusage[tmp='.$disk_space_needed.']',  '-J' . $unique_name . '-o', $outfile, '-e', $errfile);
  
  my $exe_file = $root_dir."/resub_".$job_id."_".$array_number;
  open(RUN,">$exe_file") || die "Could not open file $exe_file";
  
  print RUN ". /usr/local/lsf/conf/profile.lsf\n";
  print RUN $command."\n";

  close(RUN);

  chmod 0755, $exe_file;


  my $usage = '-R "select[linux] rusage[tmp='.$disk_space_needed.']" -J "'.$unique_name.'"';


  my $com = "bsub $usage -o $outfile -e $errfile ".$exe_file;

  if($mapper->nofarm){
    print "Running job locally for job number $job_id [$array_number]\n" if($mapper->verbose);
    my $line = `$exe_file`;
    my $sth = $mapper->xref->dbc->prepare('update mapping_jobs set status = "SUBMITTED"'." where job_id = $job_id and array_number = $array_number");
    $sth->execute();
    $sth->finish;
    return 1;
  }


  my $line = `$com`;

  my $jobid  = 0;
  if ($line =~ /^Job <(\d+)> is submitted/) {
     $jobid = $1;
     print "LSF job ID for main mapping job: $jobid (job array with 1 job ($array_number))\n" if($mapper->verbose);
  }


  if (!$jobid) {
    # Something went wrong
    warn("Job submission failed:\n$@\n");
    print STDERR "bsub command was $com\n";
    print STDERR $line."\n";
    return 0;
  }
  else{
    # write details of job to database
    my $sth = $mapper->xref->dbc->prepare('update mapping_jobs set status = "SUBMITTED"'." where job_id = $job_id and array_number = $array_number");
    $sth->execute();
    $sth->finish;
  }
  
  return $jobid;

}

160 161 162 163 164 165 166 167 168 169 170 171 172
=head2 submit_exonerate

  Args       : none
  Example    : none
  Description: Submit a *single* mapping job array to exonerate.
  Returntype : The name of the job array submitted to LSF. Can be used in depend job.
  Exceptions : none
  Caller     : general

=cut

sub submit_exonerate {

Ian Longden's avatar
Ian Longden committed
173 174 175
#  my ($self, $query, $target, $root_dir, $nofarm, @options) = @_;
  my ($self, $query, $target, $mapper, @options) = @_;

176

Ian Longden's avatar
Ian Longden committed
177 178
  my $root_dir = $mapper->core->dir;

179
#  print "query $query\n" if($mapper->verbose);
180
  my $queryfile = basename($query);
181
#  print "target $target\n" if($mapper->verbose);
182 183
  my $targetfile = basename($target);

184 185 186 187 188 189 190 191
  my $prefix = $root_dir . "/" . basename($query);
  $prefix =~ s/\.\w+$//;

  my ($ensembl_type) = $prefix =~ /.*_(dna|peptide)$/; # dna or prot
  my $options_str = join(" ", @options);

  my $unique_name = $self->get_class_name() . "_" . time();

192 193 194 195 196 197 198
  my $disk_space_needed = (stat($query))[7]+(stat($target))[7];

  $disk_space_needed /= 1024000; # convert to MB
  $disk_space_needed = int($disk_space_needed);
  $disk_space_needed += 1;
#  print "disk space needed = ".$disk_space_needed."\n";

199 200
  my $num_jobs = calculate_num_jobs($query);

Ian Longden's avatar
Ian Longden committed
201
  if(defined($mapper->nofarm)){
202 203 204 205
    my $output = $self->get_class_name() . "_" . $ensembl_type . "_1.map";
    my $cmd = <<EON;
$exonerate_path $query $target --showvulgar false --showalignment FALSE --ryo "xref:%qi:%ti:%ei:%ql:%tl:%qab:%qae:%tab:%tae:%C:%s\n" $options_str | grep '^xref' > $root_dir/$output
EON
Ian Longden's avatar
Ian Longden committed
206
    print "none farm command is $cmd\n" if($mapper->verbose);
Ian Longden's avatar
Ian Longden committed
207 208 209 210 211 212 213 214 215 216 217 218 219 220 221


    # write details of job to database
    
    my $sth = $mapper->xref->dbc->prepare("insert into process_status (status, date) values('mapping_submitted',now())");
    $sth->execute();
    $sth->finish;
    
    my $jobid = 1;
    if($ensembl_type eq "peptide"){
      $jobid = 2;
    }

    for( my $i=1; $i<=1; $i++){
      my $command = "$exonerate_path $query $target --showvulgar false --showalignment FALSE --ryo ".
222
	'"xref:%qi:%ti:%ei:%ql:%tl:%qab:%qae:%tab:%tae:%C:%s\\\n"'." $options_str | grep ".'"'."^xref".'"'." > $root_dir/$output";
Ian Longden's avatar
Ian Longden committed
223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239
      my $insert = "insert into mapping (job_id, type, command_line, percent_query_cutoff, percent_target_cutoff, method, array_size) values($jobid, '$ensembl_type', '$command',".
				       $self->query_identity_threshold.", ".$self->target_identity_threshold.", '".$self->get_class_name()."', $i)";
      
      $sth = $mapper->xref->dbc->prepare($insert);
      $sth->execute;
      $sth->finish;
      
      $sth = $mapper->xref->dbc->prepare("insert into mapping_jobs (root_dir, map_file, status, out_file, err_file, array_number, job_id) values (?,?,?,?,?,?,?)");
      
      my $map_file = $self->get_class_name()."_".$ensembl_type."_".$i.".map";
      my $out_file = "xref_0_".$ensembl_type.".".$jobid."-".$i.".out";
      my $err_file = "xref_0_".$ensembl_type.".".$jobid."-".$i.".err";
      $sth->execute($root_dir, $map_file, 'SUBMITTED', $out_file, $err_file, $i, $jobid);
    }
    $sth->finish;
    

240 241 242 243 244 245
    system($cmd);
    $self->jobcount(1);
    return "nofarm";
  }


246 247 248 249 250
  # array features barf if just one job 
  if($num_jobs == 1){
    $num_jobs++;
  }

251 252
  $self->jobcount($self->jobcount()+$num_jobs);

253

254

255
  my $output = $self->get_class_name() . "_" . $ensembl_type . "_" . "\$LSB_JOBINDEX.map";
256

257
  my $usage = '-R "select[linux] -rusage[tmp='.$disk_space_needed.']" '.'-J "'.$unique_name.'[1-'.$num_jobs.']%200" -o '.$prefix.'.%J-%I.out -e  '.$prefix.'.%J-%I.err';
258

259
#  print "usage :- ".$usage ."\n";
260

261
#  my @main_bsub = ( 'bsub', '-R' .'select[linux] -Rrusage[tmp='.$disk_space_needed.']',  '-J' . $unique_name . "[1-$num_jobs]%200", '-o', "$prefix.%J-%I.out", '-e', "$prefix.%J-%I.err");
262

263 264
  my $command = $exonerate_path." ".$query." ".$target.' --querychunkid $LSB_JOBINDEX --querychunktotal '.$num_jobs.' --showvulgar false --showalignment FALSE --ryo "xref:%qi:%ti:%ei:%ql:%tl:%qab:%qae:%tab:%tae:%C:%s\n" '.$options_str;
  $command .= " | grep '^xref' > $root_dir/$output";
265

266 267 268 269 270
  my $exe_file = $root_dir."/".$unique_name.".submit";
  open(RUN,">$exe_file") || die "Could not open file $exe_file";
  
  print RUN ". /usr/local/lsf/conf/profile.lsf\n";
  print RUN $command."\n";
271

272
  close(RUN);
273

274
  chmod 0755, $exe_file;
275

276
  my $com = "bsub $usage $exe_file";
277

278
  my $line = `$com`;
279

280 281 282 283 284
  my $jobid  = 0;
  if ($line =~ /^Job <(\d+)> is submitted/) {
     $jobid = $1;
     print "LSF job ID for main mapping job: $jobid, name $unique_name with $num_jobs arrays elements)\n" if($mapper->verbose);
  }
285

286

287
  if (!$jobid) {
288 289
    # Something went wrong
    warn("Job submission failed:\n$@\n");
290 291
    print STDERR "bsub command was $com\n";
    print STDERR $line."\n";
292
    return 0;
293
  } 
Ian Longden's avatar
Ian Longden committed
294 295 296
  else{
    # write details of job to database
    my $command = "$exonerate_path $query $target --querychunkid \$LSB_JOBINDEX --querychunktotal $num_jobs --showvulgar false --showalignment FALSE --ryo ".
297
      '"xref:%qi:%ti:%ei:%ql:%tl:%qab:%qae:%tab:%tae:%C:%s\\\n"'." $options_str | grep ".'"'."^xref".'"'." > $root_dir/$output";
Ian Longden's avatar
Ian Longden committed
298 299 300

    my $sth = $mapper->xref->dbc->prepare("insert into process_status (status, date) values('mapping_submitted',now())");
    $sth->execute();
Ian Longden's avatar
tidy up  
Ian Longden committed
301 302
    $sth->finish;

Ian Longden's avatar
Ian Longden committed
303 304
    my $insert = "insert into mapping (job_id, type, command_line, percent_query_cutoff, percent_target_cutoff, method, array_size) values($jobid, '$ensembl_type', '$command',".
				       $self->query_identity_threshold.", ".$self->target_identity_threshold.", '".$self->get_class_name()."', $num_jobs)";
Ian Longden's avatar
tidy up  
Ian Longden committed
305

Ian Longden's avatar
Ian Longden committed
306 307 308

    $sth = $mapper->xref->dbc->prepare($insert);
    $sth->execute;
Ian Longden's avatar
tidy up  
Ian Longden committed
309 310
    $sth->finish;

Ian Longden's avatar
Ian Longden committed
311 312 313 314 315 316 317 318 319 320 321 322
    $sth = $mapper->xref->dbc->prepare("insert into mapping_jobs (root_dir, map_file, status, out_file, err_file, array_number, job_id) values (?,?,?,?,?,?,?)");
    
    for( my $i=1; $i<=$num_jobs; $i++){
      my $map_file = $self->get_class_name()."_".$ensembl_type."_".$i.".map";
      my $out_file = "xref_0_".$ensembl_type.".".$jobid."-".$i.".out";
      my $err_file = "xref_0_".$ensembl_type.".".$jobid."-".$i.".err";
      $sth->execute($root_dir, $map_file, 'SUBMITTED', $out_file, $err_file, $i, $jobid);
    }
    $sth->finish;
    
  }
  
323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344
  return $unique_name;

}

=head2 calculate_num_jobs

  Args       : Query file name
  Example    : none
  Description: Calculate the number of LSF jobs to submit based on the size of the query file.
  Returntype : The number of jobs.
  Exceptions : none
  Caller     : general

=cut

sub calculate_num_jobs {

  my $query = shift;

  my $bytes_per_job = 250000;

  my $size = (stat $query)[7];
345
  if( $size == 0 ){ return 0 }
346
  return int($size/$bytes_per_job) || 1;
347 348 349 350 351 352 353 354 355 356

}

# Get class name from fully-qualified object name
# e.g. return ExonerateBasic from XrefMapper::Methods::ExonerateBasic=HASH(Ox12113c0)

sub get_class_name() {

  my $self = shift;

357
  my $module_name = ref($self);
358

359
  my @bits = split(/::/, $module_name);
360

361
  return $bits[-1];
362 363 364

}

Glenn Proctor's avatar
Glenn Proctor committed
365 366 367 368 369 370 371 372 373
# Check if any .err files exist that have non-zero size;
# this indicates that something has gone wrong with the exonerate run

sub check_err {

  my ($self, $dir) = @_;

  foreach my $err (glob("$dir/*.err")) {

374
    print "\n\n*** Warning: $err has non-zero size; may indicate problems with exonerate run\n\n\n" if (-s $err);
Glenn Proctor's avatar
Glenn Proctor committed
375 376 377 378

  }
}

379 380 381 382
# Percentage identity that query (xref) must match to be considered.

sub query_identity_threshold {

383
  return 0;
384 385 386 387 388 389 390 391

}


# Percentage identity that target (ensembl) must match to be considered.

sub target_identity_threshold {

392
  return 0;
393 394 395

}

396 397
1;