ExonerateBasic.pm 8.85 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
# Base class that all other mapping methods inherit from

package XrefMapper::Methods::ExonerateBasic;

use strict;

use File::Basename;
use IPC::Open3;

# Path to exonerate executable
11
12
#my $exonerate_path = "/usr/local/ensembl/bin/exonerate-0.9.0";
my $exonerate_path = "/software/ensembl/bin/exonerate-1.4.0";
13
14
15
16
17
18
19

sub new {

  my($class) = @_;

  my $self ={};
  bless $self,$class;
20
  $self->jobcount(0);
21
22
23
24
25

  return $self;

}

26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
=head2 jobcount
 
  Arg [1]    : (optional) 
  Example    : $mapper->jobcount(1004);
  Description: Getter / Setter for number of jobs submitted. 
  Returntype : scalar
  Exceptions : none

=cut

sub jobcount {
  my ($self, $arg) = @_;

  (defined $arg) &&
    ($self->{_jobcount} = $arg );
  return $self->{_jobcount};
}
 

45
46
47
48
49
50
51
52
53
54
55
56
57
58
=head2 run

  Arg[1]     : Query filename to pass to exonerate; this should be the XREF file.
  Arg[2]     : Target filename to pass to exonerate; this should be the ENSEMBL file.
  Example    : none
  Description: Run exonerate with default options.
  Returntype : none
  Exceptions : none
  Caller     : general

=cut

sub run() {

Ian Longden's avatar
Ian Longden committed
59
60
61
  my ($self, $query, $target, $mapper) = @_;

  my $name = $self->submit_exonerate($query, $target, $mapper, $self->options());
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96

  return $name;

}

=head2 options

  Args       : none
  Example    : none
  Description: Options to pass to exonerate. Override as appropriate.
  Returntype : List of strings.
  Exceptions : none
  Caller     : general

=cut

sub options() {

  return ();

}

=head2 submit_exonerate

  Args       : none
  Example    : none
  Description: Submit a *single* mapping job array to exonerate.
  Returntype : The name of the job array submitted to LSF. Can be used in depend job.
  Exceptions : none
  Caller     : general

=cut

sub submit_exonerate {

Ian Longden's avatar
Ian Longden committed
97
98
99
#  my ($self, $query, $target, $root_dir, $nofarm, @options) = @_;
  my ($self, $query, $target, $mapper, @options) = @_;

100

Ian Longden's avatar
Ian Longden committed
101
102
  my $root_dir = $mapper->core->dir;

Ian Longden's avatar
Ian Longden committed
103
  print "query $query\n" if($mapper->verbose);
104
  my $queryfile = basename($query);
Ian Longden's avatar
Ian Longden committed
105
  print "target $target\n" if($mapper->verbose);
106
107
  my $targetfile = basename($target);

108
109
110
111
112
113
114
115
  my $prefix = $root_dir . "/" . basename($query);
  $prefix =~ s/\.\w+$//;

  my ($ensembl_type) = $prefix =~ /.*_(dna|peptide)$/; # dna or prot
  my $options_str = join(" ", @options);

  my $unique_name = $self->get_class_name() . "_" . time();

116
117
118
119
120
121
122
  my $disk_space_needed = (stat($query))[7]+(stat($target))[7];

  $disk_space_needed /= 1024000; # convert to MB
  $disk_space_needed = int($disk_space_needed);
  $disk_space_needed += 1;
#  print "disk space needed = ".$disk_space_needed."\n";

123
124
  my $num_jobs = calculate_num_jobs($query);

Ian Longden's avatar
Ian Longden committed
125
  if(defined($mapper->nofarm)){
126
127
128
129
    my $output = $self->get_class_name() . "_" . $ensembl_type . "_1.map";
    my $cmd = <<EON;
$exonerate_path $query $target --showvulgar false --showalignment FALSE --ryo "xref:%qi:%ti:%ei:%ql:%tl:%qab:%qae:%tab:%tae:%C:%s\n" $options_str | grep '^xref' > $root_dir/$output
EON
Ian Longden's avatar
Ian Longden committed
130
    print "none farm command is $cmd\n" if($mapper->verbose);
Ian Longden's avatar
Ian Longden committed
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163


    # write details of job to database
    
    my $sth = $mapper->xref->dbc->prepare("insert into process_status (status, date) values('mapping_submitted',now())");
    $sth->execute();
    $sth->finish;
    
    my $jobid = 1;
    if($ensembl_type eq "peptide"){
      $jobid = 2;
    }

    for( my $i=1; $i<=1; $i++){
      my $command = "$exonerate_path $query $target --showvulgar false --showalignment FALSE --ryo ".
	'"xref:%qi:%ti:%ei:%ql:%tl:%qab:%qae:%tab:%tae:%C:%s\n"'." $options_str | grep ".'"'."^xref".'"'." > $root_dir/$output";
      my $insert = "insert into mapping (job_id, type, command_line, percent_query_cutoff, percent_target_cutoff, method, array_size) values($jobid, '$ensembl_type', '$command',".
				       $self->query_identity_threshold.", ".$self->target_identity_threshold.", '".$self->get_class_name()."', $i)";
      
      $sth = $mapper->xref->dbc->prepare($insert);
      $sth->execute;
      $sth->finish;
      
      $sth = $mapper->xref->dbc->prepare("insert into mapping_jobs (root_dir, map_file, status, out_file, err_file, array_number, job_id) values (?,?,?,?,?,?,?)");
      
      my $map_file = $self->get_class_name()."_".$ensembl_type."_".$i.".map";
      my $out_file = "xref_0_".$ensembl_type.".".$jobid."-".$i.".out";
      my $err_file = "xref_0_".$ensembl_type.".".$jobid."-".$i.".err";
      $sth->execute($root_dir, $map_file, 'SUBMITTED', $out_file, $err_file, $i, $jobid);
    }
    $sth->finish;
    

164
165
166
167
168
169
    system($cmd);
    $self->jobcount(1);
    return "nofarm";
  }


170
171
172
173
174
  # array features barf if just one job 
  if($num_jobs == 1){
    $num_jobs++;
  }

175
176
  $self->jobcount($self->jobcount()+$num_jobs);

177

178

179
  my $output = $self->get_class_name() . "_" . $ensembl_type . "_" . "\$LSB_JOBINDEX.map";
180

181
  my @main_bsub = ( 'bsub', '-R' .'select[linux] -Rrusage[tmp='.$disk_space_needed.']',  '-J' . $unique_name . "[1-$num_jobs]%200", '-o', "$prefix.%J-%I.out", '-e', "$prefix.%J-%I.err");
182

183

184
185


186
187
188
189
190
191

  # Create actual execute script to be executed with LSF, and write to pipe
  my $main_job = <<EOF;
. /usr/local/lsf/conf/profile.lsf


192
$exonerate_path $query $target --querychunkid \$LSB_JOBINDEX --querychunktotal $num_jobs --showvulgar false --showalignment FALSE --ryo "xref:%qi:%ti:%ei:%ql:%tl:%qab:%qae:%tab:%tae:%C:%s\n" $options_str | grep '^xref' > $root_dir/$output
193
194
195

EOF

196
197


198
199
200
201
202
203
204
205
206
207
208
209
  # now submit it
  my $jobid = 0;

  eval {
    my $pid;
    my $reader;

    local *BSUB;
    local *BSUB_READER;

    if (($reader = open(BSUB_READER, '-|'))) {
      while (<BSUB_READER>) {
210

211
212
	if (/^Job <(\d+)> is submitted/) {
	  $jobid = $1;
Ian Longden's avatar
Ian Longden committed
213
	  print "LSF job ID for main mapping job: $jobid (job array with $num_jobs jobs)\n" if($mapper->verbose);
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
	}
      }
      close(BSUB_READER);
    } else {
      die("Could not fork : $!\n") unless (defined($reader));
      open(STDERR, ">&STDOUT");
      if (($pid = open(BSUB, '|-'))) {
	
	print BSUB $main_job;
	close BSUB;
	if ($? != 0) {
	  die("bsub exited with non-zero status - job not submitted\n");
	}
      } else {
	if (defined($pid)) {
	  exec(@main_bsub);
	  die("Could not exec bsub : $!\n");
	} else {
	  die("Could not fork : $!\n");
	}
      }
      exit(0);
236
    }
237
238
239
240
241
242
  };

  if ($@) {
    # Something went wrong
    warn("Job submission failed:\n$@\n");
    return 0;
243
  }
Ian Longden's avatar
Ian Longden committed
244
245
246
247
248
249
250
  else{
    # write details of job to database
    my $command = "$exonerate_path $query $target --querychunkid \$LSB_JOBINDEX --querychunktotal $num_jobs --showvulgar false --showalignment FALSE --ryo ".
      '"xref:%qi:%ti:%ei:%ql:%tl:%qab:%qae:%tab:%tae:%C:%s\n"'." $options_str | grep ".'"'."^xref".'"'." > $root_dir/$output";

    my $sth = $mapper->xref->dbc->prepare("insert into process_status (status, date) values('mapping_submitted',now())");
    $sth->execute();
Ian Longden's avatar
tidy up  
Ian Longden committed
251
252
    $sth->finish;

Ian Longden's avatar
Ian Longden committed
253
254
    my $insert = "insert into mapping (job_id, type, command_line, percent_query_cutoff, percent_target_cutoff, method, array_size) values($jobid, '$ensembl_type', '$command',".
				       $self->query_identity_threshold.", ".$self->target_identity_threshold.", '".$self->get_class_name()."', $num_jobs)";
Ian Longden's avatar
tidy up  
Ian Longden committed
255

Ian Longden's avatar
Ian Longden committed
256
257
258

    $sth = $mapper->xref->dbc->prepare($insert);
    $sth->execute;
Ian Longden's avatar
tidy up  
Ian Longden committed
259
260
    $sth->finish;

Ian Longden's avatar
Ian Longden committed
261
262
263
264
265
266
267
268
269
270
271
272
    $sth = $mapper->xref->dbc->prepare("insert into mapping_jobs (root_dir, map_file, status, out_file, err_file, array_number, job_id) values (?,?,?,?,?,?,?)");
    
    for( my $i=1; $i<=$num_jobs; $i++){
      my $map_file = $self->get_class_name()."_".$ensembl_type."_".$i.".map";
      my $out_file = "xref_0_".$ensembl_type.".".$jobid."-".$i.".out";
      my $err_file = "xref_0_".$ensembl_type.".".$jobid."-".$i.".err";
      $sth->execute($root_dir, $map_file, 'SUBMITTED', $out_file, $err_file, $i, $jobid);
    }
    $sth->finish;
    
  }
  
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
  return $unique_name;

}

=head2 calculate_num_jobs

  Args       : Query file name
  Example    : none
  Description: Calculate the number of LSF jobs to submit based on the size of the query file.
  Returntype : The number of jobs.
  Exceptions : none
  Caller     : general

=cut

sub calculate_num_jobs {

  my $query = shift;

  my $bytes_per_job = 250000;

  my $size = (stat $query)[7];
295
  if( $size == 0 ){ return 0 }
296
  return int($size/$bytes_per_job) || 1;
297
298
299
300
301
302
303
304
305
306

}

# Get class name from fully-qualified object name
# e.g. return ExonerateBasic from XrefMapper::Methods::ExonerateBasic=HASH(Ox12113c0)

sub get_class_name() {

  my $self = shift;

307
  my $module_name = ref($self);
308

309
  my @bits = split(/::/, $module_name);
310

311
  return $bits[-1];
312
313
314

}

Glenn Proctor's avatar
Glenn Proctor committed
315
316
317
318
319
320
321
322
323
# Check if any .err files exist that have non-zero size;
# this indicates that something has gone wrong with the exonerate run

sub check_err {

  my ($self, $dir) = @_;

  foreach my $err (glob("$dir/*.err")) {

324
    print "\n\n*** Warning: $err has non-zero size; may indicate problems with exonerate run\n\n\n" if (-s $err);
Glenn Proctor's avatar
Glenn Proctor committed
325
326
327
328

  }
}

329
330
331
332
# Percentage identity that query (xref) must match to be considered.

sub query_identity_threshold {

333
  return 0;
334
335
336
337
338
339
340
341

}


# Percentage identity that target (ensembl) must match to be considered.

sub target_identity_threshold {

342
  return 0;
343
344
345

}

346
347
1;