ExonerateBasic.pm 7.42 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
# Base class that all other mapping methods inherit from

package XrefMapper::Methods::ExonerateBasic;

use strict;

use File::Basename;
use IPC::Open3;

# Path to exonerate executable
11
12
#my $exonerate_path = "/usr/local/ensembl/bin/exonerate-0.9.0";
my $exonerate_path = "/software/ensembl/bin/exonerate-1.4.0";
13
14
15
16
17
18
19

sub new {

  my($class) = @_;

  my $self ={};
  bless $self,$class;
20
  $self->jobcount(0);
21
22
23
24
25

  return $self;

}

26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
=head2 jobcount
 
  Arg [1]    : (optional) 
  Example    : $mapper->jobcount(1004);
  Description: Getter / Setter for number of jobs submitted. 
  Returntype : scalar
  Exceptions : none

=cut

sub jobcount {
  my ($self, $arg) = @_;

  (defined $arg) &&
    ($self->{_jobcount} = $arg );
  return $self->{_jobcount};
}
 

45
46
47
48
49
50
51
52
53
54
55
56
57
58
=head2 run

  Arg[1]     : Query filename to pass to exonerate; this should be the XREF file.
  Arg[2]     : Target filename to pass to exonerate; this should be the ENSEMBL file.
  Example    : none
  Description: Run exonerate with default options.
  Returntype : none
  Exceptions : none
  Caller     : general

=cut

sub run() {

Ian Longden's avatar
Ian Longden committed
59
60
61
  my ($self, $query, $target, $mapper) = @_;

  my $name = $self->submit_exonerate($query, $target, $mapper, $self->options());
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96

  return $name;

}

=head2 options

  Args       : none
  Example    : none
  Description: Options to pass to exonerate. Override as appropriate.
  Returntype : List of strings.
  Exceptions : none
  Caller     : general

=cut

sub options() {

  return ();

}

=head2 submit_exonerate

  Args       : none
  Example    : none
  Description: Submit a *single* mapping job array to exonerate.
  Returntype : The name of the job array submitted to LSF. Can be used in depend job.
  Exceptions : none
  Caller     : general

=cut

sub submit_exonerate {

Ian Longden's avatar
Ian Longden committed
97
98
99
#  my ($self, $query, $target, $root_dir, $nofarm, @options) = @_;
  my ($self, $query, $target, $mapper, @options) = @_;

100

Ian Longden's avatar
Ian Longden committed
101
102
  my $root_dir = $mapper->core->dir;

Ian Longden's avatar
Ian Longden committed
103
  print "query $query\n" if($mapper->verbose);
104
  my $queryfile = basename($query);
Ian Longden's avatar
Ian Longden committed
105
  print "target $target\n" if($mapper->verbose);
106
107
  my $targetfile = basename($target);

108
109
110
111
112
113
114
115
  my $prefix = $root_dir . "/" . basename($query);
  $prefix =~ s/\.\w+$//;

  my ($ensembl_type) = $prefix =~ /.*_(dna|peptide)$/; # dna or prot
  my $options_str = join(" ", @options);

  my $unique_name = $self->get_class_name() . "_" . time();

116
117
118
119
120
121
122
  my $disk_space_needed = (stat($query))[7]+(stat($target))[7];

  $disk_space_needed /= 1024000; # convert to MB
  $disk_space_needed = int($disk_space_needed);
  $disk_space_needed += 1;
#  print "disk space needed = ".$disk_space_needed."\n";

123
124
  my $num_jobs = calculate_num_jobs($query);

Ian Longden's avatar
Ian Longden committed
125
  if(defined($mapper->nofarm)){
126
127
128
129
130
    my $output = $self->get_class_name() . "_" . $ensembl_type . "_1.map";
    my $cmd = <<EON;
$exonerate_path $query $target --showvulgar false --showalignment FALSE --ryo "xref:%qi:%ti:%ei:%ql:%tl:%qab:%qae:%tab:%tae:%C:%s\n" $options_str | grep '^xref' > $root_dir/$output

EON
Ian Longden's avatar
Ian Longden committed
131
    print "none farm command is $cmd\n" if($mapper->verbose);
132
133
134
135
136
137
    system($cmd);
    $self->jobcount(1);
    return "nofarm";
  }


138
139
140
141
142
  # array features barf if just one job 
  if($num_jobs == 1){
    $num_jobs++;
  }

143
144
  $self->jobcount($self->jobcount()+$num_jobs);

145

146

147
  my $output = $self->get_class_name() . "_" . $ensembl_type . "_" . "\$LSB_JOBINDEX.map";
148

149
  my @main_bsub = ( 'bsub', '-R' .'select[linux] -Rrusage[tmp='.$disk_space_needed.']',  '-J' . $unique_name . "[1-$num_jobs]%200", '-o', "$prefix.%J-%I.out", '-e', "$prefix.%J-%I.err");
150

151

152
153


154
155
156
157
158
159

  # Create actual execute script to be executed with LSF, and write to pipe
  my $main_job = <<EOF;
. /usr/local/lsf/conf/profile.lsf


160
$exonerate_path $query $target --querychunkid \$LSB_JOBINDEX --querychunktotal $num_jobs --showvulgar false --showalignment FALSE --ryo "xref:%qi:%ti:%ei:%ql:%tl:%qab:%qae:%tab:%tae:%C:%s\n" $options_str | grep '^xref' > $root_dir/$output
161
162
163

EOF

164
165


166
167
168
169
170
171
172
173
174
175
176
177
  # now submit it
  my $jobid = 0;

  eval {
    my $pid;
    my $reader;

    local *BSUB;
    local *BSUB_READER;

    if (($reader = open(BSUB_READER, '-|'))) {
      while (<BSUB_READER>) {
178

179
180
	if (/^Job <(\d+)> is submitted/) {
	  $jobid = $1;
Ian Longden's avatar
Ian Longden committed
181
	  print "LSF job ID for main mapping job: $jobid (job array with $num_jobs jobs)\n" if($mapper->verbose);
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
	}
      }
      close(BSUB_READER);
    } else {
      die("Could not fork : $!\n") unless (defined($reader));
      open(STDERR, ">&STDOUT");
      if (($pid = open(BSUB, '|-'))) {
	
	print BSUB $main_job;
	close BSUB;
	if ($? != 0) {
	  die("bsub exited with non-zero status - job not submitted\n");
	}
      } else {
	if (defined($pid)) {
	  exec(@main_bsub);
	  die("Could not exec bsub : $!\n");
	} else {
	  die("Could not fork : $!\n");
	}
      }
      exit(0);
204
    }
205
206
207
208
209
210
  };

  if ($@) {
    # Something went wrong
    warn("Job submission failed:\n$@\n");
    return 0;
211
  }
Ian Longden's avatar
Ian Longden committed
212
213
214
215
216
217
218
  else{
    # write details of job to database
    my $command = "$exonerate_path $query $target --querychunkid \$LSB_JOBINDEX --querychunktotal $num_jobs --showvulgar false --showalignment FALSE --ryo ".
      '"xref:%qi:%ti:%ei:%ql:%tl:%qab:%qae:%tab:%tae:%C:%s\n"'." $options_str | grep ".'"'."^xref".'"'." > $root_dir/$output";

    my $sth = $mapper->xref->dbc->prepare("insert into process_status (status, date) values('mapping_submitted',now())");
    $sth->execute();
Ian Longden's avatar
tidy up  
Ian Longden committed
219
220
    $sth->finish;

Ian Longden's avatar
Ian Longden committed
221
222
    my $insert = "insert into mapping (job_id, type, command_line, percent_query_cutoff, percent_target_cutoff, method, array_size) values($jobid, '$ensembl_type', '$command',".
				       $self->query_identity_threshold.", ".$self->target_identity_threshold.", '".$self->get_class_name()."', $num_jobs)";
Ian Longden's avatar
tidy up  
Ian Longden committed
223

Ian Longden's avatar
Ian Longden committed
224
225
226

    $sth = $mapper->xref->dbc->prepare($insert);
    $sth->execute;
Ian Longden's avatar
tidy up  
Ian Longden committed
227
228
    $sth->finish;

Ian Longden's avatar
Ian Longden committed
229
230
231
232
233
234
235
236
237
238
239
240
    $sth = $mapper->xref->dbc->prepare("insert into mapping_jobs (root_dir, map_file, status, out_file, err_file, array_number, job_id) values (?,?,?,?,?,?,?)");
    
    for( my $i=1; $i<=$num_jobs; $i++){
      my $map_file = $self->get_class_name()."_".$ensembl_type."_".$i.".map";
      my $out_file = "xref_0_".$ensembl_type.".".$jobid."-".$i.".out";
      my $err_file = "xref_0_".$ensembl_type.".".$jobid."-".$i.".err";
      $sth->execute($root_dir, $map_file, 'SUBMITTED', $out_file, $err_file, $i, $jobid);
    }
    $sth->finish;
    
  }
  
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
  return $unique_name;

}

=head2 calculate_num_jobs

  Args       : Query file name
  Example    : none
  Description: Calculate the number of LSF jobs to submit based on the size of the query file.
  Returntype : The number of jobs.
  Exceptions : none
  Caller     : general

=cut

sub calculate_num_jobs {

  my $query = shift;

  my $bytes_per_job = 250000;

  my $size = (stat $query)[7];
263
  if( $size == 0 ){ return 0 }
264
  return int($size/$bytes_per_job) || 1;
265
266
267
268
269
270
271
272
273
274

}

# Get class name from fully-qualified object name
# e.g. return ExonerateBasic from XrefMapper::Methods::ExonerateBasic=HASH(Ox12113c0)

sub get_class_name() {

  my $self = shift;

275
  my $module_name = ref($self);
276

277
  my @bits = split(/::/, $module_name);
278

279
  return $bits[-1];
280
281
282

}

Glenn Proctor's avatar
Glenn Proctor committed
283
284
285
286
287
288
289
290
291
# Check if any .err files exist that have non-zero size;
# this indicates that something has gone wrong with the exonerate run

sub check_err {

  my ($self, $dir) = @_;

  foreach my $err (glob("$dir/*.err")) {

292
    print "\n\n*** Warning: $err has non-zero size; may indicate problems with exonerate run\n\n\n" if (-s $err);
Glenn Proctor's avatar
Glenn Proctor committed
293
294
295
296

  }
}

297
298
299
300
# Percentage identity that query (xref) must match to be considered.

sub query_identity_threshold {

301
  return 0;
302
303
304
305
306
307
308
309

}


# Percentage identity that target (ensembl) must match to be considered.

sub target_identity_threshold {

310
  return 0;
311
312
313

}

314
315
1;