ExonerateBasic.pm 7.54 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
# Base class that all other mapping methods inherit from

package XrefMapper::Methods::ExonerateBasic;

use strict;

use File::Basename;
use IPC::Open3;

# Path to exonerate executable
11
12
#my $exonerate_path = "/usr/local/ensembl/bin/exonerate-0.9.0";
my $exonerate_path = "/software/ensembl/bin/exonerate-1.4.0";
13
14
15
16
17
18
19

sub new {

  my($class) = @_;

  my $self ={};
  bless $self,$class;
20
  $self->jobcount(0);
21
22
23
24
25

  return $self;

}

26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
=head2 jobcount
 
  Arg [1]    : (optional) 
  Example    : $mapper->jobcount(1004);
  Description: Getter / Setter for number of jobs submitted. 
  Returntype : scalar
  Exceptions : none

=cut

sub jobcount {
  my ($self, $arg) = @_;

  (defined $arg) &&
    ($self->{_jobcount} = $arg );
  return $self->{_jobcount};
}
 

45
46
47
48
49
50
51
52
53
54
55
56
57
58
=head2 run

  Arg[1]     : Query filename to pass to exonerate; this should be the XREF file.
  Arg[2]     : Target filename to pass to exonerate; this should be the ENSEMBL file.
  Example    : none
  Description: Run exonerate with default options.
  Returntype : none
  Exceptions : none
  Caller     : general

=cut

sub run() {

Ian Longden's avatar
Ian Longden committed
59
60
61
#  my ($self, $query, $target, $dir, $nofarm) = @_;
#
#  my $name = $self->submit_exonerate($query, $target, $dir, $nofarm, $self->options());
62

Ian Longden's avatar
Ian Longden committed
63
64
65
  my ($self, $query, $target, $mapper) = @_;

  my $name = $self->submit_exonerate($query, $target, $mapper, $self->options());
66

67
68
#  $self->check_err($dir);
#  no point until after the depend job done.
Glenn Proctor's avatar
Glenn Proctor committed
69

70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
  return $name;

}

=head2 options

  Args       : none
  Example    : none
  Description: Options to pass to exonerate. Override as appropriate.
  Returntype : List of strings.
  Exceptions : none
  Caller     : general

=cut

sub options() {

  return ();

}

=head2 submit_exonerate

  Args       : none
  Example    : none
  Description: Submit a *single* mapping job array to exonerate.
  Returntype : The name of the job array submitted to LSF. Can be used in depend job.
  Exceptions : none
  Caller     : general

=cut

sub submit_exonerate {

Ian Longden's avatar
Ian Longden committed
104
105
106
#  my ($self, $query, $target, $root_dir, $nofarm, @options) = @_;
  my ($self, $query, $target, $mapper, @options) = @_;

107

Ian Longden's avatar
Ian Longden committed
108
109
110
  my $root_dir = $mapper->core->dir;

  print "query $query\n";
111
  my $queryfile = basename($query);
Ian Longden's avatar
Ian Longden committed
112
  print "target $target\n";
113
114
  my $targetfile = basename($target);

115
116
117
118
119
120
121
122
  my $prefix = $root_dir . "/" . basename($query);
  $prefix =~ s/\.\w+$//;

  my ($ensembl_type) = $prefix =~ /.*_(dna|peptide)$/; # dna or prot
  my $options_str = join(" ", @options);

  my $unique_name = $self->get_class_name() . "_" . time();

123
124
125
126
127
128
129
  my $disk_space_needed = (stat($query))[7]+(stat($target))[7];

  $disk_space_needed /= 1024000; # convert to MB
  $disk_space_needed = int($disk_space_needed);
  $disk_space_needed += 1;
#  print "disk space needed = ".$disk_space_needed."\n";

130
131
  my $num_jobs = calculate_num_jobs($query);

Ian Longden's avatar
Ian Longden committed
132
  if(defined($mapper->nofarm)){
133
134
135
136
137
138
139
140
141
142
143
144
    my $output = $self->get_class_name() . "_" . $ensembl_type . "_1.map";
    my $cmd = <<EON;
$exonerate_path $query $target --showvulgar false --showalignment FALSE --ryo "xref:%qi:%ti:%ei:%ql:%tl:%qab:%qae:%tab:%tae:%C:%s\n" $options_str | grep '^xref' > $root_dir/$output

EON
    print "none farm command is $cmd\n";
    system($cmd);
    $self->jobcount(1);
    return "nofarm";
  }


145
146
147
148
149
  # array features barf if just one job 
  if($num_jobs == 1){
    $num_jobs++;
  }

150
151
  $self->jobcount($self->jobcount()+$num_jobs);

152

153

154
  my $output = $self->get_class_name() . "_" . $ensembl_type . "_" . "\$LSB_JOBINDEX.map";
155

156
  my @main_bsub = ( 'bsub', '-R' .'select[linux] -Rrusage[tmp='.$disk_space_needed.']',  '-J' . $unique_name . "[1-$num_jobs]%200", '-o', "$prefix.%J-%I.out", '-e', "$prefix.%J-%I.err");
157

158

159
160


161
162
163
164
165
166

  # Create actual execute script to be executed with LSF, and write to pipe
  my $main_job = <<EOF;
. /usr/local/lsf/conf/profile.lsf


167
$exonerate_path $query $target --querychunkid \$LSB_JOBINDEX --querychunktotal $num_jobs --showvulgar false --showalignment FALSE --ryo "xref:%qi:%ti:%ei:%ql:%tl:%qab:%qae:%tab:%tae:%C:%s\n" $options_str | grep '^xref' > $root_dir/$output
168
169
170

EOF

171
172


173
174
175
176
177
178
179
180
181
182
183
184
  # now submit it
  my $jobid = 0;

  eval {
    my $pid;
    my $reader;

    local *BSUB;
    local *BSUB_READER;

    if (($reader = open(BSUB_READER, '-|'))) {
      while (<BSUB_READER>) {
185

186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
	if (/^Job <(\d+)> is submitted/) {
	  $jobid = $1;
	  print "LSF job ID for main mapping job: $jobid (job array with $num_jobs jobs)\n"
	}
      }
      close(BSUB_READER);
    } else {
      die("Could not fork : $!\n") unless (defined($reader));
      open(STDERR, ">&STDOUT");
      if (($pid = open(BSUB, '|-'))) {
	
	print BSUB $main_job;
	close BSUB;
	if ($? != 0) {
	  die("bsub exited with non-zero status - job not submitted\n");
	}
      } else {
	if (defined($pid)) {
	  exec(@main_bsub);
	  die("Could not exec bsub : $!\n");
	} else {
	  die("Could not fork : $!\n");
	}
      }
      exit(0);
211
    }
212
213
214
215
216
217
  };

  if ($@) {
    # Something went wrong
    warn("Job submission failed:\n$@\n");
    return 0;
218
  }
Ian Longden's avatar
Ian Longden committed
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
  else{
    # write details of job to database
    my $command = "$exonerate_path $query $target --querychunkid \$LSB_JOBINDEX --querychunktotal $num_jobs --showvulgar false --showalignment FALSE --ryo ".
      '"xref:%qi:%ti:%ei:%ql:%tl:%qab:%qae:%tab:%tae:%C:%s\n"'." $options_str | grep ".'"'."^xref".'"'." > $root_dir/$output";

    my $sth = $mapper->xref->dbc->prepare("insert into process_status (status, date) values('mapping_submitted',now())");
    $sth->execute();
    my $insert = "insert into mapping (job_id, type, command_line, percent_query_cutoff, percent_target_cutoff, method, array_size) values($jobid, '$ensembl_type', '$command',".
				       $self->query_identity_threshold.", ".$self->target_identity_threshold.", '".$self->get_class_name()."', $num_jobs)";
#    print $insert."\n";

    $sth = $mapper->xref->dbc->prepare($insert);
    $sth->execute;
 
    $sth = $mapper->xref->dbc->prepare("insert into mapping_jobs (root_dir, map_file, status, out_file, err_file, array_number, job_id) values (?,?,?,?,?,?,?)");
    
    for( my $i=1; $i<=$num_jobs; $i++){
      my $map_file = $self->get_class_name()."_".$ensembl_type."_".$i.".map";
      my $out_file = "xref_0_".$ensembl_type.".".$jobid."-".$i.".out";
      my $err_file = "xref_0_".$ensembl_type.".".$jobid."-".$i.".err";
      $sth->execute($root_dir, $map_file, 'SUBMITTED', $out_file, $err_file, $i, $jobid);
    }
    $sth->finish;
    
  }
  
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
  return $unique_name;

}

=head2 calculate_num_jobs

  Args       : Query file name
  Example    : none
  Description: Calculate the number of LSF jobs to submit based on the size of the query file.
  Returntype : The number of jobs.
  Exceptions : none
  Caller     : general

=cut

sub calculate_num_jobs {

  my $query = shift;

  my $bytes_per_job = 250000;

  my $size = (stat $query)[7];
267
  if( $size == 0 ){ return 0 }
268
  return int($size/$bytes_per_job) || 1;
269
270
271
272
273
274
275
276
277
278

}

# Get class name from fully-qualified object name
# e.g. return ExonerateBasic from XrefMapper::Methods::ExonerateBasic=HASH(Ox12113c0)

sub get_class_name() {

  my $self = shift;

279
  my $module_name = ref($self);
280

281
  my @bits = split(/::/, $module_name);
282

283
  return $bits[-1];
284
285
286

}

Glenn Proctor's avatar
Glenn Proctor committed
287
288
289
290
291
292
293
294
295
# Check if any .err files exist that have non-zero size;
# this indicates that something has gone wrong with the exonerate run

sub check_err {

  my ($self, $dir) = @_;

  foreach my $err (glob("$dir/*.err")) {

296
    print "\n\n*** Warning: $err has non-zero size; may indicate problems with exonerate run\n\n\n" if (-s $err);
Glenn Proctor's avatar
Glenn Proctor committed
297
298
299
300

  }
}

301
302
303
304
# Percentage identity that query (xref) must match to be considered.

sub query_identity_threshold {

305
  return 0;
306
307
308
309
310
311
312
313

}


# Percentage identity that target (ensembl) must match to be considered.

sub target_identity_threshold {

314
  return 0;
315
316
317

}

318
319
1;