ExonerateBasic.pm 5.93 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
# Base class that all other mapping methods inherit from

package XrefMapper::Methods::ExonerateBasic;

use strict;

use File::Basename;
use IPC::Open3;

# Path to exonerate executable
Ian Longden's avatar
Ian Longden committed
11
my $exonerate_path = "/usr/local/ensembl/bin/exonerate-0.9.0";
12
13
14
15
16
17
18
19


sub new {

  my($class) = @_;

  my $self ={};
  bless $self,$class;
20
  $self->jobcount(0);
21
22
23
24
25

  return $self;

}

26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
=head2 jobcount
 
  Arg [1]    : (optional) 
  Example    : $mapper->jobcount(1004);
  Description: Getter / Setter for number of jobs submitted. 
  Returntype : scalar
  Exceptions : none

=cut

sub jobcount {
  my ($self, $arg) = @_;

  (defined $arg) &&
    ($self->{_jobcount} = $arg );
  return $self->{_jobcount};
}
 

45
46
47
48
49
50
51
52
53
54
55
56
57
58
=head2 run

  Arg[1]     : Query filename to pass to exonerate; this should be the XREF file.
  Arg[2]     : Target filename to pass to exonerate; this should be the ENSEMBL file.
  Example    : none
  Description: Run exonerate with default options.
  Returntype : none
  Exceptions : none
  Caller     : general

=cut

sub run() {

59
  my ($self, $query, $target, $dir) = @_;
60

61
  my $name = $self->submit_exonerate($query, $target, $dir, $self->options());
62

63
64
#  $self->check_err($dir);
#  no point until after the depend job done.
Glenn Proctor's avatar
Glenn Proctor committed
65

66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
  return $name;

}

=head2 options

  Args       : none
  Example    : none
  Description: Options to pass to exonerate. Override as appropriate.
  Returntype : List of strings.
  Exceptions : none
  Caller     : general

=cut

sub options() {

  return ();

}

=head2 submit_exonerate

  Args       : none
  Example    : none
  Description: Submit a *single* mapping job array to exonerate.
  Returntype : The name of the job array submitted to LSF. Can be used in depend job.
  Exceptions : none
  Caller     : general

=cut

sub submit_exonerate {

100
  my ($self, $query, $target, $root_dir, @options) = @_;
101

102
103
104
  my $queryfile = basename($query);
  my $targetfile = basename($target);

105
106
107
108
109
110
111
  my $disk_space_needed = (stat($query))[7]+(stat($target))[7];

  $disk_space_needed /= 1024000; # convert to MB
  $disk_space_needed = int($disk_space_needed);
  $disk_space_needed += 1;
#  print "disk space needed = ".$disk_space_needed."\n";

112
113
  my $num_jobs = calculate_num_jobs($query);

114
115
116
117
118
  # array features barf if just one job 
  if($num_jobs == 1){
    $num_jobs++;
  }

119
120
  $self->jobcount($self->jobcount()+$num_jobs);

121
122
123
124
125
126
127
  my $options_str = join(" ", @options);

  my $unique_name = $self->get_class_name() . "_" . time();

  my $prefix = $root_dir . "/" . basename($query);
  $prefix =~ s/\.\w+$//;

128
  my ($ensembl_type) = $prefix =~ /.*_(dna|peptide)$/; # dna or prot
129

130

131
  my $output = $self->get_class_name() . "_" . $ensembl_type . "_" . "\$LSB_JOBINDEX.map";
132

133
  my @main_bsub = ( 'bsub', '-R' .'select[linux] -Rrusage[tmp='.$disk_space_needed.']',  '-J' . $unique_name . "[1-$num_jobs]%200", '-o', "$prefix.%J-%I.out", '-e', "$prefix.%J-%I.err");
134
135

#  my @main_bsub = ( 'bsub', '-J' . $unique_name . "[1-$num_jobs]", '-o', "$prefix.%J-%I.out", '-e', "$prefix.%J-%I.err");
136

137
#  print "bsub command: " . join(" ", @main_bsub) . "\n\n";
138
139
140
141
142
143
144

  # Create actual execute script to be executed with LSF, and write to pipe
  my $main_job = <<EOF;
. /usr/local/lsf/conf/profile.lsf

cd /tmp

145
rm -f /tmp/\$LSB_JOBINDEX.$queryfile /tmp/\$LSB_JOBINDEX.$targetfile /tmp/$output
146

147
148
lsrcp ecs2c:$target /tmp/\$LSB_JOBINDEX.$targetfile
lsrcp ecs2c:$query  /tmp/\$LSB_JOBINDEX.$queryfile
149

150
$exonerate_path /tmp/\$LSB_JOBINDEX.$queryfile /tmp/\$LSB_JOBINDEX.$targetfile --querychunkid \$LSB_JOBINDEX --querychunktotal $num_jobs --showvulgar false --showalignment FALSE --ryo "xref:%qi:%ti:%ei:%ql:%tl:%qab:%qae:%tab:%tae:%C:%s\n" $options_str | grep '^xref' > /tmp/$output
151

152
lsrcp /tmp/$output ecs2c:$root_dir/$output
153

154
rm -f /tmp/\$LSB_JOBINDEX.$queryfile /tmp/\$LSB_JOBINDEX.$targetfile /tmp/$output
155
156
EOF

157
158
159
160
161
162
163
164
165
166
167
168
  # now submit it
  my $jobid = 0;

  eval {
    my $pid;
    my $reader;

    local *BSUB;
    local *BSUB_READER;

    if (($reader = open(BSUB_READER, '-|'))) {
      while (<BSUB_READER>) {
169

170
171
172
173
	if (/^Job <(\d+)> is submitted/) {
	  $jobid = $1;
	  print "LSF job ID for main mapping job: $jobid (job array with $num_jobs jobs)\n"
	}
174
175
176
	else{
	  print "ERROR:? $_\n";
	}
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
      }
      close(BSUB_READER);
    } else {
      die("Could not fork : $!\n") unless (defined($reader));
      open(STDERR, ">&STDOUT");
      if (($pid = open(BSUB, '|-'))) {
	
	print BSUB $main_job;
	close BSUB;
	if ($? != 0) {
	  die("bsub exited with non-zero status - job not submitted\n");
	}
      } else {
	if (defined($pid)) {
	  exec(@main_bsub);
	  die("Could not exec bsub : $!\n");
	} else {
	  die("Could not fork : $!\n");
	}
      }
      exit(0);
198
    }
199
200
201
202
203
204
  };

  if ($@) {
    # Something went wrong
    warn("Job submission failed:\n$@\n");
    return 0;
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
  }

  return $unique_name;

}

=head2 calculate_num_jobs

  Args       : Query file name
  Example    : none
  Description: Calculate the number of LSF jobs to submit based on the size of the query file.
  Returntype : The number of jobs.
  Exceptions : none
  Caller     : general

=cut

sub calculate_num_jobs {

  my $query = shift;

  my $bytes_per_job = 250000;

  my $size = (stat $query)[7];
229
  if( $size == 0 ){ return 0 }
230
  return int($size/$bytes_per_job) || 1;
231
232
233
234
235
236
237
238
239
240

}

# Get class name from fully-qualified object name
# e.g. return ExonerateBasic from XrefMapper::Methods::ExonerateBasic=HASH(Ox12113c0)

sub get_class_name() {

  my $self = shift;

241
  my $module_name = ref($self);
242

243
  my @bits = split(/::/, $module_name);
244

245
  return $bits[-1];
246
247
248

}

Glenn Proctor's avatar
Glenn Proctor committed
249
250
251
252
253
254
255
256
257
# Check if any .err files exist that have non-zero size;
# this indicates that something has gone wrong with the exonerate run

sub check_err {

  my ($self, $dir) = @_;

  foreach my $err (glob("$dir/*.err")) {

258
    print "\n\n*** Warning: $err has non-zero size; may indicate problems with exonerate run\n\n\n" if (-s $err);
Glenn Proctor's avatar
Glenn Proctor committed
259
260
261
262

  }
}

263
264
265
266
# Percentage identity that query (xref) must match to be considered.

sub query_identity_threshold {

267
  return 0;
268
269
270
271
272
273
274
275

}


# Percentage identity that target (ensembl) must match to be considered.

sub target_identity_threshold {

276
  return 0;
277
278
279

}

280
281
1;