ExonerateBasic.pm 4.89 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
# Base class that all other mapping methods inherit from

package XrefMapper::Methods::ExonerateBasic;

use strict;

use File::Basename;
use IPC::Open3;

# Path to exonerate executable
Ian Longden's avatar
Ian Longden committed
11
my $exonerate_path = "/usr/local/ensembl/bin/exonerate-0.9.0";
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38


sub new {

  my($class) = @_;

  my $self ={};
  bless $self,$class;

  return $self;

}

=head2 run

  Arg[1]     : Query filename to pass to exonerate; this should be the XREF file.
  Arg[2]     : Target filename to pass to exonerate; this should be the ENSEMBL file.
  Example    : none
  Description: Run exonerate with default options.
  Returntype : none
  Exceptions : none
  Caller     : general

=cut

sub run() {

39
  my ($self, $query, $target, $dir) = @_;
40

41
  my $name = $self->submit_exonerate($query, $target, $dir, $self->options());
42

Glenn Proctor's avatar
Glenn Proctor committed
43
44
  $self->check_err($dir);

45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
  return $name;

}

=head2 options

  Args       : none
  Example    : none
  Description: Options to pass to exonerate. Override as appropriate.
  Returntype : List of strings.
  Exceptions : none
  Caller     : general

=cut

sub options() {

  return ();

}

=head2 submit_exonerate

  Args       : none
  Example    : none
  Description: Submit a *single* mapping job array to exonerate.
  Returntype : The name of the job array submitted to LSF. Can be used in depend job.
  Exceptions : none
  Caller     : general

=cut

sub submit_exonerate {

79
  my ($self, $query, $target, $root_dir, @options) = @_;
80

81
82
83
  my $queryfile = basename($query);
  my $targetfile = basename($target);

84
85
86
87
88
89
90
91
92
  my $num_jobs = calculate_num_jobs($query);

  my $options_str = join(" ", @options);

  my $unique_name = $self->get_class_name() . "_" . time();

  my $prefix = $root_dir . "/" . basename($query);
  $prefix =~ s/\.\w+$//;

93
  my ($ensembl_type) = $prefix =~ /.*_(dna|peptide)$/; # dna or prot
94
95

  my $output = $self->get_class_name() . "_" . $ensembl_type . "_" . "\$LSB_JOBINDEX.map";
96

97
  my @main_bsub = ( 'bsub', '-J' . $unique_name . "[1-$num_jobs]", '-o', "$prefix.%J-%I.out", '-e', "$prefix.%J-%I.err");
98
99
100
101
102
103
104
105
106

  #print "bsub command: " . join(" ", @main_bsub) . "\n\n";

  # Create actual execute script to be executed with LSF, and write to pipe
  my $main_job = <<EOF;
. /usr/local/lsf/conf/profile.lsf

cd /tmp

107
rm -f /tmp/\$LSB_JOBINDEX.$queryfile /tmp/\$LSB_JOBINDEX.$targetfile /tmp/$output
108

109
110
lsrcp $target /tmp/\$LSB_JOBINDEX.$targetfile
lsrcp $query  /tmp/\$LSB_JOBINDEX.$queryfile
111

112
$exonerate_path /tmp/\$LSB_JOBINDEX.$queryfile /tmp/\$LSB_JOBINDEX.$targetfile --querychunkid \$LSB_JOBINDEX --querychunktotal $num_jobs --showvulgar false --showalignment FALSE --ryo "xref:%qi:%ti:%ei:%ql:%tl:%qab:%qae:%tab:%tae:%C:%s\n" $options_str | grep '^xref' > /tmp/$output
113

114
lsrcp /tmp/$output $root_dir/$output
115

116
rm -f /tmp/\$LSB_JOBINDEX.$queryfile /tmp/\$LSB_JOBINDEX.$targetfile /tmp/$output
117
118
EOF

119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
  # now submit it
  my $jobid = 0;

  eval {
    my $pid;
    my $reader;

    local *BSUB;
    local *BSUB_READER;

    if (($reader = open(BSUB_READER, '-|'))) {
      while (<BSUB_READER>) {
	if (/^Job <(\d+)> is submitted/) {
	  $jobid = $1;
	  print "LSF job ID for main mapping job: $jobid (job array with $num_jobs jobs)\n"
	}
      }
      close(BSUB_READER);
    } else {
      die("Could not fork : $!\n") unless (defined($reader));
      open(STDERR, ">&STDOUT");
      if (($pid = open(BSUB, '|-'))) {
	
	print BSUB $main_job;
	close BSUB;
	if ($? != 0) {
	  die("bsub exited with non-zero status - job not submitted\n");
	}
      } else {
	if (defined($pid)) {
	  exec(@main_bsub);
	  die("Could not exec bsub : $!\n");
	} else {
	  die("Could not fork : $!\n");
	}
      }
      exit(0);
156
    }
157
158
159
160
161
162
  };

  if ($@) {
    # Something went wrong
    warn("Job submission failed:\n$@\n");
    return 0;
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
  }

  return $unique_name;

}

=head2 calculate_num_jobs

  Args       : Query file name
  Example    : none
  Description: Calculate the number of LSF jobs to submit based on the size of the query file.
  Returntype : The number of jobs.
  Exceptions : none
  Caller     : general

=cut

sub calculate_num_jobs {

  my $query = shift;

  my $bytes_per_job = 250000;

  my $size = (stat $query)[7];

188
  return int($size/$bytes_per_job) || 1;
189
190
191
192
193
194
195
196
197
198

}

# Get class name from fully-qualified object name
# e.g. return ExonerateBasic from XrefMapper::Methods::ExonerateBasic=HASH(Ox12113c0)

sub get_class_name() {

  my $self = shift;

199
  my $module_name = ref($self);
200

201
  my @bits = split(/::/, $module_name);
202

203
  return $bits[-1];
204
205
206

}

Glenn Proctor's avatar
Glenn Proctor committed
207
208
209
210
211
212
213
214
215
216
217
218
219
220
# Check if any .err files exist that have non-zero size;
# this indicates that something has gone wrong with the exonerate run

sub check_err {

  my ($self, $dir) = @_;

  foreach my $err (glob("$dir/*.err")) {

    print "Warning: $err has non-zero size; may indicate problems with exonerate run\n" if (-s $err);

  }
}

221
222
223
224
# Percentage identity that query (xref) must match to be considered.

sub query_identity_threshold {

225
  return 0;
226
227
228
229
230
231
232
233

}


# Percentage identity that target (ensembl) must match to be considered.

sub target_identity_threshold {

234
  return 0;
235
236
237

}

238
239
1;