ExonerateBasic.pm 4.46 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
# Base class that all other mapping methods inherit from

package XrefMapper::Methods::ExonerateBasic;

use strict;

use File::Basename;
use IPC::Open3;

# Path to exonerate executable
Glenn Proctor's avatar
Glenn Proctor committed
11
my $exonerate_path = "/usr/local/ensembl/bin/exonerate-0.9";
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38


sub new {

  my($class) = @_;

  my $self ={};
  bless $self,$class;

  return $self;

}

=head2 run

  Arg[1]     : Query filename to pass to exonerate; this should be the XREF file.
  Arg[2]     : Target filename to pass to exonerate; this should be the ENSEMBL file.
  Example    : none
  Description: Run exonerate with default options.
  Returntype : none
  Exceptions : none
  Caller     : general

=cut

sub run() {

39
  my ($self, $query, $target, $dir) = @_;
40

41
  my $name = $self->submit_exonerate($query, $target, $dir, $self->options());
42

Glenn Proctor's avatar
Glenn Proctor committed
43
44
  $self->check_err($dir);

45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
  return $name;

}

=head2 options

  Args       : none
  Example    : none
  Description: Options to pass to exonerate. Override as appropriate.
  Returntype : List of strings.
  Exceptions : none
  Caller     : general

=cut

sub options() {

  return ();

}

=head2 submit_exonerate

  Args       : none
  Example    : none
  Description: Submit a *single* mapping job array to exonerate.
  Returntype : The name of the job array submitted to LSF. Can be used in depend job.
  Exceptions : none
  Caller     : general

=cut

sub submit_exonerate {

79
  my ($self, $query, $target, $root_dir, @options) = @_;
80

81
82
83
  my $queryfile = basename($query);
  my $targetfile = basename($target);

84
85
86
87
88
89
90
91
92
  my $num_jobs = calculate_num_jobs($query);

  my $options_str = join(" ", @options);

  my $unique_name = $self->get_class_name() . "_" . time();

  my $prefix = $root_dir . "/" . basename($query);
  $prefix =~ s/\.\w+$//;

93
94
95
  my ($ensembl_type) = $prefix =~ /.*_(dna|prot)$/; # dna or prot

  my $output = $self->get_class_name() . "_" . $ensembl_type . "_" . "\$LSB_JOBINDEX.map";
96

97
  my @main_bsub = ( 'bsub', '-J', $unique_name . "[1-$num_jobs]", '-o', "$prefix.%J-%I.out", '-e', "$prefix.%J-%I.err");
98
99
100
101
102

  #print "bsub command: " . join(" ", @main_bsub) . "\n\n";

  # Use IPC::Open3 to open the process so that we can read and write from/to its stdout/stderr
  my ($wtr, $rtr, $etr, $pid);
103
  $pid = open3($wtr, $rtr, $etr, @main_bsub) || die "Cannot call bsub command";
104
105
106
107
108
109
110

  # Create actual execute script to be executed with LSF, and write to pipe
  my $main_job = <<EOF;
. /usr/local/lsf/conf/profile.lsf

cd /tmp

111
rm -f /tmp/\$LSB_JOBINDEX.$queryfile /tmp/\$LSB_JOBINDEX.$targetfile /tmp/$output
112

113
114
lsrcp ecs1a:$target /tmp/\$LSB_JOBINDEX.$targetfile
lsrcp ecs1a:$query  /tmp/\$LSB_JOBINDEX.$queryfile
115

116
$exonerate_path /tmp/\$LSB_JOBINDEX.$queryfile /tmp/\$LSB_JOBINDEX.$targetfile --querychunkid \$LSB_JOBINDEX --querychunktotal $num_jobs --showvulgar false --showalignment FALSE --ryo "xref:%qi:%ti:%ei:%ql:%tl:%qab:%qae:%tab:%tae:%C:%s\n" $options_str | grep '^xref' > /tmp/$output
117
118
119

lsrcp /tmp/$output ecs1a:$root_dir/$output

120
rm -f /tmp/\$LSB_JOBINDEX.$queryfile /tmp/\$LSB_JOBINDEX.$targetfile /tmp/$output
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
EOF

  # TODO make sure query/target are the right way round

  print $wtr $main_job;

  #print "Job:\n" . $main_job . "\n\n";

  close($wtr);

  # Wait until bsub has actually run - will print to its stdout ($rtr) and then close it
  my $jobid;
  while (<$rtr>) {
    if (/Job <([0-9]+)> is/) {
      $jobid = $1;
Glenn Proctor's avatar
Glenn Proctor committed
136
      print "LSF job ID for main mapping job: $jobid (job array with $num_jobs jobs)\n"
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
    }
  }
  if (!defined($jobid)) {
    print STDERR "Error: could not get LSF job ID for mapping job\n";
  }

  return $unique_name;

}

=head2 calculate_num_jobs

  Args       : Query file name
  Example    : none
  Description: Calculate the number of LSF jobs to submit based on the size of the query file.
  Returntype : The number of jobs.
  Exceptions : none
  Caller     : general

=cut

sub calculate_num_jobs {

  my $query = shift;

  my $bytes_per_job = 250000;

  my $size = (stat $query)[7];

166
  return int($size/$bytes_per_job) || 1;
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184

}

# Get class name from fully-qualified object name
# e.g. return ExonerateBasic from XrefMapper::Methods::ExonerateBasic=HASH(Ox12113c0)

sub get_class_name() {

  my $self = shift;

  $self =~ s/=.*$//;

  my @bits = split(/::/, $self);

  return @bits[$#bits];

}

Glenn Proctor's avatar
Glenn Proctor committed
185
186
187
188
189
190
191
192
193
194
195
196
197
198
# Check if any .err files exist that have non-zero size;
# this indicates that something has gone wrong with the exonerate run

sub check_err {

  my ($self, $dir) = @_;

  foreach my $err (glob("$dir/*.err")) {

    print "Warning: $err has non-zero size; may indicate problems with exonerate run\n" if (-s $err);

  }
}

199
200
1;