ExonerateBasic.pm 4.11 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
# Base class that all other mapping methods inherit from

package XrefMapper::Methods::ExonerateBasic;

use strict;

use File::Basename;
use IPC::Open3;

# Path to exonerate executable
my $exonerate_path = "/usr/local/ensembl/bin/exonerate-0.8.3";


sub new {

  my($class) = @_;

  my $self ={};
  bless $self,$class;

  return $self;

}

=head2 run

  Arg[1]     : Query filename to pass to exonerate; this should be the XREF file.
  Arg[2]     : Target filename to pass to exonerate; this should be the ENSEMBL file.
  Example    : none
  Description: Run exonerate with default options.
  Returntype : none
  Exceptions : none
  Caller     : general

=cut

sub run() {

39
  my ($self, $query, $target, $dir) = @_;
40

41
  my $name = $self->submit_exonerate($query, $target, $dir, $self->options());
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76

  return $name;

}

=head2 options

  Args       : none
  Example    : none
  Description: Options to pass to exonerate. Override as appropriate.
  Returntype : List of strings.
  Exceptions : none
  Caller     : general

=cut

sub options() {

  return ();

}

=head2 submit_exonerate

  Args       : none
  Example    : none
  Description: Submit a *single* mapping job array to exonerate.
  Returntype : The name of the job array submitted to LSF. Can be used in depend job.
  Exceptions : none
  Caller     : general

=cut

sub submit_exonerate {

77
  my ($self, $query, $target, $root_dir, @options) = @_;
78

79
80
81
  my $queryfile = basename($query);
  my $targetfile = basename($target);

82
83
84
85
86
87
88
89
90
  my $num_jobs = calculate_num_jobs($query);

  my $options_str = join(" ", @options);

  my $unique_name = $self->get_class_name() . "_" . time();

  my $prefix = $root_dir . "/" . basename($query);
  $prefix =~ s/\.\w+$//;

91
92
93
  my ($ensembl_type) = $prefix =~ /.*_(dna|prot)$/; # dna or prot

  my $output = $self->get_class_name() . "_" . $ensembl_type . "_" . "\$LSB_JOBINDEX.map";
94

95
  my @main_bsub = ( 'bsub', '-J', $unique_name . "[1-$num_jobs]", '-o', "$prefix.%J-%I.out", '-e', "$prefix.%J-%I.err");
96
97
98
99
100

  #print "bsub command: " . join(" ", @main_bsub) . "\n\n";

  # Use IPC::Open3 to open the process so that we can read and write from/to its stdout/stderr
  my ($wtr, $rtr, $etr, $pid);
101
  $pid = open3($wtr, $rtr, $etr, @main_bsub) || die "Cannot call bsub command";
102
103
104
105
106
107
108

  # Create actual execute script to be executed with LSF, and write to pipe
  my $main_job = <<EOF;
. /usr/local/lsf/conf/profile.lsf

cd /tmp

109
rm -f /tmp/\$LSB_JOBINDEX.$queryfile /tmp/\$LSB_JOBINDEX.$targetfile /tmp/$output
110

111
112
lsrcp ecs1a:$target /tmp/\$LSB_JOBINDEX.$targetfile
lsrcp ecs1a:$query  /tmp/\$LSB_JOBINDEX.$queryfile
113

114
$exonerate_path /tmp/\$LSB_JOBINDEX.$queryfile /tmp/\$LSB_JOBINDEX.$targetfile --querychunkid \$LSB_JOBINDEX --querychunktotal $num_jobs --showvulgar false --showalignment FALSE --ryo "xref:%qi:%ti:%qab:%qae:%tab:%tae:%C:%s\n" $options_str | grep '^xref' > /tmp/$output
115
116
117

lsrcp /tmp/$output ecs1a:$root_dir/$output

118
rm -f /tmp/\$LSB_JOBINDEX.$queryfile /tmp/\$LSB_JOBINDEX.$targetfile /tmp/$output
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
EOF

  # TODO make sure query/target are the right way round

  print $wtr $main_job;

  #print "Job:\n" . $main_job . "\n\n";

  close($wtr);

  # Wait until bsub has actually run - will print to its stdout ($rtr) and then close it
  my $jobid;
  while (<$rtr>) {
    if (/Job <([0-9]+)> is/) {
      $jobid = $1;
Glenn Proctor's avatar
Glenn Proctor committed
134
      print "LSF job ID for main mapping job: $jobid (job array with $num_jobs jobs)\n"
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
    }
  }
  if (!defined($jobid)) {
    print STDERR "Error: could not get LSF job ID for mapping job\n";
  }

  return $unique_name;

}

=head2 calculate_num_jobs

  Args       : Query file name
  Example    : none
  Description: Calculate the number of LSF jobs to submit based on the size of the query file.
  Returntype : The number of jobs.
  Exceptions : none
  Caller     : general

=cut

sub calculate_num_jobs {

  my $query = shift;

  my $bytes_per_job = 250000;

  my $size = (stat $query)[7];

164
  return int($size/$bytes_per_job) || 1;
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184

}

# Get class name from fully-qualified object name
# e.g. return ExonerateBasic from XrefMapper::Methods::ExonerateBasic=HASH(Ox12113c0)

sub get_class_name() {

  my $self = shift;

  $self =~ s/=.*$//;

  my @bits = split(/::/, $self);

  return @bits[$#bits];

}

1;