runWorker.pl 9.03 KB
Newer Older
1
#!/usr/bin/env perl
Jessica Severin's avatar
Jessica Severin committed
2 3

use strict;
Leo Gordon's avatar
Leo Gordon committed
4
use warnings;
5 6 7 8 9

    # Finding out own path in order to reference own components (including own modules):
use Cwd            ();
use File::Basename ();
BEGIN {
        # Resolve the real (symlink-free) path of this script and go two directory levels up from it;
        # the result is exported as the eHive root directory.
        # NOTE(review): any EHIVE_ROOT_DIR value already present in the environment is overwritten here.
    $ENV{'EHIVE_ROOT_DIR'} = File::Basename::dirname( File::Basename::dirname( Cwd::realpath($0) ) );

        # This has to happen at compile time (hence the BEGIN block), so that the "use Bio::EnsEMBL::Hive::..."
        # statements further down look into our own modules directory first:
    unshift @INC, $ENV{'EHIVE_ROOT_DIR'}.'/modules';
}

14

Jessica Severin's avatar
Jessica Severin committed
15
use Getopt::Long;
16
use Bio::EnsEMBL::Hive::Utils ('script_usage');
17
use Bio::EnsEMBL::Registry;
18
use Bio::EnsEMBL::Hive::DBSQL::DBAdaptor;
19
use Bio::EnsEMBL::Hive::Queen;
20
use Bio::EnsEMBL::Hive::Valley;
21

22
# Switch the Registry's version check off before any registry file is loaded or any connection is made.
# NOTE(review): presumably this suppresses the API-vs-database version comparison -- confirm in Bio::EnsEMBL::Registry.
Bio::EnsEMBL::Registry->no_version_check(1);
Jessica Severin's avatar
Jessica Severin committed
23

24
# Variables that receive the command-line options; they are grouped the same way as in the GetOptions() call
# and in the OPTIONS section of the POD. Each one stays undefined unless its option is given.
my ($url, $reg_conf, $reg_type, $reg_alias, $nosqlvc);                   # Connection parameters
my ($resource_class_id, $resource_class_name, $analysis_id, $logic_name, $job_id, $force);  # Task specification parameters
my ($job_limit, $life_span, $no_cleanup, $no_write, $hive_log_dir, $worker_log_dir, $retry_throwing_jobs, $can_respecialize);   # Worker control parameters
my ($help, $debug);                                                      # Other options
Jessica Severin's avatar
Jessica Severin committed
28

Leo Gordon's avatar
Leo Gordon committed
29 30 31
# Parse the command line into the option variables declared above.
# In a Getopt::Long specification "=s" requires a string value, "=i" requires an integer value,
# a bare name is a boolean flag, and "|" separates alternative names of the same option.
GetOptions(

# Connection parameters:
           'url=s'                      => \$url,
           'reg_conf|regfile=s'         => \$reg_conf,
           'reg_type=s'                 => \$reg_type,
           'reg_alias|regname=s'        => \$reg_alias,
           'nosqlvc=i'                  => \$nosqlvc,       # can't use the binary "!" as it is a propagated option

# Task specification parameters:
           'rc_id=i'                    => \$resource_class_id,
           'rc_name=s'                  => \$resource_class_name,
           'analysis_id=i'              => \$analysis_id,
           'logic_name=s'               => \$logic_name,
           'job_id=i'                   => \$job_id,
           'force=i'                    => \$force,

# Worker control parameters:
           'job_limit=i'                => \$job_limit,
           'life_span|lifespan=i'       => \$life_span,
           'no_cleanup'                 => \$no_cleanup,
           'no_write'                   => \$no_write,
           'hive_log_dir|hive_output_dir=s'         => \$hive_log_dir,       # keep compatibility with the old name
           'worker_log_dir|worker_output_dir=s'     => \$worker_log_dir,     # will take precedence over hive_log_dir if set
           'retry_throwing_jobs=i'      => \$retry_throwing_jobs,
           'can_respecialize=i'         => \$can_respecialize,

# Other commands
           'h|help'                     => \$help,
           'debug=i'                    => \$debug,
) or do {
        # GetOptions() returns false after having reported every offending option on STDERR.
        # Refuse to start instead of running a Worker on a partially understood command line
        # (a mistyped -job_limit, for example, would otherwise be ignored after a mere warning).
        # NOTE(review): script_usage() is presumed to print the usage and exit with the given code,
        #               as it is used elsewhere in this script -- confirm in Bio::EnsEMBL::Hive::Utils.
    print "\nERROR : Invalid command line options (see the messages above)\n\n";
    script_usage(1);
};

    # An explicit request for help is not an error, hence the zero passed on:
if ($help) { script_usage(0); }
Jessica Severin's avatar
Jessica Severin committed
62

63
if($reg_conf) {     # if reg_conf is defined, we load it regardless of whether it is used to connect to the Hive database or not:
        # The same $reg_conf is also handed to the DBAdaptor constructor further down.
    Bio::EnsEMBL::Registry->load_all($reg_conf);
}

67 68 69
# The adaptor for the Hive database; stays undefined if no connection parameters were given.
my $hive_dba;

if($url or $reg_alias) {
        # Perform environment variable substitution in the URL, with and without curly braces
        # (i.e. both the ${NAME} and the $NAME forms are recognized).
        #
        # Both forms are handled in one left-to-right pass, so that text that has just been substituted in
        # (for instance a password that happens to contain a '$') is never scanned and expanded a second time.
        # A plain alternation is enough for that; no Perl 5.10 "branch reset" is needed,
        # so this keeps working for people who may still be using Perl 5.8.
        #
        # Make sure expressions stay as they were if we were unable to substitute them:
        # capture group 1 holds the whole reference and is put back when the variable is not defined.
        # Group 2 holds NAME of the braced form, group 3 holds NAME of the bare form.
        #
    if($url) {
        $url =~ s/(\$(?:\{(\w+)\}|(\w+)))/my $name = defined($2) ? $2 : $3; defined($ENV{$name}) ? $ENV{$name} : $1/eg;
    }

        # Any of these may be undefined; it is left to the DBAdaptor constructor to decide
        # whether to connect via the URL or via the Registry.
    $hive_dba = Bio::EnsEMBL::Hive::DBSQL::DBAdaptor->new(
            -url                            => $url,
            -reg_conf                       => $reg_conf,
            -reg_type                       => $reg_type,
            -reg_alias                      => $reg_alias,
            -no_sql_schema_version_check    => $nosqlvc,
    );

} else {
    print "\nERROR : Connection parameters (url or reg_conf+reg_alias) need to be specified\n\n";
    script_usage(1);
}

95
# Make sure the constructor has actually given us a Hive DBAdaptor before going any further:
unless($hive_dba and $hive_dba->isa("Bio::EnsEMBL::Hive::DBSQL::DBAdaptor")) {
    print "ERROR : no database connection\n\n";
    script_usage(1);
}

100
# The Queen object obtained from the Hive database adaptor is what creates the Worker below:
my $queen = $hive_dba->get_Queen();

# Ask a freshly created Valley where this process is running; the four values returned
# are passed on unchanged as the identity of the new Worker.
my ($meadow_type, $meadow_name, $process_id, $exec_host) = Bio::EnsEMBL::Hive::Valley->new()->whereami();

# Create the Worker from its identity plus whatever was given on the command line.
# Options that were not given are passed as undef.
# NOTE(review): defaults for undefined values are presumably chosen by create_new_worker() -- confirm in Bio::EnsEMBL::Hive::Queen.
my $worker = $queen->create_new_worker(
      # Worker identity:
         -meadow_type           => $meadow_type,
         -meadow_name           => $meadow_name,
         -process_id            => $process_id,
         -exec_host             => $exec_host,
         -resource_class_id     => $resource_class_id,
         -resource_class_name   => $resource_class_name,

      # Worker control parameters:
         -job_limit             => $job_limit,
         -life_span             => $life_span,
         -no_cleanup            => $no_cleanup,
         -no_write              => $no_write,
         -worker_log_dir        => $worker_log_dir,
         -hive_log_dir          => $hive_log_dir,
         -retry_throwing_jobs   => $retry_throwing_jobs,
         -can_respecialize      => $can_respecialize,

      # Other parameters:
         -debug                 => $debug,
);
126

127 128 129 130 131 132
# A specialization request is only built when at least one of -analysis_id, -logic_name or -job_id
# was given; otherwise the variable keeps the (false) value the "||" chain evaluated to.
my $specialization_arglist = $analysis_id || $logic_name || $job_id;
if($specialization_arglist) {
        # Replace the true value found above with the actual list of named arguments:
    $specialization_arglist = [
        -analysis_id    => $analysis_id,
        -logic_name     => $logic_name,
        -job_id         => $job_id,
        -force          => $force,
    ];
}
133

134
# Hand control over to the Worker. The argument is either the array reference with the specialization
# request, or a false value when none of -analysis_id, -logic_name or -job_id was given.
# NOTE(review): in the latter case the Worker presumably picks an analysis by itself -- confirm in Worker::run().
$worker->run( $specialization_arglist );
Jessica Severin's avatar
Jessica Severin committed
135 136


137 138 139 140 141 142
__DATA__

=pod

=head1 NAME

Leo Gordon's avatar
Leo Gordon committed
143
    runWorker.pl
144 145 146

=head1 DESCRIPTION

Leo Gordon's avatar
Leo Gordon committed
147 148
    runWorker.pl is an eHive component script that does the work of a single Worker -
    specializes in one of the analyses and starts executing jobs of that analysis one-by-one or batch-by-batch.
149

Leo Gordon's avatar
Leo Gordon committed
150 151
    Most of the functionality of the eHive is accessible via the beekeeper.pl script,
    but feel free to run runWorker.pl directly if you think you know what you are doing :)
Leo Gordon's avatar
Leo Gordon committed
152 153 154

=head1 USAGE EXAMPLES

Leo Gordon's avatar
Leo Gordon committed
155 156
        # Run one local worker process in ehive_dbname and let the system pick up the analysis
    runWorker.pl -url mysql://username:secret@hostname:port/ehive_dbname
Leo Gordon's avatar
Leo Gordon committed
157

158 159 160
        # Run one local worker process in ehive_dbname and let the system pick up the analysis from the given resource_class
    runWorker.pl -url mysql://username:secret@hostname:port/ehive_dbname -rc_name low_mem

Leo Gordon's avatar
Leo Gordon committed
161 162
        # Run one local worker process in ehive_dbname and specify the logic_name
    runWorker.pl -url mysql://username:secret@hostname:port/ehive_dbname -logic_name fast_blast
Leo Gordon's avatar
Leo Gordon committed
163

164
        # Run a specific job in a local worker process:
165 166
    runWorker.pl -url mysql://username:secret@hostname:port/ehive_dbname -job_id 123456

167 168
=head1 OPTIONS

169
=head2 Connection parameters:
Leo Gordon's avatar
Leo Gordon committed
170

171 172
    -reg_conf <path>        : path to a Registry configuration file
    -reg_alias <string>     : species/alias name for the Hive DBAdaptor
173
    -url <url string>       : url defining where database is located
Leo Gordon's avatar
Leo Gordon committed
174

175 176 177
=head2 Task specification parameters:

    -rc_id <id>                 : resource class id
Leo Gordon's avatar
Leo Gordon committed
178
    -rc_name <string>           : resource class name
179
    -analysis_id <id>           : pre-specify this worker in a particular analysis defined by database id
Leo Gordon's avatar
Leo Gordon committed
180
    -logic_name <string>        : pre-specify this worker in a particular analysis defined by name
181
    -job_id <id>                : run a specific job defined by its database id
182
    -force 0|1                  : set to 1 if you want to force running a Worker over a BLOCKED analysis or to run a specific DONE/SEMAPHORED job_id
183 184

=head2 Worker control parameters:
Leo Gordon's avatar
Leo Gordon committed
185

186
    -job_limit <num>            : #jobs to run before worker can die naturally
187
    -life_span <num>            : number of minutes this worker is allowed to run
Leo Gordon's avatar
Leo Gordon committed
188
    -no_cleanup                 : don't perform temp directory cleanup when worker exits
189
    -no_write                   : don't write_output or auto_dataflow input_job
190 191
    -hive_log_dir <path>        : directory where stdout/stderr of the whole hive of workers is redirected
    -worker_log_dir <path>      : directory where stdout/stderr of this particular worker is redirected
192
    -retry_throwing_jobs <0|1>  : if a job dies *knowingly*, should we retry it by default?
193
    -can_respecialize <0|1>     : allow this worker to re-specialize into another analysis (within resource_class) after it has exhausted all jobs of the current one
194

Leo Gordon's avatar
Leo Gordon committed
195 196
=head2 Other options:

197 198 199
    -help                       : print this help
    -debug <level>              : turn on debug messages at <level>
    -analysis_stats             : show status of each analysis in hive (NOTE: not among the options currently parsed by runWorker.pl)
200

201 202
=head1 LICENSE

203
    Copyright [1999-2014] Wellcome Trust Sanger Institute and the EMBL-European Bioinformatics Institute
204 205 206 207 208 209 210 211 212 213

    Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License.
    You may obtain a copy of the License at

         http://www.apache.org/licenses/LICENSE-2.0

    Unless required by applicable law or agreed to in writing, software distributed under the License
    is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    See the License for the specific language governing permissions and limitations under the License.

Leo Gordon's avatar
Leo Gordon committed
214 215
=head1 CONTACT

216
    Please subscribe to the Hive mailing list:  http://listserver.ebi.ac.uk/mailman/listinfo/ehive-users  to discuss Hive-related questions or to be notified of our updates
Leo Gordon's avatar
Leo Gordon committed
217

218 219
=cut