runWorker.pl 8.36 KB
Newer Older
1
#!/usr/bin/env perl
Jessica Severin's avatar
Jessica Severin committed
2 3

use strict;
Leo Gordon's avatar
Leo Gordon committed
4
use warnings;
Jessica Severin's avatar
Jessica Severin committed
5
use Getopt::Long;
6
use Bio::EnsEMBL::Hive::Utils ('script_usage');
7
use Bio::EnsEMBL::Registry;
8
use Bio::EnsEMBL::Hive::DBSQL::DBAdaptor;
9
use Bio::EnsEMBL::Hive::Queen;
10
use Bio::EnsEMBL::Hive::Valley;
11

12
Bio::EnsEMBL::Registry->no_version_check(1);
Jessica Severin's avatar
Jessica Severin committed
13

14
# Default settings for a direct database connection.
# The host/port/user/password/database command-line options (and a loose
# database-name argument) overwrite individual entries of this hash.
my $db_conf = {
    '-host'   => '',
    '-port'   => 3306,
    '-user'   => 'ensro',
    '-pass'   => '',
    '-dbname' => '',
};

# Connection parameters
my ($reg_conf, $reg_alias, $url);

# Task specification parameters
my ($resource_class_id, $resource_class_name);
my ($analysis_id, $logic_name, $job_id);

# Worker control parameters
my ($job_limit, $life_span);
my ($no_cleanup, $no_write);
my ($hive_log_dir, $worker_log_dir);
my ($retry_throwing_jobs, $compile_module_once, $force);

# Other commands
my ($help, $debug);
Jessica Severin's avatar
Jessica Severin committed
26

Leo Gordon's avatar
Leo Gordon committed
27 28 29
# Parse the command line. The option specs are grouped by purpose and then
# handed to GetOptions() as a single flat list.

# Connection parameters:
my @connection_options = (
    'reg_conf|regfile=s'    => \$reg_conf,
    'reg_alias|regname=s'   => \$reg_alias,
    'url=s'                 => \$url,
    'host|dbhost=s'         => \$db_conf->{'-host'},
    'port|dbport=i'         => \$db_conf->{'-port'},
    'user|dbuser=s'         => \$db_conf->{'-user'},
    'password|dbpass=s'     => \$db_conf->{'-pass'},
    'database|dbname=s'     => \$db_conf->{'-dbname'},
);

# Task specification parameters:
my @task_options = (
    'rc_id=i'               => \$resource_class_id,
    'rc_name=s'             => \$resource_class_name,
    'analysis_id=i'         => \$analysis_id,
    'logic_name=s'          => \$logic_name,
    'job_id=i'              => \$job_id,
);

# Worker control parameters:
my @worker_control_options = (
    'job_limit|limit=i'                     => \$job_limit,
    'life_span|lifespan=i'                  => \$life_span,
    'no_cleanup'                            => \$no_cleanup,
    'no_write'                              => \$no_write,
    'hive_log_dir|hive_output_dir=s'        => \$hive_log_dir,      # keep compatibility with the old name
    'worker_log_dir|worker_output_dir=s'    => \$worker_log_dir,    # will take precedence over hive_log_dir if set
    'retry_throwing_jobs=i'                 => \$retry_throwing_jobs,
    'compile_module_once=i'                 => \$compile_module_once,
    'force=i'                               => \$force,
);

# Other commands:
my @other_options = (
    'h|help'                => \$help,
    'debug=i'               => \$debug,
);

# Loose (non-option) arguments are interpreted as the database name,
# for compatibility with mysql[dump].
my @loose_argument_handler = (
    '<>' => sub {
        my ($loose_argument) = @_;
        $db_conf->{'-dbname'} = $loose_argument;
    },
);

GetOptions(
    @connection_options,
    @task_options,
    @worker_control_options,
    @other_options,
    @loose_argument_handler,
);
Jessica Severin's avatar
Jessica Severin committed
64

65
# -h/-help: hand over to script_usage() with argument 0.
# NOTE(review): script_usage() presumably prints the POD and exits with the given code - confirm in Bio::EnsEMBL::Hive::Utils.
script_usage(0) if $help;

# A registry configuration file, when supplied, is always loaded -
# whether or not it ends up being used to connect to the Hive database.
Bio::EnsEMBL::Registry->load_all($reg_conf) if $reg_conf;

# Obtain the Hive database adaptor. The connection methods are tried in this order:
#   1. -reg_alias : look the adaptor up in the Registry (group 'hive')
#   2. -url       : build it from a connection URL
#   3. -host + -user + -dbname (plus -port / -password) : connect directly
# If none of them is usable, print an error and show the usage.
my $DBA;
if($reg_alias) {
    $DBA = Bio::EnsEMBL::Registry->get_DBAdaptor($reg_alias, 'hive');
} elsif($url) {
    # URLFactory is not among the modules loaded at the top of this script;
    # load it explicitly rather than relying on another module pulling it in.
    require Bio::EnsEMBL::Hive::URLFactory;
    $DBA = Bio::EnsEMBL::Hive::URLFactory->fetch($url) or die "Unable to connect to '$url'\n";
} elsif ($db_conf->{'-host'} and $db_conf->{'-user'} and $db_conf->{'-dbname'}) {
    $DBA = Bio::EnsEMBL::Hive::DBSQL::DBAdaptor->new( %$db_conf );
} else {
    print "\nERROR : Connection parameters (reg_conf+reg_alias, url or dbhost+dbuser+dbname) need to be specified\n\n";
    script_usage(1);
}

# Whichever route was taken, we must end up with a Hive DBAdaptor
# (the Registry lookup, for instance, may have returned nothing).
unless($DBA and $DBA->isa("Bio::EnsEMBL::Hive::DBSQL::DBAdaptor")) {
  print "ERROR : no database connection\n\n";
  script_usage(1);
}

88
# The Queen object is used below to create and specialize the Worker.
my $queen = $DBA->get_Queen();

# Ask the Valley where this process is running.
my ($meadow_type, $meadow_name, $process_id, $exec_host)
    = Bio::EnsEMBL::Hive::Valley->new()->whereami();

# Report the detected location before the Worker is created.
print 'runWorker('
    . join(', ',
        "-MeadowType => $meadow_type",
        "-MeadowName => $meadow_name",
        "-ProcessId => $process_id",
        "-ExecHost => $exec_host",
      )
    . ")\n";
93

94
# Create a Worker, specialize it (optionally to a given analysis or job) and run it.
# Both steps run inside one eval so that anything either of them throws
# can be reported before the script exits.
my $worker;

eval {
    $worker = $queen->create_new_worker(
      # Worker identity:
         -meadow_type           => $meadow_type,
         -meadow_name           => $meadow_name,
         -process_id            => $process_id,
         -exec_host             => $exec_host,
         -resource_class_id     => $resource_class_id,
         -resource_class_name   => $resource_class_name,

      # Worker control parameters:
         -job_limit             => $job_limit,
         -life_span             => $life_span,
         -no_cleanup            => $no_cleanup,
         -no_write              => $no_write,
         -worker_log_dir        => $worker_log_dir,
         -hive_log_dir          => $hive_log_dir,
         -retry_throwing_jobs   => $retry_throwing_jobs,
         -compile_module_once   => $compile_module_once,

      # Other parameters:
         -debug                 => $debug,
    );

    $queen->specialize_new_worker(  $worker,
         -analysis_id           => $analysis_id,
         -logic_name            => $logic_name,
         -job_id                => $job_id,
         -force                 => $force,
    );
};
my $msg_thrown = $@;    # capture immediately, before anything else can reset $@

# $worker is assigned before specialize_new_worker() is called, so a defined
# $worker alone does not mean that both steps succeeded: if specialization
# threw, the Worker must not be run and the thrown message must be reported.
if($worker and not $msg_thrown) {

    $worker->run();

} else {

    # NOTE(review): when creation succeeded but specialization threw, nothing here
    # cleans up the Worker that was already created - confirm whether the Queen handles that.
    $queen->print_analysis_status;
    print "\n=== COULDN'T CREATE WORKER ===\n";

    if($msg_thrown) {
        print "$msg_thrown\n";
    }
    exit(1);
}
Jessica Severin's avatar
Jessica Severin committed
143 144


145 146 147 148 149 150
__DATA__

=pod

=head1 NAME

Leo Gordon's avatar
Leo Gordon committed
151
    runWorker.pl
152 153 154

=head1 DESCRIPTION

Leo Gordon's avatar
Leo Gordon committed
155 156
    runWorker.pl is an eHive component script that does the work of a single Worker -
    specializes in one of the analyses and starts executing jobs of that analysis one-by-one or batch-by-batch.
157

Leo Gordon's avatar
Leo Gordon committed
158 159
    Most of the functionality of the eHive is accessible via beekeeper.pl script,
    but feel free to run the runWorker.pl if you think you know what you are doing :)
Leo Gordon's avatar
Leo Gordon committed
160 161 162

=head1 USAGE EXAMPLES

Leo Gordon's avatar
Leo Gordon committed
163
        # Run one local worker process in ehive_dbname and let the system pick up the analysis
164
    runWorker.pl --host=hostname --port=3306 --user=username --password=secret ehive_dbname
165

Leo Gordon's avatar
Leo Gordon committed
166 167
        # Run one local worker process in ehive_dbname and let the system pick up the analysis (another connection syntax)
    runWorker.pl -url mysql://username:secret@hostname:port/ehive_dbname
Leo Gordon's avatar
Leo Gordon committed
168

Leo Gordon's avatar
Leo Gordon committed
169 170
        # Run one local worker process in ehive_dbname and specify the logic_name
    runWorker.pl -url mysql://username:secret@hostname:port/ehive_dbname -logic_name fast_blast
Leo Gordon's avatar
Leo Gordon committed
171

172 173 174
        # Run a specific job (by a local worker process):
    runWorker.pl -url mysql://username:secret@hostname:port/ehive_dbname -job_id 123456

175 176
=head1 OPTIONS

177
=head2 Connection parameters:
Leo Gordon's avatar
Leo Gordon committed
178

179
    -conf <path>            : config file describing db connection
180 181
    -reg_conf <path>        : path to a Registry configuration file
    -reg_alias <string>     : species/alias name for the Hive DBAdaptor
182 183 184 185 186 187
    -url <url string>       : url defining where database is located
    -host <machine>         : mysql database host <machine>
    -port <port#>           : mysql port number
    -user <name>            : mysql connection user <name>
    -password <pass>        : mysql connection password
    [-database] <name>      : mysql database <name>
Leo Gordon's avatar
Leo Gordon committed
188

189 190 191
=head2 Task specification parameters:

    -rc_id <id>                 : resource class id
Leo Gordon's avatar
Leo Gordon committed
192
    -rc_name <string>           : resource class name
193
    -analysis_id <id>           : pre-specify this worker in a particular analysis defined by database id
Leo Gordon's avatar
Leo Gordon committed
194
    -logic_name <string>        : pre-specify this worker in a particular analysis defined by name
195 196 197
    -job_id <id>                : run a specific job defined by its database id

=head2 Worker control parameters:
Leo Gordon's avatar
Leo Gordon committed
198

199
    -job_limit <num>            : #jobs to run before worker can die naturally
200
    -life_span <num>            : number of minutes this worker is allowed to run
Leo Gordon's avatar
Leo Gordon committed
201
    -no_cleanup                 : don't perform temp directory cleanup when worker exits
202
    -no_write                   : don't write_output or auto_dataflow input_job
203 204
    -hive_log_dir <path>        : directory where stdout/stderr of the whole hive of workers is redirected
    -worker_log_dir <path>      : directory where stdout/stderr of this particular worker is redirected
205
    -retry_throwing_jobs <0|1>  : if a job dies *knowingly*, should we retry it by default?
206
    -compile_module_once <0|1>  : should we compile the module only once (desired future behaviour), or pretend to do it before every job (current behaviour)?
207
    -force <0|1>                : set to 1 if you want to force running a Worker over a BLOCKED analysis
208

Leo Gordon's avatar
Leo Gordon committed
209 210
=head2 Other options:

211 212 213
    -help                       : print this help
    -debug <level>              : turn on debug messages at <level>
    -analysis_stats             : show status of each analysis in hive
214

Leo Gordon's avatar
Leo Gordon committed
215 216
=head1 CONTACT

Leo Gordon's avatar
Leo Gordon committed
217
    Please contact ehive-users@ebi.ac.uk mailing list with questions/suggestions.
Leo Gordon's avatar
Leo Gordon committed
218

219 220
=cut