runWorker.pl 8.87 KB
Newer Older
1
#!/usr/bin/env perl
Jessica Severin's avatar
Jessica Severin committed
2 3

use strict;
Leo Gordon's avatar
Leo Gordon committed
4
use warnings;
Jessica Severin's avatar
Jessica Severin committed
5
use Getopt::Long;
6
use Bio::EnsEMBL::Hive::Utils ('script_usage');
7
use Bio::EnsEMBL::Registry;
8
use Bio::EnsEMBL::Hive::DBSQL::DBAdaptor;
9
use Bio::EnsEMBL::Hive::Queen;
10
use Bio::EnsEMBL::Hive::Valley;
11

12
Bio::EnsEMBL::Registry->no_version_check(1);
Jessica Severin's avatar
Jessica Severin committed
13

14
# Default connection parameters. Individual fields can be overridden by the
# -host/-port/-user/-password/-database options below; parse_conf() may later
# replace the whole hash with an entry read from the -conf file.
my $db_conf = {
    -host   => '',
    -port   => 3306,
    -user   => 'ensro',
    -pass   => '',
    -dbname => '',
};

my ($conf_file, $reg_conf, $reg_alias, $url);                   # Connection parameters
my ($rc_id, $logic_name, $analysis_id, $input_id, $job_id);     # Task specification parameters
my ($job_limit, $life_span, $no_cleanup, $no_write, $hive_output_dir, $worker_output_dir, $retry_throwing_jobs);   # Worker control parameters
my ($help, $debug, $show_analysis_stats);

# Parse the command line. Every option has a long name plus a legacy alias.
# NOTE: the return value of GetOptions is deliberately left unchecked here, as
# in the original script: an unrecognised option only triggers Getopt::Long's
# own warning and the script carries on with the remaining options.
GetOptions(

# Connection parameters:
           'conf=s'                     => \$conf_file,
           'reg_conf|regfile=s'         => \$reg_conf,
           'reg_alias|regname=s'        => \$reg_alias,
           'url=s'                      => \$url,
           'host|dbhost=s'              => \$db_conf->{'-host'},
           'port|dbport=i'              => \$db_conf->{'-port'},
           'user|dbuser=s'              => \$db_conf->{'-user'},
           'password|dbpass=s'          => \$db_conf->{'-pass'},
           'database|dbname=s'          => \$db_conf->{'-dbname'},

# Task specification parameters:
           'rc_id=i'                    => \$rc_id,
           'logic_name=s'               => \$logic_name,
           'analysis_id=i'              => \$analysis_id,
           'input_id=s'                 => \$input_id,
           'job_id=i'                   => \$job_id,

# Worker control parameters:
           'job_limit|limit=i'          => \$job_limit,
           'life_span|lifespan=i'       => \$life_span,
           'no_cleanup'                 => \$no_cleanup,
           'no_write|nowrite'           => \$no_write,
           'hive_output_dir|outdir=s'   => \$hive_output_dir,       # keep compatibility with the old name
           'worker_output_dir=s'        => \$worker_output_dir,     # will take precedence over hive_output_dir if set
           'retry_throwing_jobs=i'      => \$retry_throwing_jobs,

# Other commands
           'h|help'                     => \$help,
           'debug=i'                    => \$debug,
           'analysis_stats'             => \$show_analysis_stats,

# loose arguments interpreted as database name (for compatibility with mysql[dump])
            '<>', sub { $db_conf->{'-dbname'} = shift @_; },
);

if ($help) { script_usage(0); }

# Give the -conf file (if any) a chance to supply the connection hash.
parse_conf($conf_file);

if($reg_conf) {     # if reg_conf is defined, we load it regardless of whether it is used to connect to the Hive database or not:
    Bio::EnsEMBL::Registry->load_all($reg_conf);
}

# Choose the connection method in order of precedence:
#   1) a Registry alias, 2) a database URL, 3) separate host/user/dbname parameters.
my $DBA;
if($reg_alias) {
    $DBA = Bio::EnsEMBL::Registry->get_DBAdaptor($reg_alias, 'hive');
} elsif($url) {
    # URLFactory is not loaded explicitly at the top of this script; require it
    # here so this branch does not depend on another module pulling it in.
    require Bio::EnsEMBL::Hive::URLFactory;
    $DBA = Bio::EnsEMBL::Hive::URLFactory->fetch($url) or die "Unable to connect to '$url'\n";
} elsif ($db_conf->{'-host'} and $db_conf->{'-user'} and $db_conf->{'-dbname'}) {
    $DBA = Bio::EnsEMBL::Hive::DBSQL::DBAdaptor->new(%$db_conf);
} else {
    print "\nERROR : Connection parameters (reg_conf+reg_alias, url or dbhost+dbuser+dbname) need to be specified\n\n";
    script_usage(1);
}

# Whatever route was taken, we must end up with a Hive DBAdaptor.
unless($DBA and $DBA->isa("Bio::EnsEMBL::Hive::DBSQL::DBAdaptor")) {
  print("ERROR : no database connection\n\n");
  script_usage(1);
}

my $queen = $DBA->get_Queen();

# Ask the Valley where this process is running; the four values identify the Worker.
my ($meadow_type, $meadow_name, $process_id, $exec_host) = Bio::EnsEMBL::Hive::Valley->new()->whereami();

print "runWorker(-MeadowType => $meadow_type, -MeadowName => $meadow_name, -ProcessId => $process_id, -ExecHost => $exec_host)\n";

my $worker;

# Worker creation may throw; trap the exception so that it can be reported below.
eval {
    $worker = $queen->create_new_worker(
      # Worker identity:
         -meadow_type           => $meadow_type,
         -meadow_name           => $meadow_name,
         -process_id            => $process_id,
         -exec_host             => $exec_host,

      # Task specification:
         -rc_id                 => $rc_id,
         -logic_name            => $logic_name,
         -analysis_id           => $analysis_id,
         -input_id              => $input_id,
         -job_id                => $job_id,

      # Worker control parameters:
         -job_limit             => $job_limit,
         -life_span             => $life_span,
         -no_cleanup            => $no_cleanup,
         -no_write              => $no_write,
         -worker_output_dir     => $worker_output_dir,
         -hive_output_dir       => $hive_output_dir,
         -retry_throwing_jobs   => $retry_throwing_jobs,

      # Other parameters:
         -debug                 => $debug,
    );
};
my $msg_thrown = $@;    # copy $@ straight away, before any other call can overwrite it

# No worker: either create_new_worker() threw (report the message and show usage)
# or it returned a false value without throwing (just exit with an error status).
unless($worker) {
    $queen->print_analysis_status if($show_analysis_stats);
    print "\n=== COULDN'T CREATE WORKER ===\n";

    if($msg_thrown) {
        print "$msg_thrown\n";
        script_usage(1);
    } else {
        exit(1);
    }
}

# Hand control over to the Worker; this call returns once the Worker has finished.
$worker->run();

# Optional post-run report, requested with -analysis_stats.
if($show_analysis_stats) {
    $queen->print_analysis_status;
    $queen->schedule_workers(); # apparently run not for the return value, but for the side-effects
}

exit 0;

#######################
#
# subroutines
#
#######################


# parse_conf($conf_file)
#
# Reads a Perl configuration file and, if it contains an entry whose TYPE is
# 'COMPARA' or 'DATABASE', replaces the file-level $db_conf hash reference with
# that entry (the last matching entry wins).
#
#   $conf_file : path to a file that evaluates to a reference to an array of
#                hash references; may be undefined or empty, in which case
#                nothing happens.
#
# A path that does not exist is skipped with a warning. A file that exists but
# cannot be read, does not compile, or does not return an array reference makes
# the script die with an explanatory message.
sub parse_conf {
  my $conf_file = shift;

  return unless $conf_file;

  unless (-e $conf_file) {
    warn "WARNING : configuration file '$conf_file' does not exist, ignoring it\n";
    return;
  }

  # do() looks a bare relative path up in @INC, which no longer contains '.'
  # since perl 5.26; an absolute path is always loaded directly.
  require File::Spec;
  my $conf_path = File::Spec->rel2abs($conf_file);

  #read configuration file from disk
  my $conf_list = do $conf_path;

  unless (ref($conf_list) eq 'ARRAY') {
    my $reason = $@                  ? "could not be parsed: $@"
               : !defined $conf_list ? "could not be read: $!"
               :                       "did not return an array reference";
    die "ERROR : configuration file '$conf_file' $reason\n";
  }

  foreach my $confPtr (@$conf_list) {
    # entries without a TYPE are simply not database descriptions
    my $type = defined $confPtr->{TYPE} ? $confPtr->{TYPE} : '';

    if(($type eq 'COMPARA') or ($type eq 'DATABASE')) {
      $db_conf = $confPtr;
    }
  }
}

__DATA__

=pod

=head1 NAME

    runWorker.pl

=head1 DESCRIPTION

    runWorker.pl is an eHive component script that does the work of a single Worker -
    it specializes in one of the analyses and starts executing jobs of that analysis one-by-one or batch-by-batch.

    Most of the functionality of the eHive is accessible via the beekeeper.pl script,
    but feel free to run runWorker.pl directly if you think you know what you are doing :)

=head1 USAGE EXAMPLES

        # Run one local worker process in ehive_dbname and let the system pick up the analysis
    runWorker.pl --host=hostname --port=3306 --user=username --password=secret ehive_dbname

        # Run one local worker process in ehive_dbname and let the system pick up the analysis (another connection syntax)
    runWorker.pl -url mysql://username:secret@hostname:port/ehive_dbname

        # Run one local worker process in ehive_dbname and specify the logic_name
    runWorker.pl -url mysql://username:secret@hostname:port/ehive_dbname -logic_name fast_blast

        # Run a specific job (by a local worker process):
    runWorker.pl -url mysql://username:secret@hostname:port/ehive_dbname -job_id 123456

        # Create a job outside the eHive to test the specified input_id
    runWorker.pl -url mysql://username:secret@hostname:port/ehive_dbname -logic_name fast_blast -input_id '{ "foo" => 1500 }'

=head1 OPTIONS

=head2 Connection parameters:

    -conf <path>            : config file describing db connection
    -reg_conf <path>        : path to a Registry configuration file
    -reg_alias <string>     : species/alias name for the Hive DBAdaptor
    -url <url string>       : url defining where database is located
    -host <machine>         : mysql database host <machine>
    -port <port#>           : mysql port number
    -user <name>            : mysql connection user <name>
    -password <pass>        : mysql connection password
    [-database] <name>      : mysql database <name>

=head2 Task specification parameters:

    -rc_id <id>                 : resource class id
    -logic_name <string>        : pre-specify this worker in a particular analysis defined by name
    -analysis_id <id>           : pre-specify this worker in a particular analysis defined by database id
    -input_id <string>          : test this input_id on specified analysis (defined either by analysis_id or logic_name)
    -job_id <id>                : run a specific job defined by its database id

=head2 Worker control parameters:

    -job_limit <num>            : number of jobs to run before worker can die naturally
    -life_span <num>            : number of minutes this worker is allowed to run
    -no_cleanup                 : don't perform temp directory cleanup when worker exits
    -no_write                   : don't write_output or auto_dataflow input_job
    -hive_output_dir <path>     : directory where stdout/stderr of the whole hive of workers is redirected
    -worker_output_dir <path>   : directory where stdout/stderr of this particular worker is redirected
    -retry_throwing_jobs <0|1>  : if a job dies *knowingly*, should we retry it by default?

=head2 Other options:

    -help                       : print this help
    -debug <level>              : turn on debug messages at <level>
    -analysis_stats             : show status of each analysis in hive

=head1 CONTACT

    Please contact the ehive-users@ebi.ac.uk mailing list with questions/suggestions.

=cut