Commit 2db96ce2 authored by Leo Gordon
Browse files

moved submit_workers_max/pending_adjust/total_workers_max/meadow_options into...

moved submit_workers_max/pending_adjust/total_workers_max/meadow_options into Config, but they are still configurable from BK's commandline via config_set(); lots of code cleanup on the way
parent 850d698d
{
"Graph" : {
"Meadow" : { # configure Meadow-specific options here:
"LSF" : {
"SubmissionOptions" : "", # general options that different Meadows can plug into the submission command
"PendingAdjust" : 1, # if set and ->can('count_pending_workers_by_rc_id'), adjusts submitted workers by those in pending state
"TotalRunningWorkersMax" : 1000, # how many workers can be run on this Meadow in total
"farm2" : {
"SubmitWorkersMax" : 50, # if set, provides a cut-off on the number of workers being submitted each iteration
},
"pcs4" : {
"SubmitWorkersMax" : 30,
},
"EBI" : {
"SubmitWorkersMax" : 60,
},
},
"LOCAL" : {
"TotalRunningWorkersMax" : 2,
"my-quadcore-desktop" : {
"TotalRunningWorkersMax" : 4,
},
"your-weak-laptop" : {
"TotalRunningWorkersMax" : 1,
},
},
},
"Graph" : { # configure diagram-generation options here:
"Colours" : {
"Status" : {
"BLOCKED" : "grey",
......@@ -32,4 +57,3 @@
"DisplaySemaphoreBoxes" : 1,
}
}
......@@ -9,9 +9,13 @@ use warnings;
# Constructor.
#
# Args:
#   $class  - name of the class the new object is blessed into
#   $config - configuration object; stored via the config() accessor so that
#             config_get() can later reach it through $self->config
#
# Returns the newly created hash-based object.
sub new {
    my ($class, $config) = @_;

    my $self = bless {}, $class;
    $self->config( $config );

    return $self;
}
......@@ -31,30 +35,47 @@ sub toString {
}
sub pipeline_name { # if set, provides a filter for job-related queries
sub config {
my $self = shift @_;
if(scalar(@_)) { # new value is being set (which can be undef)
$self->{'_pipeline_name'} = shift @_;
if(@_) {
$self->{'_config'} = shift @_;
}
return $self->{'_pipeline_name'};
return $self->{'_config'};
}
# Looks up a setting that belongs to this particular meadow.
#
# Any arguments given by the caller are appended to the path
# ('Meadow', <type>, <name>) and the whole list is passed on to the get()
# method of the object returned by $self->config.  Whatever get() returns
# is handed back unchanged.
sub config_get {
    my ($self, @key_path) = @_;

    my $config = $self->config;
    return $config->get('Meadow', $self->type, $self->name, @key_path);
}
sub meadow_options { # general options that different Meadows can plug into the submission command
sub config_set {
my $self = shift @_;
if(scalar(@_)) {
$self->{'_meadow_options'} = shift @_;
return $self->config->set('Meadow', $self->type, $self->name, @_);
}
sub pipeline_name { # if set, provides a filter for job-related queries
my $self = shift @_;
if(@_) { # new value is being set (which can be undef)
$self->{'_pipeline_name'} = shift @_;
}
return $self->{'_meadow_options'} || '';
return $self->{'_pipeline_name'};
}
# Builds the prefix used for job names: "<pipeline_name>-Hive" when
# pipeline_name() returns a true value, plain "Hive" otherwise.
sub job_name_prefix {
    my ($self) = @_;

    my $prefix = '';
    if ($self->pipeline_name()) {
        $prefix = $self->pipeline_name() . '-';
    }

    return $prefix . 'Hive';
}
sub generate_job_name {
my ($self, $worker_count, $iteration, $rc_id) = @_;
$rc_id ||= 0;
......@@ -64,58 +85,25 @@ sub generate_job_name {
. (($worker_count > 1) ? "[1-${worker_count}]" : '');
}
# Tells whether the given worker belongs to this meadow: true only when both
# the worker's meadow_type equals $self->type and its meadow_name equals
# $self->name.  The names are compared only if the types already match.
sub responsible_for_worker {
    my $self   = shift @_;
    my $worker = shift @_;

    my $same_type = $worker->meadow_type eq $self->type;

    return $same_type && ($worker->meadow_name eq $self->name);
}
# Placeholder that always dies with "Please use a derived method".
# NOTE(review): presumably each concrete meadow class supplies its own
# implementation -- confirm against the derived classes.
sub check_worker_is_alive_and_mine {
    my $self   = shift @_;
    my $worker = shift @_;

    die "Please use a derived method";
}
# Placeholder that always dies with "Please use a derived method".
# NOTE(review): presumably each concrete meadow class supplies its own
# implementation -- confirm against the derived classes.
sub kill_worker {
    my $self   = shift @_;
    my $worker = shift @_;

    die "Please use a derived method";
}
# --------------[(combinable) means of adjusting the number of submitted workers]----------------------
# Fallback consulted by total_running_workers_max() when no true value has
# been stored on the object.  This version returns undef, i.e. no limit.
# NOTE(review): "return undef" yields a one-element list in list context;
# kept as is because callers outside this view may depend on it.
sub total_running_workers_default_max { # no default by default :)
return undef;
}
# Getter/setter for the cap on the total number of running workers.
# If set and the meadow ->can('count_running_workers'), it provides a cut-off
# on the number of workers being submitted.
#
# With an argument, stores it (undef is accepted as a value).
# Returns the stored value when it is true; otherwise falls back to
# total_running_workers_default_max() -- note that a stored 0 also falls back.
sub total_running_workers_max {
    my ($self, @new_value) = @_;

    $self->{'_total_running_workers_max'} = $new_value[0] if @new_value;

    return $self->{'_total_running_workers_max'} || $self->total_running_workers_default_max();
}
# Getter/setter for the "pending adjust" flag.
# If set and the meadow ->can('count_pending_workers_by_rc_id'), the number of
# submitted workers is adjusted by those already in pending state.
#
# With an argument, stores it (undef is accepted as a value).
# Always returns the currently stored value.
sub pending_adjust {
    my ($self, @new_value) = @_;

    if (@new_value) {
        $self->{'_pending_adjust'} = $new_value[0];
    }

    return $self->{'_pending_adjust'};
}
# Getter/setter for the per-iteration submission limit: if set, it provides
# a cut-off on the number of workers being submitted.
#
# With an argument, stores it (undef is accepted as a value).
# Always returns the currently stored value.
sub submit_workers_max {
    my ($self, @new_value) = @_;

    if (@new_value) {
        $self->{'_submit_workers_max'} = $new_value[0];
    }

    return $self->{'_submit_workers_max'};
}
1;
......@@ -14,12 +14,6 @@ sub name { # also called to check for availability; for the moment assume LOCAL
}
# Default cap on the total number of running workers for this meadow: 2.
# NOTE(review): presumably overrides a base-class version that returns
# undef -- confirm against the class hierarchy.
sub total_running_workers_default_max {
return 2;
}
sub get_current_worker_process_id {
my ($self) = @_;
......
......@@ -123,11 +123,11 @@ sub submit_workers {
my ($self, $iteration, $worker_cmd, $worker_count, $rc_id, $rc_parameters) = @_;
my $job_name = $self->generate_job_name($worker_count, $iteration, $rc_id);
my $meadow_options = $self->meadow_options();
my $submission_options = $self->config_get('SubmissionOptions');
$ENV{'LSB_STDOUT_DIRECT'} = 'y'; # unbuffer the output of the bsub command
my $cmd = qq{bsub -o /dev/null -J "${job_name}" $rc_parameters $meadow_options $worker_cmd -rc_id $rc_id};
my $cmd = qq{bsub -o /dev/null -J "${job_name}" $rc_parameters $submission_options $worker_cmd -rc_id $rc_id};
print "SUBMITTING_CMD:\t\t$cmd\n";
system($cmd) && die "Could not submit job(s): $!, $?"; # let's abort the beekeeper and let the user check the syntax
......
......@@ -763,9 +763,9 @@ sub schedule_workers_resync_if_necessary {
my $meadow = $valley->get_current_meadow();
my $pending_by_rc_id = ($meadow->can('count_pending_workers_by_rc_id') and $meadow->pending_adjust()) ? $meadow->count_pending_workers_by_rc_id() : {};
my $submit_limit = $meadow->submit_workers_max();
my $meadow_limit = ($meadow->can('count_running_workers') and defined($meadow->total_running_workers_max)) ? $meadow->total_running_workers_max - $meadow->count_running_workers : undef;
my $pending_by_rc_id = ($meadow->can('count_pending_workers_by_rc_id') and $meadow->config_get('PendingAdjust')) ? $meadow->count_pending_workers_by_rc_id() : {};
my $submit_limit = $meadow->config_get('SubmitWorkersMax');
my $meadow_limit = ($meadow->can('count_running_workers') and defined($meadow->config_get('TotalRunningWorkersMax'))) ? $meadow->config_get('TotalRunningWorkersMax') - $meadow->count_running_workers : undef;
my $available_submit_limit = ($submit_limit and $meadow_limit)
? (($submit_limit<$meadow_limit) ? $submit_limit : $meadow_limit)
......
......@@ -34,7 +34,7 @@ sub meadow_class_path {
sub new {
my ($class, $current_meadow_type, $pipeline_name) = @_;
my ($class, $config, $current_meadow_type, $pipeline_name) = @_;
my $self = bless {}, $class;
......@@ -44,7 +44,7 @@ sub new {
foreach my $meadow_class (@{ find_submodules( $self->meadow_class_path ) }) {
eval "require $meadow_class";
if($meadow_class->name) {
my $meadow_object = $meadow_class->new();
my $meadow_object = $meadow_class->new( $config );
$meadow_object->pipeline_name( $pipeline_name ) if($pipeline_name);
......
......@@ -2,14 +2,14 @@
use strict;
use warnings;
use Data::Dumper;
use Getopt::Long;
use Bio::EnsEMBL::Hive::Utils ('script_usage', 'destringify');
use Bio::EnsEMBL::Hive::Utils::Config;
use Bio::EnsEMBL::Hive::URLFactory;
use Bio::EnsEMBL::Hive::DBSQL::DBAdaptor;
use Bio::EnsEMBL::Hive::Worker;
use Bio::EnsEMBL::Hive::Queen;
use Bio::EnsEMBL::Hive::URLFactory;
use Bio::EnsEMBL::Hive::DBSQL::AnalysisCtrlRuleAdaptor;
use Bio::EnsEMBL::Hive::Valley;
main();
......@@ -37,11 +37,11 @@ sub main {
my $sync = 0;
my $local = 0;
my $show_failed_jobs = 0;
my $no_pend_adjust = 0;
my $submit_workers_max = 50;
my $total_workers_max = undef;
my $meadow_type = undef;
my $meadow_options = '';
my $pending_adjust = undef;
my $submit_workers_max = undef;
my $total_running_workers_max = undef;
my $submission_options = undef;
my $run = 0;
my $max_loops = 0; # not running by default
my $run_job_id = undef;
......@@ -84,11 +84,11 @@ sub main {
# meadow control
'local!' => \$local,
'total_workers_max|local_cpus=i' => \$total_workers_max,
'submit_workers_max|wlimit=i' => \$submit_workers_max,
'no_pend' => \$no_pend_adjust,
'meadow_type=s' => \$meadow_type,
'meadow_options=s' => \$meadow_options,
'total_running_workers_max=i' => \$total_running_workers_max,
'submit_workers_max=i' => \$submit_workers_max,
'pending_adjust=i' => \$pending_adjust,
'submission_options=s' => \$submission_options,
# worker control
'job_limit|jlimit=i' => \$self->{'job_limit'},
......@@ -120,6 +120,8 @@ sub main {
if ($help) { script_usage(0); }
my $config = Bio::EnsEMBL::Hive::Utils::Config->new(); # will probably add a config_file option later
if($run or $run_job_id) {
$max_loops = 1;
} elsif ($loopit or $keep_alive) {
......@@ -168,20 +170,20 @@ sub main {
print STDERR "+---------------------------------------------------------------------+\n";
}
if($run_job_id) {
$submit_workers_max = 1;
}
$meadow_type = 'LOCAL' if($local);
my $valley = Bio::EnsEMBL::Hive::Valley->new( $meadow_type, $pipeline_name );
my $valley = Bio::EnsEMBL::Hive::Valley->new( $config, $meadow_type, $pipeline_name );
my $current_meadow = $valley->get_current_meadow();
warn "Current meadow: ".$current_meadow->toString."\n";
$current_meadow->meadow_options($meadow_options);
$current_meadow->total_running_workers_max($total_workers_max) if($total_workers_max);
$current_meadow->pending_adjust(not $no_pend_adjust);
if($run_job_id) {
$submit_workers_max = 1;
}
$current_meadow->submit_workers_max($submit_workers_max);
$current_meadow->config_set('TotalRunningWorkersMax', $total_running_workers_max) if(defined $total_running_workers_max);
$current_meadow->config_set('PendingAdjust', $pending_adjust) if(defined $pending_adjust);
$current_meadow->config_set('SubmitWorkersMax', $submit_workers_max) if(defined $submit_workers_max);
$current_meadow->config_set('SubmissionOptions', $submission_options) if(defined $submission_options);
if($reset_job_id) { $queen->reset_job_by_dbID_and_sync($reset_job_id); }
......@@ -433,8 +435,8 @@ __DATA__
# Do not run any additional Workers, just check for the current status of the pipeline:
beekeeper.pl -url mysql://username:secret@hostname:port/ehive_dbname
# Run the pipeline in automatic mode (-loop), run all the workers locally (-meadow_type LOCAL) and allow for 3 parallel workers (-total_workers_max 3)
beekeeper.pl -url mysql://username:secret@hostname:port/long_mult_test -meadow_type LOCAL -total_workers_max 3 -loop
# Run the pipeline in automatic mode (-loop), run all the workers locally (-meadow_type LOCAL) and allow for 3 parallel workers (-total_running_workers_max 3)
beekeeper.pl -url mysql://username:secret@hostname:port/long_mult_test -meadow_type LOCAL -total_running_workers_max 3 -loop
# Run in automatic mode, but only restrict to running the 'fast_blast' analysis
beekeeper.pl -url mysql://username:secret@hostname:port/long_mult_test -logic_name fast_blast -loop
......@@ -452,7 +454,6 @@ __DATA__
=head2 Connection parameters
-conf <path> : config file describing db connection
-reg_conf <path> : path to a Registry configuration file
-reg_alias <string> : species/alias name for the Hive DBAdaptor
-url <url string> : url defining where hive database is located
......@@ -471,13 +472,13 @@ __DATA__
-run : run 1 iteration of automation loop
-sleep <num> : when looping, sleep <num> minutes (default 2min)
=head2 Meadow control
=head2 Current Meadow control
-total_workers_max <num> : max # workers to be running in parallel
-submit_workers_max <num> : max # workers to create per loop
-no_pend : don't adjust needed workers by pending workers
-meadow_type <string> : the desired Meadow class name, such as 'LSF' or 'LOCAL'
-meadow_options <string> : passes <string> to the Meadow submission command as <options> (formerly lsf_options)
-total_running_workers_max <num> : max # workers to be running in parallel
-submit_workers_max <num> : max # workers to create per loop iteration
-pending_adjust <0|1> : [do not] adjust needed workers by pending workers
-submission_options <string> : passes <string> to the Meadow submission command as <options> (formerly lsf_options)
=head2 Worker control
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment