Commit 24019053 authored by Leo Gordon
Browse files

Added API and schema support for analysis_base.meadow_type /...

Added API and schema support for analysis_base.meadow_type / Analysis->meadow_type(), which will be NULL/undef by default
parent f1603685
......@@ -35,8 +35,8 @@ sub new {
my $self = $class->SUPER::new( @_ ); # deal with Storable stuff
my ($logic_name, $module, $parameters, $resource_class_id, $failed_job_tolerance, $max_retry_count, $can_be_empty, $priority) =
rearrange([qw(logic_name module parameters resource_class_id failed_job_tolerance max_retry_count can_be_empty priority) ], @_);
my ($logic_name, $module, $parameters, $resource_class_id, $failed_job_tolerance, $max_retry_count, $can_be_empty, $priority, $meadow_type) =
rearrange([qw(logic_name module parameters resource_class_id failed_job_tolerance max_retry_count can_be_empty priority meadow_type) ], @_);
$self->logic_name($logic_name) if($logic_name);
$self->module($module) if($module);
......@@ -46,6 +46,7 @@ sub new {
$self->max_retry_count($max_retry_count) if($max_retry_count);
$self->can_be_empty($can_be_empty) if($can_be_empty);
$self->priority($priority) if($priority);
$self->meadow_type($meadow_type) if($meadow_type);
return $self;
}
......@@ -111,6 +112,13 @@ sub priority {
}
# Combined getter/setter for the analysis' meadow type.
#   Arg [1] : (optional) new value; when any argument is passed (even undef) it is stored
#   Returns : the value currently held in $self->{'_meadow_type'} (undef until it has been set)
sub meadow_type {
    my ($self, @new_value) = @_;

    # Only overwrite when the caller actually supplied an argument,
    # so a plain getter call never clobbers the stored value.
    if (@new_value) {
        $self->{'_meadow_type'} = $new_value[0];
    }

    return $self->{'_meadow_type'};
}
=head2 process
Arg [1] : none
......
......@@ -380,8 +380,8 @@ sub run {
my %seen_logic_name = ();
foreach my $aha (@{$self->pipeline_analyses}) {
my ($logic_name, $module, $parameters_hash, $input_ids, $blocked, $batch_size, $hive_capacity, $failed_job_tolerance, $max_retry_count, $can_be_empty, $rc_id, $rc_name, $priority) =
rearrange([qw(logic_name module parameters input_ids blocked batch_size hive_capacity failed_job_tolerance max_retry_count can_be_empty rc_id rc_name priority)], %$aha);
my ($logic_name, $module, $parameters_hash, $input_ids, $blocked, $batch_size, $hive_capacity, $failed_job_tolerance, $max_retry_count, $can_be_empty, $rc_id, $rc_name, $priority, $meadow_type) =
rearrange([qw(logic_name module parameters input_ids blocked batch_size hive_capacity failed_job_tolerance max_retry_count can_be_empty rc_id rc_name priority meadow_type)], %$aha);
unless($logic_name) {
die "logic_name' must be defined in every analysis";
......@@ -424,6 +424,7 @@ sub run {
-max_retry_count => $max_retry_count,
-can_be_empty => $can_be_empty,
-priority => $priority,
-meadow_type => $meadow_type,
);
$analysis_adaptor->store($analysis);
......
......@@ -142,6 +142,7 @@ sub pipeline_analyses {
return [
{ -logic_name => 'start',
-module => 'Bio::EnsEMBL::Hive::RunnableDB::LongMult::Start',
-meadow_type=> 'LOCAL', # do not bother the farm with such a simple task (and get it done faster)
-parameters => {},
-input_ids => [
{ 'a_multiplier' => $self->o('first_mult'), 'b_multiplier' => $self->o('second_mult') },
......
......@@ -759,7 +759,9 @@ sub schedule_workers {
foreach my $analysis_stats (@suitable_analyses) {
last if ($available_load <= 0.0);
my $this_meadow_type = $default_meadow_type; # this should be coming from each specific analysis (and only default if undef)
my $analysis = $analysis_stats->get_analysis(); # FIXME: if it proves too expensive we may need to consider caching
my $this_meadow_type = $analysis->meadow_type || $default_meadow_type;
if( defined(my $meadow_limit = $available_worker_slots_by_meadow_type->{ $this_meadow_type }) ) {
$available_submit_limit = defined($available_submit_limit)
......@@ -812,7 +814,7 @@ sub schedule_workers {
$total_workers_to_submit_by_meadow_type{ $this_meadow_type } += $workers_this_analysis;
$total_workers_to_submit += $workers_this_analysis;
$analysis_stats->print_stats();
printf("Scheduler suggests adding $workers_this_analysis more $this_meadow_type:$curr_rc_name workers for analysis_id=%d [%.3f hive_load remaining]\n", $analysis_stats->analysis_id, $available_load);
printf("Scheduler suggests adding $workers_this_analysis x $this_meadow_type:$curr_rc_name workers for '%s' [%.3f hive_load remaining]\n", $analysis->logic_name, $available_load);
}
print ''.('-'x60)."\n";
......
......@@ -60,9 +60,9 @@ sub main {
min(swap), avg(swap), max(swap)
FROM analysis_base
JOIN resource_class rc USING(resource_class_id)
LEFT JOIN worker USING(analysis_id)
LEFT JOIN worker w USING(analysis_id)
LEFT JOIN lsf_report USING (process_id)
WHERE meadow_type='LSF'
WHERE w.meadow_type='LSF'
GROUP BY analysis_id
ORDER BY analysis_id;
});
......
# a new column for the multi-meadow scheduler
# (NULL by default; for such analyses the scheduler falls back to its default meadow type):
ALTER TABLE analysis_base ADD COLUMN meadow_type varchar(40) DEFAULT NULL;
......@@ -50,6 +50,7 @@ CREATE TABLE analysis_base (
max_retry_count int(10) DEFAULT 3 NOT NULL,
can_be_empty TINYINT UNSIGNED DEFAULT 0 NOT NULL,
priority TINYINT DEFAULT 0 NOT NULL,
meadow_type varchar(40) DEFAULT NULL,
PRIMARY KEY (analysis_id),
UNIQUE KEY logic_name_idx (logic_name)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment