Commit c5c37794 authored by Leo Gordon
Browse files

Worker reports the reason why it decides to die + good working example

parent 09649a2e
......@@ -40,6 +40,8 @@ use base ('Bio::EnsEMBL::Hive::PipeConfig::HiveGeneric_conf'); # All Hive datab
In addition to the standard things it defines three options:
o('job_count') controls the total number of FailureTest jobs
o('failure_rate') controls the rate of jobs that are programmed to fail
o('state') controls the state in which the jobs will be failing
o('lethal_after') when job_number is above this (nonzero) threshold, job's death becomes lethal to the Worker
There is a rule dependent on one option that does not have a default (this makes it mandatory):
o('password') your read-write password for creation and maintenance of the hive database
......@@ -64,6 +66,7 @@ sub default_options {
'job_count' => 20, # controls the total number of FailureTest jobs
'failure_rate' => 3, # controls the rate of jobs that are programmed to fail
'state' => 'RUN', # controls in which state the jobs are programmed to fail
'lethal_after' => 0,
};
}
......@@ -91,7 +94,8 @@ sub pipeline_analyses {
'job_count' => $self->o('job_count'), # turn this option into a passable parameter
'failure_rate' => $self->o('failure_rate'), # turn the other option into a passable parameter as well
'state' => $self->o('state'), # turn the third option into a passable parameter too
'input_id' => { 'value' => '#_range_start#', 'divisor' => '#failure_rate#', 'state' => '#state#' },
'lethal_after' => $self->o('lethal_after'),
'input_id' => { 'value' => '#_range_start#', 'divisor' => '#failure_rate#', 'state' => '#state#', 'lethal_after' => '#lethal_after#' },
},
],
-flow_into => {
......
......@@ -31,6 +31,8 @@ Available parameters:
param('state'): defines the state in which the jobs of this analysis may be failing.
param('lethal_after'): makes jobs' failures lethal when 'value' is greater than this parameter
param('time_GET_INPUT'): time in seconds that the job will spend sleeping in GET_INPUT state.
param('time_RUN'): time in seconds that the job will spend sleeping in RUN state.
......@@ -68,6 +70,7 @@ sub fetch_input {
'value' => 1, # normally you generate a batch of jobs with different values of param('value')
'divisor' => 2, # but the same param('divisor') and see how every param('divisor')'s job will crash
'state' => 'RUN', # the state in which the process may commit apoptosis
'lethal_after' => 0, # If value is above this (nonzero) threshold, job's death becomes lethal to the worker.
'time_GET_INPUT' => 0, # how much time fetch_input() will spend in sleeping state
'time_RUN' => 1, # how much time run() will spend in sleeping state
......@@ -123,8 +126,11 @@ sub dangerous_math {
my $divisor = $self->param('divisor') or die "param('divisor') has to be a nonzero integer";
if($value % $divisor == 0) {
if($value>10) { # take the Worker with us into the grave
$self->input_job->lethal_for_worker(1);
if(my $lethal_after = $self->param('lethal_after')) {
if($value>$lethal_after) { # take the Worker with us into the grave
$self->input_job->lethal_for_worker(1);
}
}
die "Preprogrammed death since $value is a multiple of $divisor";
......
......@@ -558,7 +558,10 @@ sub run
if( ($job->status eq 'COMPILATION') # if it failed to compile, there is no point in continuing as the code WILL be broken
or $self->prev_job_error # a bit of AI: if the previous job failed as well, it is LIKELY that we have contamination
or $job->lethal_for_worker ) { # trust the job's expert knowledge
warn "Job's error has contaminated the Worker, so the Worker will now die\n";
my $reason = ($job->status eq 'COMPILATION') ? 'compilation error'
: $self->prev_job_error ? 'two failed jobs in a row'
: 'suggested by job itself';
warn "Job's error has contaminated the Worker ($reason), so the Worker will now die\n";
$self->cause_of_death('CONTAMINATED');
last BATCHES;
}
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment