Commit eae5d773 authored by Leo Gordon
Browse files

detecting and reporting lost control

parent c7a7b2eb
......@@ -524,6 +524,7 @@ sub grab_jobs_for_worker {
=head2 release_undone_jobs_from_worker
Arg [1] : Bio::EnsEMBL::Hive::Worker object
Arg [2] : optional message to be recorded in 'job_message' table
Example :
Description: If a worker has died some of its jobs need to be reset back to 'READY'
so they can be rerun.
......@@ -538,7 +539,7 @@ sub grab_jobs_for_worker {
=cut
sub release_undone_jobs_from_worker {
my ($self, $worker) = @_;
my ($self, $worker, $msg) = @_;
my $max_retry_count = $worker->analysis->stats->max_retry_count();
my $worker_id = $worker->worker_id();
......@@ -560,7 +561,8 @@ sub release_undone_jobs_from_worker {
$sth->execute();
my $cod = $worker->cause_of_death();
my $msg = "GarbageCollector: The worker died because of $cod";
$msg ||= "GarbageCollector: The worker died because of $cod";
while(my ($job_id, $retry_count) = $sth->fetchrow_array()) {
my $resource_overusage = ($cod eq 'MEMLIMIT') || ($cod eq 'RUNLIMIT' and $worker->work_done()==0);
......
......@@ -521,8 +521,11 @@ sub run {
while (!$self->cause_of_death and $batches_stopwatch->get_elapsed < $MIN_BATCH_TIME) {
if(my $incompleted_count = @{ $job_adaptor->fetch_all_incomplete_jobs_by_worker_id( $self->worker_id ) }) {
die "This worker is too greedy: not having completed $incompleted_count jobs it is trying to grab yet more jobs! Has it gone multithreaded?\n";
if( scalar(@{ $job_adaptor->fetch_all_incomplete_jobs_by_worker_id( $self->worker_id ) }) ) {
my $msg = "Lost control. Check your Runnable for loose 'next' statements that are not part of a loop";
warn "$msg";
$self->cause_of_death('CONTAMINATED');
$job_adaptor->release_undone_jobs_from_worker($self, $msg);
} else {
$jobs_done_by_batches_loop += $self->run_one_batch( $job_adaptor->grab_jobs_for_worker( $self ) );
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment