Commit 88c92697 authored by Leo Gordon
Browse files

turning the max_limbo_seconds into a JSON config parameter that can be as...

turning the max_limbo_seconds into a JSON config parameter that can be as Meadow-specific as you wish
parent aa336053
......@@ -8,6 +8,7 @@
},
"Meadow" : {
"CleanupTempDirectoryKilledWorkers" : 0,
"MaxLimboSeconds" : 10,
"LOCAL" : {
"TotalRunningWorkersMax" : 2,
"my-quadcore-desktop" : {
......
......@@ -108,20 +108,6 @@ sub object_class {
}
############################
#
# Queen attributes
#
############################
# Combined getter/setter for the value kept in $self->{'_max_limbo_seconds'}.
#   Arg [1] : (optional) new value; when any argument is passed it is stored as-is, even if undef
#   Returns : the value currently stored (undef if it was never set)
sub max_limbo_seconds {
    my ($self, @new_value) = @_;

    # Presence of an argument, not its definedness, decides whether we overwrite.
    if (scalar @new_value) {
        $self->{'_max_limbo_seconds'} = $new_value[0];
    }

    return $self->{'_max_limbo_seconds'};
}
############################
#
# PUBLIC API
......@@ -515,9 +501,12 @@ sub check_for_dead_workers { # scans the whole Valley for lost Workers (but i
$self->update( $worker, @updated_attribs ) if(scalar(@updated_attribs));
}
my $max_limbo_seconds = $this_meadow->config_get('MaxLimboSeconds') // 0; # The maximum time for a Meadow to start showing the Worker (even in PEND state) after submission.
# We use it as a timeout for burying SUBMITTED and Meadow-invisible entries in the 'worker' table.
if( ($worker->status eq 'LOST')
|| $worker->when_died # reported by Meadow as DEAD (only if Meadow supports get_report_entries_for_process_ids)
|| ($worker->seconds_since_when_submitted > $self->max_limbo_seconds) ) { # SUBMITTED and waited in limbo (not yet registered) for too long => we consider them LOST
|| ($worker->seconds_since_when_submitted > $max_limbo_seconds) ) { # SUBMITTED and Meadow-invisible for too long => we consider them LOST
$worker->cause_of_death('LIMBO') if( ($worker->status eq 'SUBMITTED') and !$worker->cause_of_death); # LIMBO cause_of_death means: found in SUBMITTED state, exceeded the timeout, Meadow did not tell us more
......
......@@ -86,7 +86,6 @@ sub main {
$self->{'submit_log_dir'} = undef;
$self->{'worker_delay_startup_seconds'} = undef;
$self->{'worker_crash_on_startup_prob'} = undef;
$self->{'max_limbo_seconds'} = 30;
# store all the options passed on the command line for registration
# we re-create this a bit later, so that we can protect any passwords
......@@ -134,9 +133,6 @@ sub main {
'worker_delay_startup_seconds=i' => \$self->{'worker_delay_startup_seconds'},
'worker_crash_on_startup_prob=f' => \$self->{'worker_crash_on_startup_prob'},
# queen attributes:
'max_limbo_seconds=i' => \$self->{'max_limbo_seconds'},
# other commands/options
'h|help!' => \$help,
'v|version|versions!' => \$report_versions,
......@@ -268,10 +264,6 @@ sub main {
my $queen = $self->{'dba'}->get_Queen;
foreach my $queen_attrib (qw(max_limbo_seconds)) {
$queen->$queen_attrib( $self->{$queen_attrib} ) if defined($self->{$queen_attrib});
}
if($reset_job_id) { $queen->reset_job_by_dbID_and_sync($reset_job_id); }
if($job_id_for_output) {
......@@ -790,7 +782,6 @@ __DATA__
-hive_log_dir <path> : directory where stdout/stderr of the hive is redirected
-worker_delay_startup_seconds <number> : number of seconds each worker has to wait before first talking to the database (0 by default, useful for debugging)
-worker_crash_on_startup_prob <float> : probability of each worker failing at startup (0 by default, useful for debugging)
-max_limbo_seconds <number> : timeout for dumb Meadows for burying Workers that stay in SUBMITTED state for too long and are therefore considered LOST
-debug <debug_level> : set debug level of the workers
=head2 Other commands/options
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment