Commit 658c9078 authored by Leo Gordon
Browse files

Pulling in the bugfix for jobs dying when parsing parameters

parents 6dc0d105 6eb4e142
...@@ -169,7 +169,7 @@ sub fetch_some_by_analysis_id_limit { ...@@ -169,7 +169,7 @@ sub fetch_some_by_analysis_id_limit {
sub fetch_all_incomplete_jobs_by_worker_id { sub fetch_all_incomplete_jobs_by_worker_id {
my ($self, $worker_id) = @_; my ($self, $worker_id) = @_;
my $constraint = "status IN ('COMPILATION','PRE_CLEANUP','FETCH_INPUT','RUN','WRITE_OUTPUT','POST_CLEANUP') AND worker_id='$worker_id'"; my $constraint = "status IN ('CLAIMED','PRE_CLEANUP','FETCH_INPUT','RUN','WRITE_OUTPUT','POST_CLEANUP') AND worker_id='$worker_id'";
return $self->fetch_all($constraint); return $self->fetch_all($constraint);
} }
...@@ -292,7 +292,7 @@ sub store_out_files { ...@@ -292,7 +292,7 @@ sub store_out_files {
Arg [1] : int $job_id Arg [1] : int $job_id
Arg [2] : int $worker_id (optional) Arg [2] : int $worker_id (optional)
Description: resets a job to 'READY' (if no $worker_id given) or directly to 'CLAIMED' so it can be run again, and fetches it.. Description: resets a job to 'READY' (if no $worker_id given) or directly to 'CLAIMED' so it can be run again, and fetches it.
NB: Will also reset a previously 'SEMAPHORED' job to READY. NB: Will also reset a previously 'SEMAPHORED' job to READY.
The retry_count will be set to 1 for previously run jobs (partially or wholly) to trigger PRE_CLEANUP for them, The retry_count will be set to 1 for previously run jobs (partially or wholly) to trigger PRE_CLEANUP for them,
but will not change retry_count if a job has never *really* started. but will not change retry_count if a job has never *really* started.
...@@ -310,7 +310,7 @@ sub reset_or_grab_job_by_dbID { ...@@ -310,7 +310,7 @@ sub reset_or_grab_job_by_dbID {
# Note: the order of the fields being updated is critical! # Note: the order of the fields being updated is critical!
my $sql = qq{ my $sql = qq{
UPDATE job UPDATE job
SET retry_count = CASE WHEN (status='COMPILATION' OR status='READY' OR status='CLAIMED') THEN retry_count ELSE 1 END SET retry_count = CASE WHEN (status='READY' OR status='CLAIMED') THEN retry_count ELSE 1 END
, status=? , status=?
, worker_id=? , worker_id=?
WHERE job_id=? WHERE job_id=?
...@@ -398,7 +398,7 @@ sub grab_jobs_for_worker { ...@@ -398,7 +398,7 @@ sub grab_jobs_for_worker {
Description: If a worker has died some of its jobs need to be reset back to 'READY' Description: If a worker has died some of its jobs need to be reset back to 'READY'
so they can be rerun. so they can be rerun.
Jobs in state CLAIMED are simply reset back to READY. Jobs in state CLAIMED are simply reset back to READY.
If a job was 'in progress' (COMPILATION, PRE_CLEANUP, FETCH_INPUT, RUN, WRITE_OUTPUT, POST_CLEANUP) If a job was 'in progress' (PRE_CLEANUP, FETCH_INPUT, RUN, WRITE_OUTPUT, POST_CLEANUP)
the retry_count is increased and the status set back to READY. the retry_count is increased and the status set back to READY.
If the retry_count >= $max_retry_count (3 by default) the job is set If the retry_count >= $max_retry_count (3 by default) the job is set
to 'FAILED' and not rerun again. to 'FAILED' and not rerun again.
...@@ -427,7 +427,7 @@ sub release_undone_jobs_from_worker { ...@@ -427,7 +427,7 @@ sub release_undone_jobs_from_worker {
SELECT job_id SELECT job_id
FROM job FROM job
WHERE worker_id='$worker_id' WHERE worker_id='$worker_id'
AND status in ('COMPILATION','PRE_CLEANUP','FETCH_INPUT','RUN','WRITE_OUTPUT','POST_CLEANUP') AND status in ('PRE_CLEANUP','FETCH_INPUT','RUN','WRITE_OUTPUT','POST_CLEANUP')
} ); } );
$sth->execute(); $sth->execute();
...@@ -478,7 +478,7 @@ sub release_and_age_job { ...@@ -478,7 +478,7 @@ sub release_and_age_job {
retry_count=retry_count+1, retry_count=retry_count+1,
runtime_msec=$runtime_msec runtime_msec=$runtime_msec
WHERE job_id=$job_id WHERE job_id=$job_id
AND status in ('COMPILATION','PRE_CLEANUP','FETCH_INPUT','RUN','WRITE_OUTPUT','POST_CLEANUP') AND status in ('CLAIMED','PRE_CLEANUP','FETCH_INPUT','RUN','WRITE_OUTPUT','POST_CLEANUP')
} ); } );
} }
...@@ -537,7 +537,7 @@ sub reset_jobs_for_analysis_id { ...@@ -537,7 +537,7 @@ sub reset_jobs_for_analysis_id {
my $sql = qq{ my $sql = qq{
UPDATE job UPDATE job
SET retry_count = CASE WHEN (status='COMPILATION' OR status='READY' OR status='CLAIMED') THEN 0 ELSE 1 END, SET retry_count = CASE WHEN (status='READY' OR status='CLAIMED') THEN 0 ELSE 1 END,
}. ( ($self->dbc->driver eq 'pgsql') }. ( ($self->dbc->driver eq 'pgsql')
? "status = CAST(CASE WHEN semaphore_count>0 THEN 'SEMAPHORED' ELSE 'READY' END AS jw_status) " ? "status = CAST(CASE WHEN semaphore_count>0 THEN 'SEMAPHORED' ELSE 'READY' END AS jw_status) "
: "status = CASE WHEN semaphore_count>0 THEN 'SEMAPHORED' ELSE 'READY' END " : "status = CASE WHEN semaphore_count>0 THEN 'SEMAPHORED' ELSE 'READY' END "
......
...@@ -110,6 +110,9 @@ sub param_init { ...@@ -110,6 +110,9 @@ sub param_init {
if($strict_hash_format or $source=~/^\{.*\}$/) { if($strict_hash_format or $source=~/^\{.*\}$/) {
my $param_hash = eval($source) || {}; my $param_hash = eval($source) || {};
if($@ or (ref($param_hash) ne 'HASH')) { if($@ or (ref($param_hash) ne 'HASH')) {
if($self->can('transient_error')) {
$self->transient_error(0);
}
die "Expected a {'param'=>'value'} hashref, but got the following string instead: '$source'\n"; die "Expected a {'param'=>'value'} hashref, but got the following string instead: '$source'\n";
} }
$source = $param_hash; $source = $param_hash;
......
...@@ -477,8 +477,9 @@ sub run { ...@@ -477,8 +477,9 @@ sub run {
$self->{'_interval_partial_timing'} = {}; $self->{'_interval_partial_timing'} = {};
if( my $special_batch = $self->special_batch() ) { if( my $special_batch = $self->special_batch() ) {
my $special_batch_length = scalar(@$special_batch); # has to be recorded because the list is gradually destroyed
$jobs_done_by_batches_loop += $self->run_one_batch( $special_batch ); $jobs_done_by_batches_loop += $self->run_one_batch( $special_batch );
$self->cause_of_death('JOB_LIMIT'); $self->cause_of_death( $jobs_done_by_batches_loop == $special_batch_length ? 'JOB_LIMIT' : 'CONTAMINATED');
} else { # a proper "BATCHES" loop } else { # a proper "BATCHES" loop
while (!$self->cause_of_death and $batches_stopwatch->get_elapsed < $min_batch_time) { while (!$self->cause_of_death and $batches_stopwatch->get_elapsed < $min_batch_time) {
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment