Skip to content
Snippets Groups Projects
Commit 658c9078 authored by Leo Gordon's avatar Leo Gordon
Browse files

Pulling in the bugfix for jobs dying when parsing parameters

parents 6dc0d105 6eb4e142
No related branches found
No related tags found
No related merge requests found
......@@ -169,7 +169,7 @@ sub fetch_some_by_analysis_id_limit {
# Fetch the jobs attached to the given worker whose status is one of the
# states listed in the constraint, by delegating to $self->fetch_all().
#   Arg [1]    : $worker_id -- interpolated as-is into the SQL constraint string
#   Returntype : whatever $self->fetch_all($constraint) returns
# NOTE(review): the two consecutive `my $constraint` lines look like the
# before/after pair of a diff hunk. As written, the second declaration
# (the list starting with 'CLAIMED') masks the first and is the one that
# is actually passed to fetch_all() -- confirm only one is meant to remain.
sub fetch_all_incomplete_jobs_by_worker_id {
my ($self, $worker_id) = @_;
my $constraint = "status IN ('COMPILATION','PRE_CLEANUP','FETCH_INPUT','RUN','WRITE_OUTPUT','POST_CLEANUP') AND worker_id='$worker_id'";
my $constraint = "status IN ('CLAIMED','PRE_CLEANUP','FETCH_INPUT','RUN','WRITE_OUTPUT','POST_CLEANUP') AND worker_id='$worker_id'";
return $self->fetch_all($constraint);
}
......@@ -292,7 +292,7 @@ sub store_out_files {
Arg [1] : int $job_id
Arg [2] : int $worker_id (optional)
Description: resets a job to to 'READY' (if no $worker_id given) or directly to 'CLAIMED' so it can be run again, and fetches it..
Description: resets a job to 'READY' (if no $worker_id given) or directly to 'CLAIMED' so it can be run again, and fetches it.
NB: Will also reset a previously 'SEMAPHORED' job to READY.
The retry_count will be set to 1 for previously run jobs (partially or wholly) to trigger PRE_CLEANUP for them,
but will not change retry_count if a job has never *really* started.
......@@ -310,7 +310,7 @@ sub reset_or_grab_job_by_dbID {
# Note: the order of the fields being updated is critical!
my $sql = qq{
UPDATE job
SET retry_count = CASE WHEN (status='COMPILATION' OR status='READY' OR status='CLAIMED') THEN retry_count ELSE 1 END
SET retry_count = CASE WHEN (status='READY' OR status='CLAIMED') THEN retry_count ELSE 1 END
, status=?
, worker_id=?
WHERE job_id=?
......@@ -398,7 +398,7 @@ sub grab_jobs_for_worker {
Description: If a worker has died some of its jobs need to be reset back to 'READY'
so they can be rerun.
Jobs in state CLAIMED are simply reset back to READY.
If jobs was 'in progress' (COMPILATION, PRE_CLEANUP, FETCH_INPUT, RUN, WRITE_OUTPUT, POST_CLEANUP)
If a job was 'in progress' (PRE_CLEANUP, FETCH_INPUT, RUN, WRITE_OUTPUT, POST_CLEANUP)
the retry_count is increased and the status set back to READY.
If the retry_count >= $max_retry_count (3 by default) the job is set
to 'FAILED' and not rerun again.
......@@ -427,7 +427,7 @@ sub release_undone_jobs_from_worker {
SELECT job_id
FROM job
WHERE worker_id='$worker_id'
AND status in ('COMPILATION','PRE_CLEANUP','FETCH_INPUT','RUN','WRITE_OUTPUT','POST_CLEANUP')
AND status in ('PRE_CLEANUP','FETCH_INPUT','RUN','WRITE_OUTPUT','POST_CLEANUP')
} );
$sth->execute();
......@@ -478,7 +478,7 @@ sub release_and_age_job {
retry_count=retry_count+1,
runtime_msec=$runtime_msec
WHERE job_id=$job_id
AND status in ('COMPILATION','PRE_CLEANUP','FETCH_INPUT','RUN','WRITE_OUTPUT','POST_CLEANUP')
AND status in ('CLAIMED','PRE_CLEANUP','FETCH_INPUT','RUN','WRITE_OUTPUT','POST_CLEANUP')
} );
}
......@@ -537,7 +537,7 @@ sub reset_jobs_for_analysis_id {
my $sql = qq{
UPDATE job
SET retry_count = CASE WHEN (status='COMPILATION' OR status='READY' OR status='CLAIMED') THEN 0 ELSE 1 END,
SET retry_count = CASE WHEN (status='READY' OR status='CLAIMED') THEN 0 ELSE 1 END,
}. ( ($self->dbc->driver eq 'pgsql')
? "status = CAST(CASE WHEN semaphore_count>0 THEN 'SEMAPHORED' ELSE 'READY' END AS jw_status) "
: "status = CASE WHEN semaphore_count>0 THEN 'SEMAPHORED' ELSE 'READY' END "
......
......@@ -110,6 +110,9 @@ sub param_init {
if($strict_hash_format or $source=~/^\{.*\}$/) {
my $param_hash = eval($source) || {};
if($@ or (ref($param_hash) ne 'HASH')) {
if($self->can('transient_error')) {
$self->transient_error(0);
}
die "Expected a {'param'=>'value'} hashref, but got the following string instead: '$source'\n";
}
$source = $param_hash;
......
......@@ -477,8 +477,9 @@ sub run {
$self->{'_interval_partial_timing'} = {};
if( my $special_batch = $self->special_batch() ) {
my $special_batch_length = scalar(@$special_batch); # has to be recorded because the list is gradually destroyed
$jobs_done_by_batches_loop += $self->run_one_batch( $special_batch );
$self->cause_of_death('JOB_LIMIT');
$self->cause_of_death( $jobs_done_by_batches_loop == $special_batch_length ? 'JOB_LIMIT' : 'CONTAMINATED');
} else { # a proper "BATCHES" loop
while (!$self->cause_of_death and $batches_stopwatch->get_elapsed < $min_batch_time) {
......
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment