Skip to content
Snippets Groups Projects
Commit 85b961a8 authored by Leo Gordon's avatar Leo Gordon
Browse files

preparing to add gc_dataflow

parent 11c13a7c
No related branches found
No related tags found
No related merge requests found
......@@ -551,9 +551,17 @@ sub release_undone_jobs_from_worker {
while(my ($job_id, $retry_count) = $sth->fetchrow_array()) {
$self->db()->get_JobMessageAdaptor()->register_message($job_id, $msg, 1 );
my $may_retry = ($cod ne 'MEMLIMIT' and $cod ne 'RUNLIMIT');
my $resource_overusage = ($cod eq 'MEMLIMIT') or ($cod eq 'RUNLIMIT' and $worker->work_done()==0);
$self->release_and_age_job( $job_id, $max_retry_count, $may_retry );
my $passed_on = 0; # the flag indicating that the garbage_collection was attempted and was successful
if( $resource_overusage) {
$passed_on = $self->gc_dataflow( $job_id, $cod );
}
unless($passed_on) {
$self->release_and_age_job( $job_id, $max_retry_count, not $resource_overusage );
}
}
$sth->finish();
}
......@@ -563,7 +571,7 @@ sub release_and_age_job {
my ($self, $job_id, $max_retry_count, $may_retry) = @_;
$may_retry ||= 0;
# NB: The order of updates IS important. Here we first find out the new status and then increment the retry_count:
# NB: The order of updated fields IS important. Here we first find out the new status and then increment the retry_count:
$self->dbc->do( qq{
UPDATE analysis_job
SET worker_id=0, job_claim='', status=IF( $may_retry AND (retry_count<$max_retry_count), 'READY', 'FAILED'), retry_count=retry_count+1
......@@ -572,6 +580,24 @@ sub release_and_age_job {
} );
}
=head2 gc_dataflow (stub)
0) check if there is a dataflow rule that corresponds to this $cod, return 0 if not
1) perform a 'limited responsibility' dataflow
2) set the given job's status to 'PASSED_ON'
3) record the fact of the dataflow in job_message table
4) return 1 if gc_dataflow succeeded, 0 otherwise
=cut
sub gc_dataflow {
my ($self, $job_id, $cod) = @_;
return 0;
}
=head2 reset_job_by_dbID
......
......@@ -55,7 +55,7 @@ use base ('Bio::EnsEMBL::DBSQL::BaseAdaptor');
and returns all such rules in a list (by reference)
Returntype : reference to list of Bio::EnsEMBL::Hive::DataflowRule objects
Exceptions : none
Caller : Bio::EnsEMBL::Hive::Process::dataflow_output_id
Caller : Bio::EnsEMBL::Hive::AnalysisJob::dataflow_output_id
=cut
......
......@@ -30,7 +30,7 @@
UNIQUE (from_analysis_id, to_analysis_url)
);
A dataflow rule is activated when a Bio::EnsEMBL::Hive::Process::dataflow_output_id is called at any moment during a RunnableDB's execution.
A dataflow rule is activated when a Bio::EnsEMBL::Hive::AnalysisJob::dataflow_output_id is called at any moment during a RunnableDB's execution.
The current RunnableDB's analysis ($from_analysis) and the requested $branch_code (1 by default) define the entry conditions,
and whatever rules match these conditions will generate new jobs with input_ids specified in the dataflow_output_id() call.
If input_id_template happens to contain a non-NULL value, it will be used to generate the corresponding intput_id instead.
......
## A new 'PASSED_ON' state is added to the Job to make it possible to dataflow from resource-overusing jobs recovered from dead workers:
ALTER TABLE analysis_job MODIFY COLUMN status enum('READY','BLOCKED','CLAIMED','COMPILATION','GET_INPUT','RUN','WRITE_OUTPUT','DONE','FAILED','PASSED_ON') DEFAULT 'READY' NOT NULL;
......@@ -129,7 +129,7 @@ CREATE TABLE analysis_job (
input_id char(255) not null,
job_claim char(40) NOT NULL DEFAULT '', #UUID
worker_id int(10) NOT NULL,
status enum('READY','BLOCKED','CLAIMED','COMPILATION','GET_INPUT','RUN','WRITE_OUTPUT','DONE','FAILED') DEFAULT 'READY' NOT NULL,
status enum('READY','BLOCKED','CLAIMED','COMPILATION','GET_INPUT','RUN','WRITE_OUTPUT','DONE','FAILED','PASSED_ON') DEFAULT 'READY' NOT NULL,
retry_count int(10) default 0 not NULL,
completed datetime NOT NULL,
runtime_msec int(10) default 0 NOT NULL,
......
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment