diff --git a/modules/Bio/EnsEMBL/Hive/DBSQL/AnalysisJobAdaptor.pm b/modules/Bio/EnsEMBL/Hive/DBSQL/AnalysisJobAdaptor.pm index 129686e6bde6f9245da3eaff17a2b00dae0b7bb5..aa1faac2ed1f7de41ef59f4a35270c3a42f1a042 100644 --- a/modules/Bio/EnsEMBL/Hive/DBSQL/AnalysisJobAdaptor.pm +++ b/modules/Bio/EnsEMBL/Hive/DBSQL/AnalysisJobAdaptor.pm @@ -551,9 +551,17 @@ sub release_undone_jobs_from_worker { while(my ($job_id, $retry_count) = $sth->fetchrow_array()) { $self->db()->get_JobMessageAdaptor()->register_message($job_id, $msg, 1 ); - my $may_retry = ($cod ne 'MEMLIMIT' and $cod ne 'RUNLIMIT'); + my $resource_overusage = ($cod eq 'MEMLIMIT') || ($cod eq 'RUNLIMIT' and $worker->work_done()==0); - $self->release_and_age_job( $job_id, $max_retry_count, $may_retry ); + my $passed_on = 0; # the flag indicating that the garbage_collection was attempted and was successful + + if( $resource_overusage) { + $passed_on = $self->gc_dataflow( $job_id, $cod ); + } + + unless($passed_on) { + $self->release_and_age_job( $job_id, $max_retry_count, not $resource_overusage ); + } } $sth->finish(); } @@ -563,7 +571,7 @@ sub release_and_age_job { my ($self, $job_id, $max_retry_count, $may_retry) = @_; $may_retry ||= 0; - # NB: The order of updates IS important. Here we first find out the new status and then increment the retry_count: + # NB: The order of updated fields IS important. 
Here we first find out the new status and then increment the retry_count: $self->dbc->do( qq{ UPDATE analysis_job SET worker_id=0, job_claim='', status=IF( $may_retry AND (retry_count<$max_retry_count), 'READY', 'FAILED'), retry_count=retry_count+1 @@ -572,6 +580,24 @@ sub release_and_age_job { } ); } +=head2 gc_dataflow (stub) + + 0) check if there is a dataflow rule that corresponds to this $cod, return 0 if not + + 1) perform a 'limited responsibility' dataflow + 2) set the given job's status to 'PASSED_ON' + 3) record the fact of the dataflow in job_message table + + 4) return 1 if gc_dataflow succeeded, 0 otherwise + +=cut + +sub gc_dataflow { + my ($self, $job_id, $cod) = @_; + + return 0; +} + =head2 reset_job_by_dbID diff --git a/modules/Bio/EnsEMBL/Hive/DBSQL/DataflowRuleAdaptor.pm b/modules/Bio/EnsEMBL/Hive/DBSQL/DataflowRuleAdaptor.pm index 9b19b195dad819c2b4445b1b213bf538d0c382a2..a9b3b1de322b33f14bee5182f29b84e47e3be3d2 100755 --- a/modules/Bio/EnsEMBL/Hive/DBSQL/DataflowRuleAdaptor.pm +++ b/modules/Bio/EnsEMBL/Hive/DBSQL/DataflowRuleAdaptor.pm @@ -55,7 +55,7 @@ use base ('Bio::EnsEMBL::DBSQL::BaseAdaptor'); and returns all such rules in a list (by reference) Returntype : reference to list of Bio::EnsEMBL::Hive::DataflowRule objects Exceptions : none - Caller : Bio::EnsEMBL::Hive::Process::dataflow_output_id + Caller : Bio::EnsEMBL::Hive::AnalysisJob::dataflow_output_id =cut diff --git a/modules/Bio/EnsEMBL/Hive/DataflowRule.pm b/modules/Bio/EnsEMBL/Hive/DataflowRule.pm index 319d2a77f36a84ff5771eb2724b8b324c56e88ba..d1c13b8df3decd9e17a862beff2d9a75e2ff0647 100755 --- a/modules/Bio/EnsEMBL/Hive/DataflowRule.pm +++ b/modules/Bio/EnsEMBL/Hive/DataflowRule.pm @@ -30,7 +30,7 @@ UNIQUE (from_analysis_id, to_analysis_url) ); - A dataflow rule is activated when a Bio::EnsEMBL::Hive::Process::dataflow_output_id is called at any moment during a RunnableDB's execution. 
+ A dataflow rule is activated when a Bio::EnsEMBL::Hive::AnalysisJob::dataflow_output_id is called at any moment during a RunnableDB's execution. The current RunnableDB's analysis ($from_analysis) and the requested $branch_code (1 by default) define the entry conditions, and whatever rules match these conditions will generate new jobs with input_ids specified in the dataflow_output_id() call. If input_id_template happens to contain a non-NULL value, it will be used to generate the corresponding intput_id instead. diff --git a/sql/patch_2010-09-19.sql b/sql/patch_2010-09-19.sql new file mode 100644 index 0000000000000000000000000000000000000000..9a6dd813d6187ea5fd552c1f62eec80aa7ac9b68 --- /dev/null +++ b/sql/patch_2010-09-19.sql @@ -0,0 +1,5 @@ + +## A new 'PASSED_ON' state is added to the Job to make it possible to dataflow from resource-overusing jobs recovered from dead workers: + +ALTER TABLE analysis_job MODIFY COLUMN status enum('READY','BLOCKED','CLAIMED','COMPILATION','GET_INPUT','RUN','WRITE_OUTPUT','DONE','FAILED','PASSED_ON') DEFAULT 'READY' NOT NULL; + diff --git a/sql/tables.sql b/sql/tables.sql index d69606e459dc7b826fa1557f7e9cd1a60406a090..72cc861fe4f795beaa262d3b712a569a33248814 100644 --- a/sql/tables.sql +++ b/sql/tables.sql @@ -129,7 +129,7 @@ CREATE TABLE analysis_job ( input_id char(255) not null, job_claim char(40) NOT NULL DEFAULT '', #UUID worker_id int(10) NOT NULL, - status enum('READY','BLOCKED','CLAIMED','COMPILATION','GET_INPUT','RUN','WRITE_OUTPUT','DONE','FAILED') DEFAULT 'READY' NOT NULL, + status enum('READY','BLOCKED','CLAIMED','COMPILATION','GET_INPUT','RUN','WRITE_OUTPUT','DONE','FAILED','PASSED_ON') DEFAULT 'READY' NOT NULL, retry_count int(10) default 0 not NULL, completed datetime NOT NULL, runtime_msec int(10) default 0 NOT NULL,