diff --git a/modules/Bio/EnsEMBL/Hive/Queen.pm b/modules/Bio/EnsEMBL/Hive/Queen.pm index eb1cedb48d7d68fff5497ffd370aac9d390172a6..f866463e5f170a3f0fe8fde8e4c74d2c2d52c14d 100644 --- a/modules/Bio/EnsEMBL/Hive/Queen.pm +++ b/modules/Bio/EnsEMBL/Hive/Queen.pm @@ -539,21 +539,20 @@ sub fetch_overdue_workers { =cut sub synchronize_hive { - my $self = shift; - my $filter_analysis = shift; # optional parameter + my ($self, $filter_analysis) = @_; - my $start_time = time(); + my $start_time = time(); - my $list_of_analyses = $filter_analysis ? [$filter_analysis] : $self->db->get_AnalysisAdaptor->fetch_all; + my $list_of_analyses = $filter_analysis ? [$filter_analysis] : $self->db->get_AnalysisAdaptor->fetch_all; - print STDERR "\nSynchronizing the hive (".scalar(@$list_of_analyses)." analyses this time):\n"; - foreach my $analysis (@$list_of_analyses) { - $self->synchronize_AnalysisStats($analysis->stats); - print STDERR ( ($analysis->stats()->status eq 'BLOCKED') ? 'x' : 'o'); - } - print STDERR "\n"; + print STDERR "\nSynchronizing the hive (".scalar(@$list_of_analyses)." analyses this time):\n"; + foreach my $analysis (@$list_of_analyses) { + $self->synchronize_AnalysisStats($analysis->stats); + print STDERR ( ($analysis->stats()->status eq 'BLOCKED') ? 'x' : 'o'); + } + print STDERR "\n"; - print STDERR ''.((time() - $start_time))." seconds to synchronize_hive\n\n"; + print STDERR ''.((time() - $start_time))." seconds to synchronize_hive\n\n"; } @@ -623,6 +622,25 @@ sub synchronize_AnalysisStats { } +sub check_nothing_to_run_but_semaphored { # make sure it is run after a recent sync + my ($self, $filter_analysis) = @_; + + my $list_of_analyses = $filter_analysis ? [$filter_analysis] : $self->db->get_AnalysisAdaptor->fetch_all; + + my $only_semaphored_jobs_to_run = 1; + my $total_semaphored_job_count = 0; + + foreach my $analysis (@$list_of_analyses) { + my $stats = $analysis->stats; + + $only_semaphored_jobs_to_run = 1 if( $stats->total_job_count != $stats->done_job_count + $stats->failed_job_count + $stats->semaphored_job_count ); + $total_semaphored_job_count += $stats->semaphored_job_count; + } + + return ( $total_semaphored_job_count && $only_semaphored_jobs_to_run ); +} + + =head2 get_num_failed_analyses Arg [1] : Bio::EnsEMBL::Hive::AnalysisStats object (optional) diff --git a/modules/Bio/EnsEMBL/Hive/Scheduler.pm b/modules/Bio/EnsEMBL/Hive/Scheduler.pm index d34ac4c23908297176d8e296fd7a986293a6419a..990626189db160dbabe2d1237840049a8ded191a 100644 --- a/modules/Bio/EnsEMBL/Hive/Scheduler.pm +++ b/modules/Bio/EnsEMBL/Hive/Scheduler.pm @@ -70,11 +70,17 @@ sub schedule_workers_resync_if_necessary { print "Scheduler: re-synchronizing the Hive...\n"; $queen->synchronize_hive($filter_analysis); - if($queen->db->hive_auto_rebalance_semaphores) { # make sure rebalancing only ever happens for the pipelines that asked for it - print "Scheduler: re-balancing of semaphore_counts...\n"; - if( my $rebalanced_jobs_counter = $queen->db->get_AnalysisJobAdaptor->balance_semaphores($filter_analysis && $filter_analysis->dbID) ) { - print "Scheduler: re-balanced $rebalanced_jobs_counter jobs, going through another re-synchronization of the Hive...\n"; - $queen->synchronize_hive($filter_analysis); + if( $queen->db->hive_auto_rebalance_semaphores ) { # make sure rebalancing only ever happens for the pipelines that asked for it + if( $queen->check_nothing_to_run_but_semaphored ) { # and double-check on our side + print "Scheduler: looks like we may need re-balancing semaphore_counts...\n"; + if( my $rebalanced_jobs_counter = $queen->db->get_AnalysisJobAdaptor->balance_semaphores($filter_analysis && $filter_analysis->dbID) ) { + print "Scheduler: re-balanced $rebalanced_jobs_counter jobs, going through another re-synchronization of the Hive...\n"; + $queen->synchronize_hive($filter_analysis); + } else { + print "Scheduler: hmmm... managed to re-balance 0 jobs, you may need to investigate further.\n"; + } + } else { + print "Scheduler: apparently there are no semaphored jobs that may need to be re-balanced at this time.\n"; } } else { print "Scheduler: automatic re-balancing of semaphore_counts is off by default. If you think your pipeline might benefit from it, set hive_auto_rebalance_semaphores => 1 in the PipeConfig's hive_meta_table.\n";