Skip to content
Snippets Groups Projects
Commit 18f2193a authored by Leo Gordon's avatar Leo Gordon
Browse files

only run balance_semaphores() automatically when it is absolutely necessary...

only run balance_semaphores() automatically when it is absolutely necessary (no running or otherwise runnable jobs)
parent 0d3c6612
No related branches found
No related tags found
No related merge requests found
......@@ -539,21 +539,20 @@ sub fetch_overdue_workers {
=cut
sub synchronize_hive {
my $self = shift;
my $filter_analysis = shift; # optional parameter
my ($self, $filter_analysis) = @_;
my $start_time = time();
my $start_time = time();
my $list_of_analyses = $filter_analysis ? [$filter_analysis] : $self->db->get_AnalysisAdaptor->fetch_all;
my $list_of_analyses = $filter_analysis ? [$filter_analysis] : $self->db->get_AnalysisAdaptor->fetch_all;
print STDERR "\nSynchronizing the hive (".scalar(@$list_of_analyses)." analyses this time):\n";
foreach my $analysis (@$list_of_analyses) {
$self->synchronize_AnalysisStats($analysis->stats);
print STDERR ( ($analysis->stats()->status eq 'BLOCKED') ? 'x' : 'o');
}
print STDERR "\n";
print STDERR "\nSynchronizing the hive (".scalar(@$list_of_analyses)." analyses this time):\n";
foreach my $analysis (@$list_of_analyses) {
$self->synchronize_AnalysisStats($analysis->stats);
print STDERR ( ($analysis->stats()->status eq 'BLOCKED') ? 'x' : 'o');
}
print STDERR "\n";
print STDERR ''.((time() - $start_time))." seconds to synchronize_hive\n\n";
print STDERR ''.((time() - $start_time))." seconds to synchronize_hive\n\n";
}
......@@ -623,6 +622,25 @@ sub synchronize_AnalysisStats {
}
sub check_nothing_to_run_but_semaphored { # make sure it is run after a recent sync
my ($self, $filter_analysis) = @_;
my $list_of_analyses = $filter_analysis ? [$filter_analysis] : $self->db->get_AnalysisAdaptor->fetch_all;
my $only_semaphored_jobs_to_run = 1;
my $total_semaphored_job_count = 0;
foreach my $analysis (@$list_of_analyses) {
my $stats = $analysis->stats;
$only_semaphored_jobs_to_run = 1 if( $stats->total_job_count != $stats->done_job_count + $stats->failed_job_count + $stats->semaphored_job_count );
$total_semaphored_job_count += $stats->semaphored_job_count;
}
return ( $total_semaphored_job_count && $only_semaphored_jobs_to_run );
}
=head2 get_num_failed_analyses
Arg [1] : Bio::EnsEMBL::Hive::AnalysisStats object (optional)
......
......@@ -70,11 +70,17 @@ sub schedule_workers_resync_if_necessary {
print "Scheduler: re-synchronizing the Hive...\n";
$queen->synchronize_hive($filter_analysis);
if($queen->db->hive_auto_rebalance_semaphores) { # make sure rebalancing only ever happens for the pipelines that asked for it
print "Scheduler: re-balancing of semaphore_counts...\n";
if( my $rebalanced_jobs_counter = $queen->db->get_AnalysisJobAdaptor->balance_semaphores($filter_analysis && $filter_analysis->dbID) ) {
print "Scheduler: re-balanced $rebalanced_jobs_counter jobs, going through another re-synchronization of the Hive...\n";
$queen->synchronize_hive($filter_analysis);
if( $queen->db->hive_auto_rebalance_semaphores ) { # make sure rebalancing only ever happens for the pipelines that asked for it
if( $queen->check_nothing_to_run_but_semaphored ) { # and double-check on our side
print "Scheduler: looks like we may need re-balancing semaphore_counts...\n";
if( my $rebalanced_jobs_counter = $queen->db->get_AnalysisJobAdaptor->balance_semaphores($filter_analysis && $filter_analysis->dbID) ) {
print "Scheduler: re-balanced $rebalanced_jobs_counter jobs, going through another re-synchronization of the Hive...\n";
$queen->synchronize_hive($filter_analysis);
} else {
print "Scheduler: hmmm... managed to re-balance 0 jobs, you may need to investigate further.\n";
}
} else {
print "Scheduler: apparently there are no semaphored jobs that may need to be re-balanced at this time.\n";
}
} else {
print "Scheduler: automatic re-balancing of semaphore_counts is off by default. If you think your pipeline might benefit from it, set hive_auto_rebalance_semaphores => 1 in the PipeConfig's hive_meta_table.\n";
......
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment