From ba49fd685a9f756b73f429565ffb4c2ff4912271 Mon Sep 17 00:00:00 2001
From: Leo Gordon <lg4@ebi.ac.uk>
Date: Fri, 3 Oct 2014 14:27:19 +0100
Subject: [PATCH] more decision-making explained through log_buffer messages

---
 modules/Bio/EnsEMBL/Hive/Scheduler.pm | 44 +++++++++++++++++++++------
 1 file changed, 34 insertions(+), 10 deletions(-)

diff --git a/modules/Bio/EnsEMBL/Hive/Scheduler.pm b/modules/Bio/EnsEMBL/Hive/Scheduler.pm
index 28e174f7b..e74522d21 100644
--- a/modules/Bio/EnsEMBL/Hive/Scheduler.pm
+++ b/modules/Bio/EnsEMBL/Hive/Scheduler.pm
@@ -158,24 +158,45 @@ sub schedule_workers {
         my $queen_capacity_limiter                      = Bio::EnsEMBL::Hive::Limiter->new( 'Total reciprocal capacity of the Hive', 1.0 - $queen->db->get_RoleAdaptor->get_hive_current_load() );
 
         foreach my $analysis_stats (@stats_sorted_by_suitability) {
-            last if( $submit_capacity_limiter->reached );
+            if( $submit_capacity_limiter->reached ) {
+                if( $analysis_id2rc_name ) {    # only add this message when scheduling and not during a Worker's specialization
+                    push @log_buffer, "Submission capacity ($submit_capacity) has been reached.";
+                }
+                last;
+            }
 
             my $analysis            = $analysis_stats->analysis();    # FIXME: if it proves too expensive we may need to consider caching
+            my $logic_name          = $analysis->logic_name;
             my $this_meadow_type    = $analysis->meadow_type || $default_meadow_type;
 
-            next if( $meadow_capacity_limiter_hashed_by_type && $meadow_capacity_limiter_hashed_by_type->{$this_meadow_type}->reached );
+            if( $meadow_capacity_limiter_hashed_by_type && $meadow_capacity_limiter_hashed_by_type->{$this_meadow_type}->reached ) {
+                push @log_buffer, "Capacity of '$this_meadow_type' Meadow has been reached, skipping Analysis '$logic_name'.";
+                next;
+            }
 
                 #digging deeper under the surface so need to sync:
             if( $analysis_stats->status =~ /^(LOADING|ALL_CLAIMED|BLOCKED|SYNCHING)$/ ) {
-                $queen->safe_synchronize_AnalysisStats($analysis_stats);
+                push @log_buffer, "Analysis '$logic_name' is ".$analysis_stats->status.", safe-synching it...";
+
+                if( $queen->safe_synchronize_AnalysisStats($analysis_stats) ) {
+                    push @log_buffer, "Safe-sync of Analysis '$logic_name' succeeded.";
+                } else {
+                    push @log_buffer, "Safe-sync of Analysis '$logic_name' could not be run at this moment, skipping it.";
+                    next;
+                }
+            }
+            if( $analysis_stats->status =~ /^(BLOCKED|SYNCHING)$/ ) {
+                push @log_buffer, "Analysis '$logic_name' is still ".$analysis_stats->status.", skipping it.";
+                next;
             }
-            next if( $analysis_stats->status =~ /^(BLOCKED|SYNCHING)$/ );
 
                 # getting the initial worker requirement for this analysis (may be stale if not sync'ed recently)
             my $extra_workers_this_analysis = $analysis_stats->num_required_workers;
 
-                # if this analysis doesn't require any extra workers - just skip it:
-            next if ($extra_workers_this_analysis <= 0);
+            if ($extra_workers_this_analysis <= 0) {
+                push @log_buffer, "Analysis '$logic_name' doesn't require extra workers, skipping it.";
+                next;
+            }
 
             $total_extra_workers_required += $extra_workers_this_analysis;    # also keep the total number required so far (if nothing required we may need a resync later)
 
@@ -188,7 +209,7 @@ sub schedule_workers {
                     ? $meadow_capacity_limiter_hashed_by_type->{$this_meadow_type}
                     : (),
                 defined($analysis->analysis_capacity)
-                    ? Bio::EnsEMBL::Hive::Limiter->new( "Number of Workers working at '".$analysis->logic_name."' analysis",
+                    ? Bio::EnsEMBL::Hive::Limiter->new( "Number of Workers working at '$logic_name' analysis",
                                                         $analysis->analysis_capacity - $analysis_stats->num_running_workers )
                     : (),
             );
@@ -199,7 +220,10 @@ sub schedule_workers {
             }
 
                 # do not continue with this analysis if limiters haven't agreed on a positive number:
-            next if ($extra_workers_this_analysis <= 0);
+            if ($extra_workers_this_analysis <= 0) {
+                push @log_buffer, "Although Analysis '$logic_name' needed extra workers, it is being skipped because of activated limiters.";
+                next;
+            }
 
                 # let all parties know the final decision of negotiations:
             foreach my $limiter (@limiters) {
@@ -213,7 +237,7 @@ sub schedule_workers {
                 my $this_rc_name    = $analysis_id2rc_name->{ $analysis_stats->analysis_id };
                 $workers_to_submit_by_meadow_type_rc_name{ $this_meadow_type }{ $this_rc_name } += $extra_workers_this_analysis;
                 push @log_buffer, sprintf("Before checking the Valley for pending jobs, Scheduler allocated $extra_workers_this_analysis x $this_meadow_type:$this_rc_name extra workers for '%s' [%.4f hive_load remaining]\n",
-                                    $analysis->logic_name,
+                                    $logic_name,
                                     $queen_capacity_limiter->available_capacity,
                                 );
             }
@@ -222,7 +246,7 @@ sub schedule_workers {
 
     } else {
 
-        push @log_buffer, "Scheduler could not find any suitable analyses to start with";
+        push @log_buffer, "Scheduler could not find any suitable analyses to start with.";
     }
 
     return (\@workers_to_submit_by_analysis, \%workers_to_submit_by_meadow_type_rc_name, $total_extra_workers_required, \@log_buffer);
-- 
GitLab