Skip to content
Snippets Groups Projects
Commit b1ccf212 authored by Leo Gordon's avatar Leo Gordon
Browse files

report number of analyses discarded as not needing Workers

parent 93839b9b
No related branches found
No related tags found
No related merge requests found
......@@ -191,17 +191,23 @@ sub schedule_workers {
my @workers_to_submit_by_analysis = (); # The down-to-analysis "plan" that may completely change by the time the Workers are born and specialized
my %workers_to_submit_by_meadow_type_rc_name = (); # Pre-pending-adjusted per-resource breakout
my $total_extra_workers_required = 0;
my @log_buffer = ();
my ($stats_sorted_by_suitability, $log_buffer) = Bio::EnsEMBL::Hive::Scheduler::sort_stats_by_suitability( $list_of_analyses );
if( my @stats_sorted_by_suitability = @{ Bio::EnsEMBL::Hive::Scheduler::sort_stats_by_suitability( $list_of_analyses ) } ) {
unless( @$stats_sorted_by_suitability ) {
my $submit_capacity_limiter = Bio::EnsEMBL::Hive::Limiter->new( 'Max number of Workers scheduled this time', $submit_capacity );
my $queen_capacity_limiter = Bio::EnsEMBL::Hive::Limiter->new( 'Total reciprocal capacity of the Hive', 1.0 - $queen->db->get_RoleAdaptor->get_hive_current_load() );
unless( @$log_buffer ) {
push @$log_buffer, "Could not find any suitable analyses to start scheduling.";
}
} else {
foreach my $analysis_stats (@stats_sorted_by_suitability) {
my $submit_capacity_limiter = Bio::EnsEMBL::Hive::Limiter->new( 'Max number of Workers scheduled this time', $submit_capacity );
my $queen_capacity_limiter = Bio::EnsEMBL::Hive::Limiter->new( 'Total reciprocal capacity of the Hive', 1.0 - $queen->db->get_RoleAdaptor->get_hive_current_load() );
foreach my $analysis_stats (@$stats_sorted_by_suitability) {
if( $submit_capacity_limiter->reached ) {
if( $analysis_id2rc_name ) { # only add this message when scheduling and not during a Worker's specialization
push @log_buffer, "Submission capacity (=".$submit_capacity_limiter->original_capacity.") has been reached.";
push @$log_buffer, "Submission capacity (=".$submit_capacity_limiter->original_capacity.") has been reached.";
}
last;
}
......@@ -211,23 +217,23 @@ sub schedule_workers {
my $this_meadow_type = $analysis->meadow_type || $default_meadow_type;
if( $meadow_capacity_limiter_hashed_by_type && $meadow_capacity_limiter_hashed_by_type->{$this_meadow_type}->reached ) {
push @log_buffer, "Available capacity of '$this_meadow_type' Meadow (=".$meadow_capacity_limiter_hashed_by_type->{$this_meadow_type}->original_capacity.") has been reached, skipping Analysis '$logic_name'.";
push @$log_buffer, "Available capacity of '$this_meadow_type' Meadow (=".$meadow_capacity_limiter_hashed_by_type->{$this_meadow_type}->original_capacity.") has been reached, skipping Analysis '$logic_name'.";
next;
}
#digging deeper under the surface so need to sync:
if( $analysis_stats->status =~ /^(LOADING|ALL_CLAIMED|BLOCKED|SYNCHING)$/ ) {
push @log_buffer, "Analysis '$logic_name' is ".$analysis_stats->status.", safe-synching it...";
push @$log_buffer, "Analysis '$logic_name' is ".$analysis_stats->status.", safe-synching it...";
if( $queen->safe_synchronize_AnalysisStats($analysis_stats) ) {
push @log_buffer, "Safe-sync of Analysis '$logic_name' succeeded.";
push @$log_buffer, "Safe-sync of Analysis '$logic_name' succeeded.";
} else {
push @log_buffer, "Safe-sync of Analysis '$logic_name' could not be run at this moment, skipping it.";
push @$log_buffer, "Safe-sync of Analysis '$logic_name' could not be run at this moment, skipping it.";
next;
}
}
if( $analysis_stats->status =~ /^(BLOCKED|SYNCHING)$/ ) {
push @log_buffer, "Analysis '$logic_name' is still ".$analysis_stats->status.", skipping it.";
push @$log_buffer, "Analysis '$logic_name' is still ".$analysis_stats->status.", skipping it.";
next;
}
......@@ -235,7 +241,7 @@ sub schedule_workers {
my $extra_workers_this_analysis = $analysis_stats->num_required_workers;
if ($extra_workers_this_analysis <= 0) {
push @log_buffer, "Analysis '$logic_name' doesn't require extra workers, skipping it.";
push @$log_buffer, "Analysis '$logic_name' doesn't require extra workers, skipping it.";
next;
}
......@@ -262,7 +268,7 @@ sub schedule_workers {
# do not continue with this analysis if limiters haven't agreed on a positive number:
if ($extra_workers_this_analysis <= 0) {
push @log_buffer, "Although Analysis '$logic_name' needed extra workers, it is being skipped because of activated limiters.";
push @$log_buffer, "Although Analysis '$logic_name' needed extra workers, it is being skipped because of activated limiters.";
next;
}
......@@ -272,25 +278,21 @@ sub schedule_workers {
}
push @workers_to_submit_by_analysis, [ $analysis, $extra_workers_this_analysis];
push @log_buffer, $analysis_stats->toString;
push @$log_buffer, $analysis_stats->toString;
if($analysis_id2rc_name) {
my $this_rc_name = $analysis_id2rc_name->{ $analysis_stats->analysis_id };
$workers_to_submit_by_meadow_type_rc_name{ $this_meadow_type }{ $this_rc_name } += $extra_workers_this_analysis;
push @log_buffer, sprintf("Before checking the Valley for pending jobs, the Scheduler allocated $extra_workers_this_analysis x $this_meadow_type:$this_rc_name extra workers for '%s' [%.4f hive_load remaining]",
push @$log_buffer, sprintf("Before checking the Valley for pending jobs, the Scheduler allocated $extra_workers_this_analysis x $this_meadow_type:$this_rc_name extra workers for '%s' [%.4f hive_load remaining]",
$logic_name,
$queen_capacity_limiter->available_capacity,
);
}
} # /foreach my $analysis_stats (@stats_sorted_by_suitability)
} else {
push @log_buffer, "Could not find any suitable analyses to start scheduling.";
} # /foreach my $analysis_stats (@$stats_sorted_by_suitability)
}
return (\@workers_to_submit_by_analysis, \%workers_to_submit_by_meadow_type_rc_name, $total_extra_workers_required, \@log_buffer);
return (\@workers_to_submit_by_analysis, \%workers_to_submit_by_meadow_type_rc_name, $total_extra_workers_required, $log_buffer);
}
......@@ -301,14 +303,30 @@ sub sort_stats_by_suitability {
shuffle # 1. make sure analyses are well mixed within the same priority level
@{ shift @_ };
# assuming sync() is expensive, so first trying analyses that have already been sunk:
my @primary_candidates = grep { ($_->num_required_workers > 0) and ($_->status =~/^(READY|WORKING)$/) }
@sorted_stats;
my (@primary_candidates, @secondary_candidates, $discarded_count, @log_buffer);
foreach my $stats ( @sorted_stats ) {
# assuming sync() is expensive, so first trying analyses that have already been sunk:
if( ($stats->num_required_workers > 0) and ($stats->status =~/^(READY|WORKING)$/) ) {
my @secondary_candidates = grep { $_->status =~ /^(LOADING|BLOCKED|ALL_CLAIMED|SYNCHING)$/ }
@sorted_stats;
push @primary_candidates, $stats;
} elsif( $stats->status =~ /^(LOADING|BLOCKED|ALL_CLAIMED|SYNCHING)$/ ) {
push @secondary_candidates, $stats;
} else {
$discarded_count++;
}
}
if( $discarded_count ) {
push @log_buffer, "Discarded $discarded_count analyses because they do not need any Workers.";
}
return [@primary_candidates, @secondary_candidates];
return ( [@primary_candidates, @secondary_candidates], \@log_buffer );
}
1;
......
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment