Skip to content
Snippets Groups Projects
Commit c250fa3c authored by Leo Gordon's avatar Leo Gordon
Browse files

API change: merged Q::print_analysis_status + Q::show_and_return_totals ->...

API change: merged Q::print_analysis_status + Q::show_and_return_totals -> Q::print_status_and_return_reasons_to_exit to improve speed
parent 2e275cd7
No related branches found
No related tags found
No related merge requests found
......@@ -647,10 +647,10 @@ sub check_nothing_to_run_but_semaphored { # make sure it is run after a recent
}
=head2 show_and_return_totals
=head2 print_status_and_return_reasons_to_exit
Arg [1] : $list_of_analyses
Example : my ($failed_analyses_counter, $num_remaining_jobs) = $queen->show_and_return_totals( [ $analysis_A, $analysis_B ] );
Example : my $reasons_to_exit = $queen->print_status_and_return_reasons_to_exit( [ $analysis_A, $analysis_B ] );
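Description: Runs through all analyses in the given list, prints the status of each analysis, collects the failed ones, computes some totals, prints a combined status line
and returns a string listing the reasons to exit (failed analyses and/or no jobs left to do); the string is empty when there is no reason to exit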
Exceptions : none
......@@ -658,22 +658,22 @@ sub check_nothing_to_run_but_semaphored { # make sure it is run after a recent
=cut
sub show_and_return_totals {
sub print_status_and_return_reasons_to_exit {
my ($self, $list_of_analyses) = @_;
my ($failed_analyses_counter, $total_done_jobs, $total_failed_jobs, $total_jobs, $cpumsec_to_do) = (0) x 5;
my ($total_done_jobs, $total_failed_jobs, $total_jobs, $cpumsec_to_do) = (0) x 4;
my $reasons_to_exit = '';
foreach my $analysis (@$list_of_analyses) {
foreach my $analysis (sort {$a->dbID <=> $b->dbID} @$list_of_analyses) {
my $stats = $analysis->stats;
my $failed_job_count = $stats->failed_job_count;
print $stats->toString . "\n";
if( $stats->status eq 'FAILED') {
my $logic_name = $analysis->logic_name;
my $tolerance = $analysis->failed_job_tolerance;
warn "\t##################################################################################################\n";
warn "\t# Analysis '$logic_name' has FAILED (failed Jobs: $failed_job_count, tolerance: $tolerance\%) #\n";
warn "\t##################################################################################################\n";
$failed_analyses_counter++;
my $logic_name = $analysis->logic_name;
my $tolerance = $analysis->failed_job_tolerance;
$reasons_to_exit .= "### Analysis '$logic_name' has FAILED (failed Jobs: $failed_job_count, tolerance: $tolerance\%) ###\n";
}
$total_done_jobs += $stats->done_job_count;
......@@ -688,29 +688,14 @@ sub show_and_return_totals {
? (($total_done_jobs+$total_failed_jobs)*100.0/$total_jobs)
: 0.0;
warn sprintf("total over %d analyses : %6.2f%% complete (< %.2f CPU_hrs) (%d to_do + %d done + %d failed = %d total)\n",
printf("total over %d analyses : %6.2f%% complete (< %.2f CPU_hrs) (%d to_do + %d done + %d failed = %d total)\n",
scalar(@$list_of_analyses), $percentage_completed, $cpuhrs_to_do, $total_jobs_to_do, $total_done_jobs, $total_failed_jobs, $total_jobs);
return ($failed_analyses_counter, $total_jobs_to_do);
}
=head2 print_analysis_status
Arg [1] : $list_of_analyses
Example : $queen->print_analysis_status( [ $analysis_A, $analysis_B ] );
Description: Runs through all analyses in the given list and prints their stats.
Exceptions : none
Caller : beekeeper.pl
=cut
sub print_analysis_status {
my ($self, $list_of_analyses) = @_;
foreach my $analysis (sort {$a->dbID <=> $b->dbID} @$list_of_analyses) {
print $analysis->stats->toString . "\n";
unless( $total_jobs_to_do ) {
$reasons_to_exit .= "### No jobs left to do ###\n";
}
return $reasons_to_exit;
}
......
......@@ -261,7 +261,7 @@ sub main {
if($sync) {
$queen->synchronize_hive( $list_of_analyses );
}
$queen->print_analysis_status( $list_of_analyses ) unless($self->{'no_analysis_stats'});
print $queen->print_status_and_return_reasons_to_exit( $list_of_analyses, !$self->{'no_analysis_stats'} );
if($show_worker_stats) {
print "\n===== List of live Workers according to the Queen: ======\n";
......@@ -272,7 +272,6 @@ sub main {
$self->{'dba'}->get_RoleAdaptor->print_active_role_counts;
Bio::EnsEMBL::Hive::Scheduler::schedule_workers_resync_if_necessary($queen, $valley, $list_of_analyses); # show what would be submitted, but do not actually submit
$queen->show_and_return_totals( $list_of_analyses );
if($show_failed_jobs) {
print("===== failed jobs\n");
......@@ -339,20 +338,22 @@ sub run_autonomously {
my $beekeeper_pid = $$;
my $iteration=0;
my $num_of_remaining_jobs=0;
my $failed_analyses=0;
do {
if($iteration++) {
$self->{'dba'}->dbc->disconnect_if_idle;
printf("sleep %.2f minutes. Next loop at %s\n", $self->{'sleep_minutes'}, scalar localtime(time+$self->{'sleep_minutes'}*60));
sleep($self->{'sleep_minutes'}*60);
}
my $reasons_to_exit;
BKLOOP: while( ($iteration++ != $max_loops) or $keep_alive ) { # NB: the order of conditions is important!
print("\n======= beekeeper loop ** $iteration **==========\n");
print("\nBeekeeper : loop #$iteration ======================================================\n");
$queen->check_for_dead_workers($valley, 0);
$queen->print_analysis_status( $list_of_analyses ) unless($self->{'no_analysis_stats'});
if( $reasons_to_exit = $queen->print_status_and_return_reasons_to_exit( $list_of_analyses, !$self->{'no_analysis_stats'} )) {
if($keep_alive) {
print "Beekeeper : detected exit condition, but staying alive because of -keep_alive : ".$reasons_to_exit;
} else {
last BKLOOP;
}
}
$self->{'dba'}->get_RoleAdaptor->print_active_role_counts;
my $workers_to_submit_by_meadow_type_rc_name
......@@ -374,7 +375,7 @@ sub run_autonomously {
foreach my $rc_name (keys %{ $workers_to_submit_by_meadow_type_rc_name->{$meadow_type} }) {
my $this_meadow_rc_worker_count = $workers_to_submit_by_meadow_type_rc_name->{$meadow_type}{$rc_name};
print "Submitting $this_meadow_rc_worker_count workers (rc_name=$rc_name) to ".$this_meadow->signature()."\n";
print "\nBeekeeper : submitting $this_meadow_rc_worker_count workers (rc_name=$rc_name) to ".$this_meadow->signature()."\n";
my ($submission_cmd_args, $worker_cmd_args) = @{ $meadow_type_rc_name2resource_param_list{ $meadow_type }{ $rc_name } || [] };
......@@ -387,21 +388,19 @@ sub run_autonomously {
}
}
} else {
print "Not submitting any workers this iteration\n";
print "\nBeekeeper : not submitting any workers this iteration\n";
}
($failed_analyses, $num_of_remaining_jobs) = $queen->show_and_return_totals( $list_of_analyses );
} while( $keep_alive
or (!$failed_analyses and $num_of_remaining_jobs and $iteration!=$max_loops) );
if( $iteration != $max_loops ) { # skip the last sleep
$self->{'dba'}->dbc->disconnect_if_idle;
printf("Beekeeper : going to sleep for %.2f minute(s). Expect next iteration at %s\n", $self->{'sleep_minutes'}, scalar localtime(time+$self->{'sleep_minutes'}*60));
sleep($self->{'sleep_minutes'}*60);
}
}
print "The Beekeeper has stopped because ".(
$failed_analyses ? "there were $failed_analyses failed analyses"
: !$num_of_remaining_jobs ? "there is nothing left to do"
: "the number of loops was limited by $max_loops and this limit expired"
)."\n";
print "Beekeeper : stopped looping because ".( $reasons_to_exit || "the number of loops was limited by $max_loops and this limit expired\n");
printf("dbc %d disconnect cycles\n", $self->{'dba'}->dbc->disconnect_count);
printf("Beekeeper: dbc %d disconnect cycles\n", $self->{'dba'}->dbc->disconnect_count);
}
......
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment