Commit e6f5409c authored by Leo Gordon's avatar Leo Gordon
Browse files

moving code that assembles the bacct command line given an interval into...

moving code that assembles the bacct command line given an interval into Meadow::LSF::get_report_entries_for_time_interval and calling it from there

Also removed direct way of setting start_date and end_date and provided a generalized -source parameter for parsing from files/pipes
parent dda3cf0c
......@@ -32,6 +32,7 @@ package Bio::EnsEMBL::Hive::Meadow::LSF;
use strict;
use Time::Piece;
use Time::Seconds;
use base ('Bio::EnsEMBL::Hive::Meadow');
......@@ -171,7 +172,9 @@ sub _yearless_2_datetime { # a private subroutine that recovers missing yea
sub parse_report_source_line {
my $bacct_source_line = shift @_;
my ($self, $bacct_source_line) = @_;
warn "LSF::parse_report_source_line( \"$bacct_source_line\" )\n";
my %status_2_cod = (
'TERM_MEMLIMIT' => 'MEMLIMIT',
......@@ -246,16 +249,16 @@ sub parse_report_source_line {
sub get_report_entries_for_process_ids {
my $self = shift @_;
my $self = shift @_; # make sure we get if off the way before splicing
my %combined_report_entries = ();
while (my $pid_batch = join(' ', map { "'$_'" } splice(@_, 0, 20))) { # can't fit too many pids on one shell cmdline
my $cmd = "bacct -l $pid_batch |";
# warn "LSF::get_combined_report() running cmd:\n\t$cmd\n";
# warn "LSF::get_report_entries_for_process_ids() running cmd:\n\t$cmd\n";
my $batch_of_report_entries = parse_report_source_line( $cmd );
my $batch_of_report_entries = $self->parse_report_source_line( $cmd );
%combined_report_entries = (%combined_report_entries, %$batch_of_report_entries);
}
......@@ -264,6 +267,25 @@ sub get_report_entries_for_process_ids {
}
# Collects bacct report entries for every job that falls into a given time interval.
#
# Arguments (after $self):
#   $from_time, $to_time - interval bounds, parsed as 'YYYY-MM-DD HH:MM:SS' strings
#   $username            - optional; when true it is handed to bacct via '-u'
#
# Both bounds are re-rendered as 'YYYY/MM/DD/HH:MM' for bacct's -C option (seconds are dropped);
# the upper bound is shifted two minutes forward before being rendered.
# Returns whatever parse_report_source_line() yields for the assembled pipe-from command.
sub get_report_entries_for_time_interval {
    my ($self, $from_time, $to_time, $username) = @_;

    my $input_format = '%Y-%m-%d %H:%M:%S';
    my $bacct_format = '%Y/%m/%d/%H:%M';

    my $interval_start = Time::Piece->strptime($from_time, $input_format)->strftime($bacct_format);
    my $interval_end   = (Time::Piece->strptime($to_time, $input_format) + 2*ONE_MINUTE)->strftime($bacct_format);

    my $user_filter = $username ? "-u $username" : '';

        # the trailing '|' marks the source line as a command to read from rather than a file:
    my $cmd = "bacct -l -C $interval_start,$interval_end $user_filter |";

#    warn "LSF::get_report_entries_for_time_interval() running cmd:\n\t$cmd\n";

    my $report_entries = $self->parse_report_source_line( $cmd );

    return $report_entries;
}
sub submit_workers {
my ($self, $worker_cmd, $required_worker_count, $iteration, $rc_name, $rc_specific_submission_cmd_args, $submit_stdout_file, $submit_stderr_file) = @_;
......
......@@ -15,8 +15,6 @@ BEGIN {
use Getopt::Long;
use Time::Piece;
use Time::Seconds;
use Bio::EnsEMBL::Hive::DBSQL::DBAdaptor;
use Bio::EnsEMBL::Hive::Utils ('script_usage');
use Bio::EnsEMBL::Hive::Meadow::LSF;
......@@ -27,21 +25,19 @@ exit(0);
sub main {
my ($url, $reg_conf, $reg_type, $reg_alias, $nosqlvc, $bacct_source_line, $lsf_user, $help, $start_date, $end_date);
my ($url, $reg_conf, $reg_type, $reg_alias, $nosqlvc, $source_line, $username, $help);
GetOptions(
# connect to the database:
'url=s' => \$url,
'reg_conf|regfile=s' => \$reg_conf,
'reg_type=s' => \$reg_type,
'reg_alias|regname=s' => \$reg_alias,
'nosqlvc=i' => \$nosqlvc, # using "=i" instead of "!" for consistency with scripts where it is a propagated option
'dump|file=s' => \$bacct_source_line,
'lu|lsf_user=s' => \$lsf_user,
'sd|start_date=s' => \$start_date,
'ed|end_date=s' => \$end_date,
'h|help' => \$help,
'url=s' => \$url,
'reg_conf|regfile=s' => \$reg_conf,
'reg_type=s' => \$reg_type,
'reg_alias|regname=s' => \$reg_alias,
'nosqlvc=i' => \$nosqlvc, # using "=i" instead of "!" for consistency with scripts where it is a propagated option
'username=s' => \$username, # say "-user all" if the pipeline was run by several people
'source_line=s' => \$source_line,
'h|help' => \$help,
);
if ($help) { script_usage(0); }
......@@ -62,12 +58,14 @@ sub main {
my $queen = $hive_dba->get_Queen;
my $this_lsf_farm = Bio::EnsEMBL::Hive::Meadow::LSF::name();
die "Cannot find the name of the current farm.\n" unless $this_lsf_farm;
my $this_lsf_farm = Bio::EnsEMBL::Hive::Meadow::LSF::name()
or die "Cannot find the name of the current farm.\n";
if( $bacct_source_line && -r $bacct_source_line ) {
my $report_entries;
warn "Parsing given bacct file '$bacct_source_line'...\n";
if( $source_line ) {
$report_entries = Bio::EnsEMBL::Hive::Meadow::LSF->parse_report_source_line( $source_line );
} else {
......@@ -76,43 +74,22 @@ sub main {
my $meadow_to_interval = $queen->interval_workers_with_unknown_usage();
my $our_interval = $meadow_to_interval->{ 'LSF' }{ $this_lsf_farm };
my ($from_time, $to_time, $workers_count);
my ($from_time, $to_time);
if( $our_interval ) {
($from_time, $to_time, $workers_count) = @$our_interval{ 'min_born', 'max_died', 'workers_count' };
my $to_timepiece = Time::Piece->strptime($to_time, '%Y-%m-%d %H:%M:%S') + 2*ONE_MINUTE;
$to_time = $to_timepiece->strftime('%Y/%m/%d/%H:%M');
} else {
die "Usage information for this meadow has already been loaded, exiting...\n";
}
($from_time, $to_time) = @$our_interval{ 'min_born', 'max_died' };
if (defined $start_date) {
die "start_date must be in a format like '2012/01/25/13:46'" unless $start_date =~ /^\d{4}\/\d{2}\/\d{2}\/\d{2}:\d{2}$/;
$from_time = $start_date;
$report_entries = Bio::EnsEMBL::Hive::Meadow::LSF->get_report_entries_for_time_interval( $from_time, $to_time, $username );
} else {
$from_time=~s/[- ]/\//g;
$from_time=~s/:\d\d$//;
}
if (defined $end_date) {
die "end_date must be in a format like '2012/01/25/13:46'" unless $end_date =~ /^\d{4}\/\d{2}\/\d{2}\/\d{2}:\d{2}$/;
$to_time = $end_date;
die "Usage information for this meadow has already been loaded, exiting...\n";
}
warn "\tfrom=$from_time, to=$to_time\n";
$lsf_user = $lsf_user ? "-u $lsf_user" : '';
my $tee = $bacct_source_line ? "| tee $bacct_source_line" : '';
$bacct_source_line = "bacct -l -C $from_time,$to_time $lsf_user $tee |";
warn 'Will run the following command to obtain '.($tee ? 'and dump ' : '')."bacct information: '$bacct_source_line' (may take a few minutes)\n";
}
my $report_entries = Bio::EnsEMBL::Hive::Meadow::LSF::parse_report_source_line( $bacct_source_line );
if($report_entries and %$report_entries) {
my $processid_2_workerid = $queen->fetch_by_meadow_type_AND_meadow_name_HASHED_FROM_process_id_TO_worker_id( 'LSF', $this_lsf_farm );
my $processid_2_workerid = $queen->fetch_by_meadow_type_AND_meadow_name_HASHED_FROM_process_id_TO_worker_id( 'LSF', $this_lsf_farm );
$queen->store_resource_usage( $report_entries, $processid_2_workerid );
$queen->store_resource_usage( $report_entries, $processid_2_workerid );
}
}
__DATA__
......@@ -128,17 +105,13 @@ __DATA__
This script is used for offline examination of resources used by a Hive pipeline running on LSF
(the script is [Pp]latform-dependent).
Based on the command-line parameters 'start_date' and 'end_date', or on the start time of the first
worker and end time of the last worker (as recorded in pipeline DB), it pulls the relevant data out
of LSF's 'bacct' database, parses it and stores in 'worker_resource_usage' table.
Based on the start time of the first Worker and the end time of the last Worker (as recorded in the pipeline DB),
it pulls the relevant data out of LSF's 'bacct' database, parses it and stores it in the 'worker_resource_usage' table.
You can join this table to 'worker' table USING(meadow_name,process_id) in the usual MySQL way
to filter by analysis_id, do various stats, etc.
You can optionally ask the script to dump the 'bacct' database in a dump file,
or fill in the 'worker_resource_usage' table from an existing dump file (most time is taken by querying bacct).
Please note the script may additionally pull information about LSF processes that you ran simultaneously
with running the pipeline. It is easy to ignore them by joining into 'worker' table.
You can optionally provide an external filename or a command to get the data from (don't forget to append a '|' to the end of a command!),
and the data will then be read from your source and parsed.
=head1 USAGE EXAMPLES
......@@ -146,22 +119,20 @@ __DATA__
lsf_report.pl -url mysql://username:secret@hostname:port/long_mult_test
# The same, but assuming LSF user someone_else ran the pipeline:
lsf_report.pl -url mysql://username:secret@hostname:port/long_mult_test -lsf_user someone_else
lsf_report.pl -url mysql://username:secret@hostname:port/long_mult_test -username someone_else
# Assuming the dump file existed. Load the dumped bacct data into 'worker_resource_usage' table:
lsf_report.pl -url mysql://username:secret@hostname:port/long_mult_test -dump long_mult.bacct
lsf_report.pl -url mysql://username:secret@hostname:port/long_mult_test -source long_mult.bacct
# Assuming the dump file did not exist. Query 'bacct', dump the data into a file and load it into 'worker_resource_usage' table:
lsf_report.pl -url mysql://username:secret@hostname:port/long_mult_test -dump long_mult_again.bacct
# Provide your own command to fetch and parse the worker_resource_usage data from:
lsf_report.pl -url mysql://username:secret@hostname:port/long_mult_test -source "bacct -l -C 2012/01/25/13:33,2012/01/25/14:44 |"
=head1 OPTIONS
-help : print this help
-url <url string> : url defining where hive database is located
-dump <filename> : a filename for bacct dump. It will be read from if the file exists, and written to otherwise.
-lsf_user <username> : if it wasn't you who ran the pipeline, LSF user name of that user can be provided
-start_date <date> : minimal start date of a job (the format is '2012/01/25/13:46')
-end_date <date> : maximal end date of a job (the format is '2012/01/25/13:46')
-username <username> : if it wasn't you who ran the pipeline, LSF user name of that user can be provided
-source <filename> : alternative source of worker_resource_usage data. Can be a filename or a pipe-from command.
=head1 LICENSE
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment