Skip to content
Snippets Groups Projects
Commit 691a441e authored by Leo Gordon's avatar Leo Gordon
Browse files

Added script help and API pod to describe the new pattern-based fetching of Analyses

parent b47a2081
No related branches found
No related tags found
No related merge requests found
......@@ -104,6 +104,22 @@ sub fetch_by_url_query {
}
=head2 fetch_all_by_pattern
Arg [1] : (optional) string $pattern
Example : my $first_fifteen_analyses_and_two_more = $analysis_adaptor->fetch_all_by_pattern( '1..15,analysis_X,21' );
Example : my $two_open_ranges = $analysis_adaptor->fetch_all_by_pattern( '..7,10..' );
Example : my $double_exclusion = $analysis_adaptor->fetch_all_by_pattern( '1..15-3..5+4' );
Example : my $blast_related_with_exceptions = $analysis_adaptor->fetch_all_by_pattern( 'blast%-12-%funnel' );
Description: Fetches an arrayref of non-repeating Analysis objects by interpreting a pattern.
The pattern can contain individual analysis_ids, individual logic_names,
open and closed ranges of analysis_ids, wildcard patterns of logic_names,
merges (+ or ,) and exclusions (-) of the above subsets.
Exceptions : none
Caller : both beekeeper.pl (for scheduling) and runWorker.pl (for specialization)
=cut
sub fetch_all_by_pattern {
my ($self, $pattern) = @_;
......
......@@ -526,7 +526,7 @@ sub run {
$self->adaptor->db->get_AnalysisStatsAdaptor->update_status( $self->current_role->analysis_id, 'ALL_CLAIMED' );
}
if( $cod =~ /^(NO_WORK|HIVE_OVERLOAD)$/ and $self->can_respecialize and (!$specialization_arghash or $specialization_arghash->{'-analyses_pattern'}!~/^[\w\d]+$/) ) {
if( $cod =~ /^(NO_WORK|HIVE_OVERLOAD)$/ and $self->can_respecialize and (!$specialization_arghash or $specialization_arghash->{'-analyses_pattern'}!~/^\w+$/) ) {
$self->adaptor->db->get_AnalysisStatsAdaptor->decrease_running_workers( $self->current_role->analysis->dbID ); # FIXME: tidy up this counting of active roles
$self->cause_of_death(undef);
$self->specialize_and_compile_wrapper( $specialization_arghash );
......
......@@ -243,6 +243,7 @@ sub main {
}
if( $self->{'logic_name'} ) { # FIXME: for now, logic_name will override analysis_pattern quietly
# warn "-logic_name is now deprecated, please use -analyses_pattern that extends the functionality of -logic_name .\n";
$self->{'analyses_pattern'} = $self->{'logic_name'};
}
......@@ -428,7 +429,7 @@ __DATA__
=head1 NAME
beekeeper.pl
beekeeper.pl [options]
=head1 DESCRIPTION
......@@ -450,8 +451,8 @@ __DATA__
# Run the pipeline in automatic mode (-loop), run all the workers locally (-meadow_type LOCAL) and allow for 3 parallel workers (-total_running_workers_max 3)
beekeeper.pl -url mysql://username:secret@hostname:port/long_mult_test -meadow_type LOCAL -total_running_workers_max 3 -loop
# Run in automatic mode, but only restrict to running the 'fast_blast' analysis
beekeeper.pl -url mysql://username:secret@hostname:port/long_mult_test -logic_name fast_blast -loop
# Run in automatic mode, but only restrict to running blast-related analyses with the exception of analyses 4..6
beekeeper.pl -url mysql://username:secret@hostname:port/long_mult_test -analyses_pattern 'blast%-4..6' -loop
# Restrict the normal execution to one iteration only - can be used for testing a newly set up pipeline
beekeeper.pl -url mysql://username:secret@hostname:port/long_mult_test -run
......@@ -494,11 +495,11 @@ __DATA__
=head2 Worker control
-job_limit <num> : #jobs to run before worker can die naturally
-analyses_pattern <string> : restrict the sync operation, printing of stats or looping of the beekeeper to the specified subset of analyses
-can_respecialize <0|1> : allow workers to re-specialize into another analysis (within resource_class) after their previous analysis was exhausted
-life_span <num> : life_span limit for each worker
-logic_name <string> : restrict the pipeline stat/runs to this analysis logic_name
-job_limit <num> : #jobs to run before worker can die naturally
-retry_throwing_jobs 0|1 : if a job dies *knowingly*, should we retry it by default?
-can_respecialize <0|1> : allow workers to re-specialize into another analysis (within resource_class) after their previous analysis was exhausted
-hive_log_dir <path> : directory where stdout/stderr of the hive is redirected
-debug <debug_level> : set debug level of the workers
......
......@@ -124,6 +124,16 @@ my $worker = $queen->create_new_worker(
-debug => $debug,
);
if( $logic_name ) {
# warn "-logic_name is now deprecated, please use -analyses_pattern that extends the functionality of -logic_name and -analysis_id .\n";
$analyses_pattern = $logic_name;
} elsif ( $analysis_id ) {
# warn "-analysis_id is now deprecated, please use -analyses_pattern that extends the functionality of -analysis_id and -logic_name .\n";
$analyses_pattern = $analysis_id;
}
my $specialization_arghash = ($analyses_pattern || $analysis_id || $logic_name || $job_id) && {
-analyses_pattern => $analyses_pattern,
-analysis_id => $analysis_id,
......@@ -141,7 +151,7 @@ __DATA__
=head1 NAME
runWorker.pl
runWorker.pl [options]
=head1 DESCRIPTION
......@@ -159,8 +169,11 @@ __DATA__
# Run one local worker process in ehive_dbname and let the system pick up the analysis from the given resource_class
runWorker.pl -url mysql://username:secret@hostname:port/ehive_dbname -rc_name low_mem
# Run one local worker process in ehive_dbname and specify the logic_name
runWorker.pl -url mysql://username:secret@hostname:port/ehive_dbname -logic_name fast_blast
# Run one local worker process in ehive_dbname and constrain its initial specialization within a subset of analyses
runWorker.pl -url mysql://username:secret@hostname:port/ehive_dbname -analyses_pattern '1..15,analysis_X,21'
# Run one local worker process in ehive_dbname and allow it to respecialize within a subset of analyses
runWorker.pl -url mysql://username:secret@hostname:port/ehive_dbname -can_respecialize 1 -analyses_pattern 'blast%-4..6'
# Run a specific job in a local worker process:
runWorker.pl -url mysql://username:secret@hostname:port/ehive_dbname -job_id 123456
......@@ -169,16 +182,15 @@ __DATA__
=head2 Connection parameters:
-reg_conf <path> : path to a Registry configuration file
-reg_alias <string> : species/alias name for the Hive DBAdaptor
-url <url string> : url defining where database is located
-reg_conf <path> : path to a Registry configuration file
-reg_alias <string> : species/alias name for the Hive DBAdaptor
-url <url string> : url defining where database is located
=head2 Task specification parameters:
-rc_id <id> : resource class id
-rc_name <string> : resource class name
-analysis_id <id> : pre-specify this worker in a particular analysis defined by database id
-logic_name <string> : pre-specify this worker in a particular analysis defined by name
-analyses_pattern <string> : restrict the specialization of the Worker to the specified subset of Analyses
-job_id <id> : run a specific job defined by its database id
-force 0|1 : set to 1 if you want to force running a Worker over a BLOCKED analysis or to run a specific DONE/SEMAPHORED job_id
......
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment