Skip to content
Snippets Groups Projects
Commit 691a441e authored by Leo Gordon's avatar Leo Gordon
Browse files

Added script help and API pod to describe the new pattern-based fetching of Analyses

parent b47a2081
No related branches found
No related tags found
No related merge requests found
......@@ -104,6 +104,22 @@ sub fetch_by_url_query {
}
=head2 fetch_all_by_pattern
Arg [1] : (optional) string $pattern
Example : my $first_fifteen_analyses_and_two_more = $analysis_adaptor->fetch_all_by_pattern( '1..15,analysis_X,21' );
Example : my $two_open_ranges = $analysis_adaptor->fetch_all_by_pattern( '..7,10..' );
Example : my $double_exclusion = $analysis_adaptor->fetch_all_by_pattern( '1..15-3..5+4' );
Example : my $blast_related_with_exceptions = $analysis_adaptor->fetch_all_by_pattern( 'blast%-12-%funnel' );
Description: Fetches an arrayref of non-repeating Analysis objects by interpreting a pattern.
The pattern can contain individual analysis_ids, individual logic_names,
open and closed ranges of analysis_ids, wildcard patterns of logic_names,
merges (+ or ,) and exclusions (-) of the above subsets.
Exceptions : none
Caller : both beekeeper.pl (for scheduling) and runWorker.pl (for specialization)
=cut
sub fetch_all_by_pattern {
my ($self, $pattern) = @_;
......
......@@ -526,7 +526,7 @@ sub run {
$self->adaptor->db->get_AnalysisStatsAdaptor->update_status( $self->current_role->analysis_id, 'ALL_CLAIMED' );
}
if( $cod =~ /^(NO_WORK|HIVE_OVERLOAD)$/ and $self->can_respecialize and (!$specialization_arghash or $specialization_arghash->{'-analyses_pattern'}!~/^[\w\d]+$/) ) {
if( $cod =~ /^(NO_WORK|HIVE_OVERLOAD)$/ and $self->can_respecialize and (!$specialization_arghash or $specialization_arghash->{'-analyses_pattern'}!~/^\w+$/) ) {
$self->adaptor->db->get_AnalysisStatsAdaptor->decrease_running_workers( $self->current_role->analysis->dbID ); # FIXME: tidy up this counting of active roles
$self->cause_of_death(undef);
$self->specialize_and_compile_wrapper( $specialization_arghash );
......
......@@ -243,6 +243,7 @@ sub main {
}
if( $self->{'logic_name'} ) { # FIXME: for now, logic_name will override analysis_pattern quietly
# warn "-logic_name is now deprecated, please use -analyses_pattern that extends the functionality of -logic_name .\n";
$self->{'analyses_pattern'} = $self->{'logic_name'};
}
......@@ -428,7 +429,7 @@ __DATA__
=head1 NAME
beekeeper.pl
beekeeper.pl [options]
=head1 DESCRIPTION
......@@ -450,8 +451,8 @@ __DATA__
# Run the pipeline in automatic mode (-loop), run all the workers locally (-meadow_type LOCAL) and allow for 3 parallel workers (-total_running_workers_max 3)
beekeeper.pl -url mysql://username:secret@hostname:port/long_mult_test -meadow_type LOCAL -total_running_workers_max 3 -loop
# Run in automatic mode, but only restrict to running the 'fast_blast' analysis
beekeeper.pl -url mysql://username:secret@hostname:port/long_mult_test -logic_name fast_blast -loop
# Run in automatic mode, but only restrict to running blast-related analyses with the exception of analyses 4..6
beekeeper.pl -url mysql://username:secret@hostname:port/long_mult_test -analyses_pattern 'blast%-4..6' -loop
# Restrict the normal execution to one iteration only - can be used for testing a newly set up pipeline
beekeeper.pl -url mysql://username:secret@hostname:port/long_mult_test -run
......@@ -494,11 +495,11 @@ __DATA__
=head2 Worker control
-job_limit <num> : #jobs to run before worker can die naturally
-analyses_pattern <string> : restrict the sync operation, printing of stats or looping of the beekeeper to the specified subset of analyses
-can_respecialize <0|1> : allow workers to re-specialize into another analysis (within resource_class) after their previous analysis was exhausted
-life_span <num> : life_span limit for each worker
-logic_name <string> : restrict the pipeline stat/runs to this analysis logic_name
-job_limit <num> : #jobs to run before worker can die naturally
-retry_throwing_jobs 0|1 : if a job dies *knowingly*, should we retry it by default?
-can_respecialize <0|1> : allow workers to re-specialize into another analysis (within resource_class) after their previous analysis was exhausted
-hive_log_dir <path> : directory where stdout/stderr of the hive is redirected
-debug <debug_level> : set debug level of the workers
......
......@@ -124,6 +124,16 @@ my $worker = $queen->create_new_worker(
-debug => $debug,
);
if( $logic_name ) {
# warn "-logic_name is now deprecated, please use -analyses_pattern that extends the functionality of -logic_name and -analysis_id .\n";
$analyses_pattern = $logic_name;
} elsif ( $analysis_id ) {
# warn "-analysis_id is now deprecated, please use -analyses_pattern that extends the functionality of -analysis_id and -logic_name .\n";
$analyses_pattern = $analysis_id;
}
my $specialization_arghash = ($analyses_pattern || $analysis_id || $logic_name || $job_id) && {
-analyses_pattern => $analyses_pattern,
-analysis_id => $analysis_id,
......@@ -141,7 +151,7 @@ __DATA__
=head1 NAME
runWorker.pl
runWorker.pl [options]
=head1 DESCRIPTION
......@@ -159,8 +169,11 @@ __DATA__
# Run one local worker process in ehive_dbname and let the system pick up the analysis from the given resource_class
runWorker.pl -url mysql://username:secret@hostname:port/ehive_dbname -rc_name low_mem
# Run one local worker process in ehive_dbname and specify the logic_name
runWorker.pl -url mysql://username:secret@hostname:port/ehive_dbname -logic_name fast_blast
# Run one local worker process in ehive_dbname and constrain its initial specialization within a subset of analyses
runWorker.pl -url mysql://username:secret@hostname:port/ehive_dbname -analyses_pattern '1..15,analysis_X,21'
# Run one local worker process in ehive_dbname and allow it to respecialize within a subset of analyses
runWorker.pl -url mysql://username:secret@hostname:port/ehive_dbname -can_respecialize 1 -analyses_pattern 'blast%-4..6'
# Run a specific job in a local worker process:
runWorker.pl -url mysql://username:secret@hostname:port/ehive_dbname -job_id 123456
......@@ -169,16 +182,15 @@ __DATA__
=head2 Connection parameters:
-reg_conf <path> : path to a Registry configuration file
-reg_alias <string> : species/alias name for the Hive DBAdaptor
-url <url string> : url defining where database is located
-reg_conf <path> : path to a Registry configuration file
-reg_alias <string> : species/alias name for the Hive DBAdaptor
-url <url string> : url defining where database is located
=head2 Task specification parameters:
-rc_id <id> : resource class id
-rc_name <string> : resource class name
-analysis_id <id> : pre-specify this worker in a particular analysis defined by database id
-logic_name <string> : pre-specify this worker in a particular analysis defined by name
-analyses_pattern <string> : restrict the specialization of the Worker to the specified subset of Analyses
-job_id <id> : run a specific job defined by its database id
-force 0|1 : set to 1 if you want to force running a Worker over a BLOCKED analysis or to run a specific DONE/SEMAPHORED job_id
......
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment