=pod

=head1 NAME

Bio::EnsEMBL::Hive::PipeConfig::HiveGeneric_conf

=head1 SYNOPSIS

    # Example 1: specifying only the mandatory option:
init_pipeline.pl Bio::EnsEMBL::Hive::PipeConfig::HiveGeneric_conf -password <mypass>

    # Example 2: specifying the mandatory options as well as overriding some defaults:
init_pipeline.pl Bio::EnsEMBL::Hive::PipeConfig::HiveGeneric_conf -ensembl_cvs_root_dir ~/ensembl_main -pipeline_db -host <myhost> -pipeline_db -dbname <mydbname> -password <mypass>

=head1 DESCRIPTION

Generic configuration module for all Hive pipelines with loader functionality.
All other Hive PipeConfig modules should inherit from this module and will probably need to redefine some or all of the following interface methods:

    * default_options:                  returns a hash of (possibly multilevel) defaults for the options on which depend the rest of the configuration

    * pipeline_create_commands:         returns a list of strings that will be executed as system commands needed to create and set up the pipeline database

    * pipeline_wide_parameters:         returns a hash of pipeline-wide parameter names and their values

    * resource_classes:                 returns a hash of resource class definitions

    * pipeline_analyses:                returns a list of hash structures that define analysis objects bundled with definitions of corresponding jobs, rules and resources

    * beekeeper_extra_cmdline_options   returns a string with command line options that you want to be passed to the beekeeper.pl

When defining anything except the keys of default_options() a call to $self->o('myoption') can be used.
This call means "substitute this call for the value of 'myoption' at the time of configuring the pipeline".
All option names mentioned in $self->o() calls within the five interface methods above can be given non-default values from the command line.

Please make sure you have studied the pipeline configuraton examples in Bio::EnsEMBL::Hive::PipeConfig before creating your own PipeConfig modules.

=head1 CONTACT

  Please contact ehive-users@ebi.ac.uk mailing list with questions/suggestions.

=cut

package Bio::EnsEMBL::Hive::PipeConfig::HiveGeneric_conf;

use strict;
use warnings;

use Bio::EnsEMBL::Utils::Argument;          # import 'rearrange()'
use Bio::EnsEMBL::Hive::Utils 'stringify';  # import 'stringify()'
use Bio::EnsEMBL::Hive::DBSQL::DBAdaptor;
use Bio::EnsEMBL::Hive::DBSQL::AnalysisJobAdaptor;
use Bio::EnsEMBL::Hive::Analysis;
use Bio::EnsEMBL::Hive::Extensions;

use base ('Bio::EnsEMBL::Hive::DependentOptions');


# ---------------------------[the following methods will be overridden by specific pipelines]-------------------------

=head2 default_options

    Description : Interface method that should return a hash of option_name->default_option_value pairs.
                  Subclasses override this and usually merge in SUPER::default_options first.
                  Please see existing PipeConfig modules for examples.

=cut

sub default_options {
    my ($self) = @_;
    return {
        'ensembl_cvs_root_dir'  => $self->o('ENV', 'ENSEMBL_CVS_ROOT_DIR'),     # it will make sense to set this variable if you are going to use ehive frequently
        'password'              => $self->o('ENV', 'ENSADMIN_PSW'),             # people will have to make an effort NOT to insert it into config files like .bashrc etc

        'host'                  => 'localhost',
        'pipeline_name'         => 'hive_generic',
        'hive_use_triggers'     => 0,                   # there have been a few cases of big pipelines misbehaving with triggers on, let's keep the default off.

        'pipeline_db'   => {
            -host   => $self->o('host'),
            -port   => 3306,
            -user   => 'ensadmin',
            -pass   => $self->o('password'),
            -dbname => $self->o('ENV', 'USER').'_'.$self->o('pipeline_name'),  # example of a linked definition (resolved via saturation)
        },
    };
}

=head2 pipeline_create_commands

    Description : Interface method that should return a list of command lines to be run in order to create and set up the pipeline database.
                  The connection hash to use is picked by the optional $db_conn argument (defaults to 'pipeline_db').
                  Please see existing PipeConfig modules for examples.

=cut

sub pipeline_create_commands {
    my $self    = shift @_;
    my $db_conn = shift @_ || 'pipeline_db';

    return ($self->o($db_conn, '-driver') eq 'sqlite')
        ? [
                # standard eHive tables, triggers and procedures:
            $self->db_connect_command($db_conn).' <'.$self->o('ensembl_cvs_root_dir').'/ensembl-hive/sql/tables.sqlite',
            $self->o('hive_use_triggers') ? ( $self->db_connect_command($db_conn).' <'.$self->o('ensembl_cvs_root_dir').'/ensembl-hive/sql/triggers.sqlite' ) : (),
            $self->db_connect_command($db_conn).' <'.$self->o('ensembl_cvs_root_dir').'/ensembl-hive/sql/procedures.sqlite',
        ]
        : [
                # NOTE(review): was hard-coded to 'pipeline_db'; use $db_conn so an alternative connection creates its own database:
            'mysql '.$self->dbconn_2_mysql($db_conn, 0)." -e 'CREATE DATABASE ".$self->o($db_conn, '-dbname')."'",

                # standard eHive tables, triggers, foreign_keys and procedures:
            $self->db_connect_command($db_conn).' <'.$self->o('ensembl_cvs_root_dir').'/ensembl-hive/sql/tables.sql',
            $self->o('hive_use_triggers') ? ( $self->db_connect_command($db_conn).' <'.$self->o('ensembl_cvs_root_dir').'/ensembl-hive/sql/triggers.mysql' ) : (),
            $self->db_connect_command($db_conn).' <'.$self->o('ensembl_cvs_root_dir').'/ensembl-hive/sql/foreign_keys.mysql',
            $self->db_connect_command($db_conn).' <'.$self->o('ensembl_cvs_root_dir').'/ensembl-hive/sql/procedures.mysql',
        ];
}

=head2 pipeline_wide_parameters

    Description : Interface method that should return a hash of pipeline_wide_parameter_name->pipeline_wide_parameter_value pairs.
                  The value doesn't have to be a scalar, can be any Perl structure now (will be stringified and de-stringified automagically).
                  Please see existing PipeConfig modules for examples.

=cut

sub pipeline_wide_parameters {
    my $self = shift @_;

        # name the pipeline to differentiate the submitted processes:
    my %params = (
        'pipeline_name' => $self->o('pipeline_name'),
    );

    return \%params;
}

=head2 resource_classes

    Description : Interface method that should return a hash of resource_description_id->resource_description_hash.
                  Please see existing PipeConfig modules for examples.

=cut

sub resource_classes {
    my ($self) = @_;
    return {
## Old style (numeric keys with a -desc entry):
#        1 => { -desc => 'default',  'LSF' => '' },
#        2 => { -desc => 'urgent',   'LSF' => '-q yesterday' },
## New style (resource-class-name keys):
        'default' => { 'LSF' => '' },
        'urgent'  => { 'LSF' => '-q yesterday' },
    };
}

=head2 pipeline_analyses

    Description : Interface method that should return a list of hashes that define analysis bundled with corresponding jobs, dataflow and analysis_ctrl rules and resource_id.
                  Please see existing PipeConfig modules for examples.

=cut

sub pipeline_analyses {
    my $self = shift @_;

        # no analyses are defined at this level; subclasses override this method:
    my @analyses = ();

    return \@analyses;
}


=head2 beekeeper_extra_cmdline_options

    Description : Interface method that should return a string with extra parameters that you want to be passed to beekeeper.pl

=cut

sub beekeeper_extra_cmdline_options {
    my $self = shift @_;

        # no extra beekeeper.pl options by default; subclasses may override:
    return '';
}


# ---------------------------------[now comes the interfacing stuff - feel free to call but not to modify]--------------------


=head2 pre_options

    Description : Returns the Getopt::Long-style definitions of the options that are parsed up-front
                  in process_options() (mode switches and the database driver choice), before the
                  main option-saturation machinery runs.

=cut

sub pre_options {
    my $self = shift @_;

    return {
        'help!' => '',
        'job_topup!' => '',
        'analysis_topup!' => '',
        'hive_driver' => '',
#        'hive_use_triggers' => '',
    };
}

=head2 dbconn_2_mysql

    Description : A convenience method used to stringify a connection-parameters hash into a parameter string that both mysql and beekeeper.pl can understand.
                  If $with_db is true, the database name is appended as well.

=cut

sub dbconn_2_mysql {    # will save you a lot of typing
    my ($self, $db_conn, $with_db) = @_;

    return '--host='.$self->o($db_conn,'-host').' '
          .'--port='.$self->o($db_conn,'-port').' '
          .'--user="'.$self->o($db_conn,'-user').'" '
          .'--pass="'.$self->o($db_conn,'-pass').'" '
          .($with_db ? ($self->o($db_conn,'-dbname').' ') : '');
}


=head2 db_connect_command

    Description : A convenience method used to stringify a command to connect to the db OR pipe an sql file into it.
                  Dispatches on the '-driver' connection parameter: sqlite3 for 'sqlite', mysql otherwise.

=cut

sub db_connect_command {
    my ($self, $db_conn) = @_;

    return ($self->o($db_conn, '-driver') eq 'sqlite')
        ? 'sqlite3 '.$self->o($db_conn, '-dbname')
        : 'mysql '.$self->dbconn_2_mysql($db_conn, 1);
}


=head2 db_execute_command

    Description : A convenience method used to stringify a command that executes the given $sql_command against the db
                  (the previous description was copy-pasted from db_connect_command and did not mention the SQL argument).

=cut

sub db_execute_command {
    my ($self, $db_conn, $sql_command) = @_;

    return ($self->o($db_conn, '-driver') eq 'sqlite')
        ? 'sqlite3 '.$self->o($db_conn, '-dbname')." '$sql_command'"
        : 'mysql '.$self->dbconn_2_mysql($db_conn, 1)." -e '$sql_command'";
}


=head2 dbconn_2_url

    Description :  A convenience method used to stringify a connection-parameters hash into a 'url' that beekeeper.pl will understand

=cut

sub dbconn_2_url {
    my ($self, $db_conn) = @_;

    return ($self->o($db_conn, '-driver') eq 'sqlite')
        ? $self->o($db_conn, '-driver').':///'.$self->o($db_conn,'-dbname')
        : $self->o($db_conn, '-driver').'://'.$self->o($db_conn,'-user').':'.$self->o($db_conn,'-pass').'@'.$self->o($db_conn,'-host').':'.$self->o($db_conn,'-port').'/'.$self->o($db_conn,'-dbname');
}

sub pipeline_url {
    my $self = shift @_;

        # delegating to dbconn_2_url also forces vivification of the whole 'pipeline_db' structure (used in run() ):
    return $self->dbconn_2_url('pipeline_db');
}

=head2 process_options

    Description : The method that does all the parameter parsing magic.
                  It is two-pass through the interface methods: first pass collects the options, second is intelligent substitution.

    Caller      : init_pipeline.pl or any other script that will drive this module.

    Note        : You can override parsing the command line bit by providing a hash as the argument to this method.
                  This hash should contain definitions of all the parameters you would otherwise be providing from the command line.
                  Useful if you are creating batches of hive pipelines using a script.

=cut

sub process_options {
    my $self = shift @_;

        # pre-patch definitely_used_options:
    $self->{'_extra_options'} = $self->load_cmdline_options( $self->pre_options() );
    $self->root()->{'pipeline_db'}{'-driver'} = $self->{'_extra_options'}{'hive_driver'} || 'mysql';

        # these interface methods are saturated by the DependentOptions machinery:
    $self->use_cases( [ 'pipeline_create_commands', 'pipeline_wide_parameters', 'resource_classes', 'pipeline_analyses', 'beekeeper_extra_cmdline_options', 'pipeline_url' ] );

    return $self->SUPER::process_options();
}

=head2 run

    Description : The method that uses the Hive/EnsEMBL API to actually create all the analyses, jobs, dataflow and control rules and resource descriptions.

    Caller      : init_pipeline.pl or any other script that will drive this module.

=cut

sub run {
    my $self  = shift @_;
    my $analysis_topup = $self->{'_extra_options'}{'analysis_topup'};
    my $job_topup      = $self->{'_extra_options'}{'job_topup'};

        # unless we are only topping up an existing pipeline, create and set up the pipeline database first:
    unless($analysis_topup || $job_topup) {
        foreach my $cmd (@{$self->pipeline_create_commands}) {
            warn "Running the command:\n\t$cmd\n";
            if(my $retval = system($cmd)) {
                die "Return value = $retval, possibly an error\n";
            } else {
                warn "Done.\n\n";
            }
        }
    }

    my $hive_dba                     = Bio::EnsEMBL::Hive::DBSQL::DBAdaptor->new( %{$self->o('pipeline_db')} );
    my $resource_class_adaptor       = $hive_dba->get_ResourceClassAdaptor;

    unless($job_topup) {
        my $meta_container = $hive_dba->get_MetaContainer;
        warn "Loading pipeline-wide parameters ...\n";

        my $pipeline_wide_parameters = $self->pipeline_wide_parameters;
        while( my($meta_key, $meta_value) = each %$pipeline_wide_parameters ) {
            if($analysis_topup) {
                $meta_container->delete_key($meta_key);
            }
            $meta_container->store_key_value($meta_key, stringify($meta_value));
        }
        warn "Done.\n\n";

            # pre-load resource_class and resource_description tables:
        my $resource_description_adaptor    = $hive_dba->get_ResourceDescriptionAdaptor;
        warn "Loading the Resources ...\n";

        my $resource_classes_hash = $self->resource_classes;
        my @resource_classes_order = sort { ($b eq 'default') or -($a eq 'default') or ($a cmp $b) } keys %$resource_classes_hash; # put 'default' to the front
        my %seen_resource_name = ();
        foreach my $rc_id (@resource_classes_order) {
            my $mt2param = $resource_classes_hash->{$rc_id};

            my $rc_name = delete $mt2param->{-desc};
            if($rc_id!~/^\d+$/) {   # a non-numeric key means the new 'name-as-key' style was used
                $rc_name  = $rc_id;
                $rc_id = undef;
            }

            if(!$rc_name or $seen_resource_name{lc($rc_name)}++) {
                die "Every resource has to have a unique description, please fix the PipeConfig file";
            }

            my $rc = $resource_class_adaptor->create_new(
                defined($rc_id) ? (-DBID   => $rc_id) : (),
                -NAME   => $rc_name,
            );
            $rc_id = $rc->dbID();

            warn "Creating resource_class $rc_name($rc_id).\n";

            while( my($meadow_type, $xparams) = each %$mt2param ) {
                $resource_description_adaptor->create_new(
                    -RESOURCE_CLASS_ID  => $rc_id,
                    -MEADOW_TYPE        => $meadow_type,
                    -PARAMETERS         => $xparams,
                );
            }
        }
        unless($seen_resource_name{'default'}) {
            warn "\tNB:You don't seem to have 'default' as one of the resource classes (forgot to inherit from SUPER::resource_classes ?) - creating one for you\n";
            $resource_class_adaptor->create_new(-NAME => 'default');
        }
        warn "Done.\n\n";
    }

    my $analysis_adaptor             = $hive_dba->get_AnalysisAdaptor;
    my $analysis_stats_adaptor       = $hive_dba->get_AnalysisStatsAdaptor;

    my %seen_logic_name = ();

    foreach my $aha (@{$self->pipeline_analyses}) {
        my ($logic_name, $module, $parameters_hash, $input_ids, $blocked, $batch_size, $hive_capacity, $failed_job_tolerance, $max_retry_count, $can_be_empty, $rc_id, $rc_name, $priority) =
             rearrange([qw(logic_name module parameters input_ids blocked batch_size hive_capacity failed_job_tolerance max_retry_count can_be_empty rc_id rc_name priority)], %$aha);

        unless($logic_name) {
            die "'logic_name' must be defined in every analysis";
        }

        if($seen_logic_name{$logic_name}++) {
            die "an entry with logic_name '$logic_name' appears at least twice in the configuration file, can't continue";
        }

        my $analysis = $analysis_adaptor->fetch_by_logic_name($logic_name);
        if( $analysis ) {

            if($analysis_topup) {
                warn "Skipping creation of already existing analysis '$logic_name'.\n";
                next;
            }

        } else {

            if($job_topup) {
                die "Could not fetch analysis '$logic_name'";
            }

            warn "Creating analysis '$logic_name'.\n";

            if($rc_id) {
                warn "(-rc_id => $rc_id) syntax is deprecated, please start using (-rc_name => 'your_resource_class_name')";
            } else {
                $rc_name ||= 'default';
                my $rc = $resource_class_adaptor->fetch_by_name($rc_name ) or die "Could not fetch resource with name '$rc_name', please check that resource_classes() method of your PipeConfig either contain it or inherit from the parent class";
                $rc_id = $rc->dbID();
            }

            $analysis = Bio::EnsEMBL::Hive::Analysis->new(
                -logic_name         => $logic_name,
                -module             => $module,
                -parameters         => stringify($parameters_hash || {}),    # have to stringify it here, because Analysis code is external wrt Hive code
                -resource_class_id  => $rc_id,
            );
            $analysis_adaptor->store($analysis);

            $analysis_stats_adaptor->create_new_for_analysis_id($analysis->dbID);

            my $stats = $analysis->stats();
            $stats->batch_size( $batch_size )                       if(defined($batch_size));
            $stats->hive_capacity( $hive_capacity )                 if(defined($hive_capacity));
            $stats->failed_job_tolerance( $failed_job_tolerance )   if(defined($failed_job_tolerance));
            $stats->max_retry_count( $max_retry_count )             if(defined($max_retry_count));
            $stats->can_be_empty( $can_be_empty )                   if(defined($can_be_empty));
            $stats->priority( $priority )                           if(defined($priority));
            $stats->status($blocked ? 'BLOCKED' : 'READY');         # be careful, as this "soft" way of blocking may be accidentally unblocked by deep sync
            $stats->update();
        }

            # now create the corresponding jobs (if there are any):
        foreach my $input_id_hash (@{$input_ids || []}) {

            Bio::EnsEMBL::Hive::DBSQL::AnalysisJobAdaptor->CreateNewJob(
                -input_id       => $input_id_hash,  # input_ids are now centrally stringified in the AnalysisJobAdaptor
                -analysis       => $analysis,
                -input_job_id   => undef, # these jobs are created by the initialization script, not by another job
            );
        }
    }

    unless($job_topup) {

            # Now, run separately through the already created analyses and link them together:
            #
        my $ctrl_rule_adaptor            = $hive_dba->get_AnalysisCtrlRuleAdaptor;
        my $dataflow_rule_adaptor        = $hive_dba->get_DataflowRuleAdaptor;

        foreach my $aha (@{$self->pipeline_analyses}) {
            my ($logic_name, $wait_for, $flow_into) =
                 rearrange([qw(logic_name wait_for flow_into)], %$aha);

            my $analysis = $analysis_adaptor->fetch_by_logic_name($logic_name);

            $wait_for ||= [];
            $wait_for   = [ $wait_for ] unless(ref($wait_for) eq 'ARRAY'); # force scalar into an arrayref

                # create control rules:
            foreach my $condition_url (@$wait_for) {
                if(my $condition_analysis = $analysis_adaptor->fetch_by_logic_name_or_url($condition_url)) {

                    my $c_rule = Bio::EnsEMBL::Hive::AnalysisCtrlRule->new(
                            -condition_analysis_url => $condition_url,
                            -ctrled_analysis_id     => $analysis->dbID,
                    );
                    $ctrl_rule_adaptor->store( $c_rule, 1 );

                    warn $c_rule->toString."\n";
                } else {
                    die "Could not fetch analysis '$condition_url' to create a control rule";
                }
            }

            $flow_into ||= {};
            $flow_into   = { 1 => $flow_into } unless(ref($flow_into) eq 'HASH'); # force non-hash into a hash

            my %group_tag_to_funnel_dataflow_rule_id = ();

            my $semaphore_sign = '->';

                # funnels ('A->2') are processed before fans ('2->A'), then the ungrouped branches:
            my @all_branch_tags = keys %$flow_into;
            foreach my $branch_tag ((grep {/^[A-Z]$semaphore_sign/} @all_branch_tags), (grep {/$semaphore_sign[A-Z]$/} @all_branch_tags), (grep {!/$semaphore_sign/} @all_branch_tags)) {

                my ($branch_name_or_code, $group_role, $group_tag);

                if($branch_tag=~/^([A-Z])$semaphore_sign(-?\w+)$/) {
                    ($branch_name_or_code, $group_role, $group_tag) = ($2, 'funnel', $1);
                } elsif($branch_tag=~/^(-?\w+)$semaphore_sign([A-Z])$/) {
                    ($branch_name_or_code, $group_role, $group_tag) = ($1, 'fan', $2);
                } elsif($branch_tag=~/^(-?\w+)$/) {
                    ($branch_name_or_code, $group_role, $group_tag) = ($1, '');
                } elsif($branch_tag=~/:/) {
                    die "Please use newer '2${semaphore_sign}A' and 'A${semaphore_sign}1' notation instead of '2:1' and '1'\n";
                } else {
                    die "Error parsing the group tag '$branch_tag'\n";
                }

                my $funnel_dataflow_rule_id = undef;    # NULL by default

                if($group_role eq 'fan') {
                    unless($funnel_dataflow_rule_id = $group_tag_to_funnel_dataflow_rule_id{$group_tag}) {
                        die "No funnel dataflow_rule defined for group '$group_tag'\n";
                    }
                }

                my $heirs = $flow_into->{$branch_tag};
                $heirs = [ $heirs ] unless(ref($heirs)); # force scalar into an arrayref first
                $heirs = { map { ($_ => undef) } @$heirs } if(ref($heirs) eq 'ARRAY'); # now force it into a hash if it wasn't

                while(my ($heir_url, $input_id_template_list) = each %$heirs) {

                    $input_id_template_list = [ $input_id_template_list ] unless(ref($input_id_template_list) eq 'ARRAY');  # allow for more than one template per analysis

                    foreach my $input_id_template (@$input_id_template_list) {

                            # NOTE(review): fetched but not otherwise used here - presumably to pre-warm/validate the target; confirm before removing:
                        my $heir_analysis = $analysis_adaptor->fetch_by_logic_name_or_url($heir_url);

                        my $df_rule = Bio::EnsEMBL::Hive::DataflowRule->new(
                            -from_analysis              => $analysis,
                            -to_analysis_url            => $heir_url,
                            -branch_code                => $dataflow_rule_adaptor->branch_name_2_code( $branch_name_or_code ),
                            -input_id_template          => $input_id_template,
                            -funnel_dataflow_rule_id    => $funnel_dataflow_rule_id,
                        );
                        $dataflow_rule_adaptor->store( $df_rule, 1 );

                        warn $df_rule->toString."\n";

                        if($group_role eq 'funnel') {
                            if($group_tag_to_funnel_dataflow_rule_id{$group_tag}) {
                                die "More than one funnel dataflow_rule defined for group '$group_tag'\n";
                            } else {
                                $group_tag_to_funnel_dataflow_rule_id{$group_tag} = $df_rule->dbID();
                            }
                        }
                    } # /for all templates
                } # /for all heirs
            } # /for all branch_tags
        }
    }

    my $url = $self->dbconn_2_url('pipeline_db');

    print "\n\n\tPlease run the following commands:\n\n";
    print "  beekeeper.pl -url $url -sync\t\t\t# (synchronize the Hive - should always be done before [re]starting a pipeline)\n\n";
    print "  beekeeper.pl -url $url ".$self->beekeeper_extra_cmdline_options()." -loop\t\t# (run the pipeline in automatic mode)\n";
    print "(OR)\n";
    print "  beekeeper.pl -url $url ".$self->beekeeper_extra_cmdline_options()." -run \t\t# (run one step of the pipeline - useful for debugging/learning)\n";
    print "(OR)\n";
    print "  runWorker.pl -url $url ".$self->beekeeper_extra_cmdline_options()."      \t\t# (run exactly one Worker locally - useful for debugging/learning)\n";

    print "\n\n\tTo connect to your pipeline database use the following line:\n\n";
    print "  ".$self->db_connect_command('pipeline_db')."\n\n";
}

1;