#!/usr/bin/env perl

# A generic loader of hive pipelines

use strict;
use warnings;

use DBI;
use Getopt::Long;

use Data::Dumper;                   # NB: in this case it is not for testing but for actual data structure stringification
use Bio::EnsEMBL::Utils::Argument;  # import 'rearrange()'
use Bio::EnsEMBL::Hive::DBSQL::DBAdaptor;
use Bio::EnsEMBL::Hive::Extensions;

# Turn a -key style connection hashref into a mysql:// URL string
# suitable for passing to beekeeper.pl's -url option.
sub dbconn_2_url {
    my ($db_conn) = @_;

    my $user   = $db_conn->{-user};
    my $pass   = $db_conn->{-pass};
    my $host   = $db_conn->{-host};
    my $port   = $db_conn->{-port};
    my $dbname = $db_conn->{-dbname};

    return sprintf('mysql://%s:%s@%s:%s/%s', $user, $pass, $host, $port, $dbname);
}

# Read a pipeline configuration file (-conf), optionally run its one-off
# creation commands, then populate the hive database with pipeline-wide
# parameters, resource descriptions, analyses (+ initial jobs) and the
# control/dataflow rules that link the analyses together.
#
# Takes no arguments; reads @ARGV via Getopt::Long. Exits with status 1
# if no valid config file was supplied.
sub main {

    my $topup_flag  = 0;  # do not run initial scripts and only add new analyses+jobs (ignore the fetchable analyses)
    my $config_file = '';

    GetOptions(
               'topup=i'    => \$topup_flag,
               'conf=s'     => \$config_file,
    );

    unless($config_file and (-f $config_file)) {
        warn "Please supply a valid pipeline configuration file using '-conf' option\n";
        warn "Usage example:\n\t$0 -conf ../docs/long_mult_pipeline.conf\n";
        exit(1);
    }

        # the config file is a Perl snippet that returns a hashref describing
        # the pipeline; one-arg bless puts it into the current package so it
        # can be carried around as a lightweight object:
    my $self = bless ( do $config_file );

        # unless we are topping up an existing pipeline, run the one-off shell
        # commands (typically database creation and schema loading):
    if(!$topup_flag && $self->{-pipeline_create_commands}) {
        foreach my $cmd (@{$self->{-pipeline_create_commands}}) {
            warn "Running the command:\n\t$cmd\n";
            if(my $retval = system($cmd)) {     # system() returns non-zero on failure
                die "Return value = $retval, possibly an error\n";
            } else {
                warn "Done.\n\n";
            }
        }
    }

    my $hive_dba = Bio::EnsEMBL::Hive::DBSQL::DBAdaptor->new(%{$self->{-pipeline_db}});

        # load the pipeline-wide parameters into the meta table:
    if($self->{-pipeline_wide_parameters}) {
        my $meta_container = $hive_dba->get_MetaContainer;

        warn "Loading pipeline-wide parameters ...\n";

        while( my($meta_key, $meta_value) = each %{$self->{-pipeline_wide_parameters}} ) {
            if($topup_flag) {
                $meta_container->delete_key($meta_key);     # avoid duplicating keys when topping up
            }
            $meta_container->store_key_value($meta_key, $meta_value);
        }

        warn "Done.\n\n";
    }

        # pre-load the resource_description table
    if($self->{-resource_classes}) {
        my $resource_description_adaptor = $hive_dba->get_ResourceDescriptionAdaptor;

        warn "Loading the ResourceDescriptions ...\n";

        while( my($rc_id, $mt2param) = each %{$self->{-resource_classes}} ) {
            my $description = delete $mt2param->{-desc};    # the remaining keys are meadow types
            while( my($meadow_type, $xparams) = each %$mt2param ) {
                $resource_description_adaptor->create_new(
                    -RC_ID       => $rc_id,
                    -MEADOW_TYPE => $meadow_type,
                    -PARAMETERS  => $xparams,
                    -DESCRIPTION => $description,
                );
            }
        }

        warn "Done.\n\n";
    }

    my $analysis_adaptor = $hive_dba->get_AnalysisAdaptor;

        # tune Data::Dumper module to produce the output we want:
    $Data::Dumper::Indent     = 0;  # we want everything on one line
    $Data::Dumper::Terse      = 1;  # and we want it without dummy variable names
    $Data::Dumper::Sortkeys   = 1;  # make stringification more deterministic

        # first pass: create the analyses themselves and their initial jobs:
    foreach my $aha (@{$self->{-pipeline_analyses}}) {
        my ($logic_name, $module, $parameters, $input_ids, $blocked, $batch_size, $hive_capacity, $rc_id) =
             rearrange([qw(logic_name module parameters input_ids blocked batch_size hive_capacity rc_id)], %$aha);

        if($topup_flag and $analysis_adaptor->fetch_by_logic_name($logic_name)) {
            warn "Skipping already existing analysis '$logic_name'\n";
            next;
        }

        warn "Creating '$logic_name'...\n";

        my $analysis = Bio::EnsEMBL::Analysis->new (
            -db              => '',
            -db_file         => '',
            -db_version      => '1',
            -logic_name      => $logic_name,
            -module          => $module,
            -parameters      => Dumper($parameters),    # stringify the parameter structure for storage
        );

        $analysis_adaptor->store($analysis);

        my $stats = $analysis->stats();
        $stats->batch_size( $batch_size )       if(defined($batch_size));

# ToDo: hive_capacity for some analyses is set to '-1' (i.e. "not limited")
# Do we want this behaviour BY DEFAULT?
        $stats->hive_capacity( $hive_capacity ) if(defined($hive_capacity));

        $stats->rc_id( $rc_id ) if(defined($rc_id));

            # some analyses will be waiting for human intervention in blocked state:
        $stats->status($blocked ? 'BLOCKED' : 'READY');
        $stats->update();

            # now create the corresponding jobs (if there are any):
        foreach my $input_id (@$input_ids) {
            Bio::EnsEMBL::Hive::DBSQL::AnalysisJobAdaptor->CreateNewJob(
                -input_id       => Dumper($input_id),
                -analysis       => $analysis,
                -input_job_id   => 0, # because these jobs are created by the initialization script, not by another job
            );
        }
    }

        # Now, run separately through the already created analyses and link them together:
        #
    my $ctrl_rule_adaptor     = $hive_dba->get_AnalysisCtrlRuleAdaptor;
    my $dataflow_rule_adaptor = $hive_dba->get_DataflowRuleAdaptor;

    foreach my $aha (@{$self->{-pipeline_analyses}}) {
        my ($logic_name, $wait_for, $flow_into) =
             rearrange([qw(logic_name wait_for flow_into)], %$aha);

        my $analysis = $analysis_adaptor->fetch_by_logic_name($logic_name);

            # create control rules:
        foreach my $condition_logic_name (@$wait_for) {
            if(my $condition_analysis = $analysis_adaptor->fetch_by_logic_name($condition_logic_name)) {
                $ctrl_rule_adaptor->create_rule( $condition_analysis, $analysis);
                warn "Created Control rule: $condition_logic_name -| $logic_name\n";
            } else {
                die "Could not fetch analysis '$condition_logic_name' to create a control rule";
            }
        }

            # create dataflow rules; 'flow_into' is either a hashref
            # (branch_code => [heir_logic_names]) or a flat arrayref of
            # heir logic names (deprecated format, implies branch_code 1).
            # BUGFIX: this used to call req() instead of ref(), which crashed
            # with "Undefined subroutine" for any analysis defining flow_into.
        if(ref($flow_into) eq 'HASH') { # branched format:
            foreach my $branch_code (sort {$a <=> $b} keys %$flow_into) {
                foreach my $heir_logic_name (@{$flow_into->{$branch_code}}) {

                    if(my $heir_analysis = $analysis_adaptor->fetch_by_logic_name($heir_logic_name)) {
                        $dataflow_rule_adaptor->create_rule( $analysis, $heir_analysis, $branch_code);
                        warn "Created DataFlow rule: [$branch_code] $logic_name -> $heir_logic_name\n";
                    } else {
                        die "Could not fetch analysis '$heir_logic_name' to create a dataflow rule";
                    }
                }
            }
        } elsif(ref($flow_into) eq 'ARRAY') {   # array format (deprecated)

            foreach my $heir (@$flow_into) {
                    # an element may itself be [logic_name, branch_code]; a bare
                    # logic_name defaults to branch_code 1:
                my ($heir_logic_name, $branch_code) = (ref($heir) eq 'ARRAY') ? (@$heir, 1) : ($heir, 1);

                if(my $heir_analysis = $analysis_adaptor->fetch_by_logic_name($heir_logic_name)) {
                    $dataflow_rule_adaptor->create_rule( $analysis, $heir_analysis, $branch_code);
                    warn "Created DataFlow rule: [$branch_code] $logic_name -> $heir_logic_name\n";
                } else {
                    die "Could not fetch analysis '$heir_logic_name' to create a dataflow rule";
                }
            }
        }
    }

        # finally, tell the user how to kick the pipeline off:
    my $url = dbconn_2_url($self->{-pipeline_db});

    print "\n\n\tPlease run the following commands:\n\n";
    print "  beekeeper.pl -url $url -sync\n";
    print "  beekeeper.pl -url $url -loop\n";
}

main();