#!/usr/bin/env perl

use strict;
use warnings;

# Finding out own path in order to reference own components (including own modules):
use Cwd            ();
use File::Basename ();
BEGIN {
        # Unless EHIVE_ROOT_DIR is already set in the environment, derive it as the parent
        # of the directory that holds this script (realpath resolves symlinks in $0 first):
    $ENV{'EHIVE_ROOT_DIR'} ||= File::Basename::dirname( File::Basename::dirname( Cwd::realpath($0) ) );

        # Put the bundled modules first in @INC so that the 'use' statements below find them:
    unshift @INC, $ENV{'EHIVE_ROOT_DIR'}.'/modules';
}

use Getopt::Long;
use Bio::EnsEMBL::Hive::AnalysisJob;
use Bio::EnsEMBL::Hive::DBSQL::DBAdaptor;
use Bio::EnsEMBL::Hive::DBSQL::AnalysisJobAdaptor;
use Bio::EnsEMBL::Hive::Utils ('destringify', 'stringify', 'script_usage');

# Prints (to STDOUT) a list of analyses that have no entry in the incoming-dataflow hash,
# i.e. the candidates for manual seeding, together with one example input_id per analysis
# when that analysis already has at least one job.
#
# Arguments:
#   $hive_dba - database adaptor object; must provide get_AnalysisAdaptor,
#               get_DataflowRuleAdaptor and get_AnalysisJobAdaptor.
# Returns: nothing meaningful; called for its output only.
sub show_seedable_analyses {
    my ($hive_dba) = @_;

    my $analyses    = $hive_dba->get_AnalysisAdaptor->fetch_all();
        # NOTE(review): judging by the method name the hash is keyed by to_analysis_url;
        # the lookup below uses the plain logic_name as the key - confirm these match.
    my $incoming    = $hive_dba->get_DataflowRuleAdaptor->fetch_HASHED_FROM_to_analysis_url_TO_dataflow_rule_id();
    my $job_adaptor = $hive_dba->get_AnalysisJobAdaptor;

    print "\nYou haven't specified neither -logic_name nor -analysis_id of the analysis being seeded.\n";
    print "\nSeedable analyses without incoming dataflow:\n";

    foreach my $analysis (@$analyses) {
        my $logic_name = $analysis->logic_name;

            # analyses that are fed by a dataflow rule are not reported:
        next if $incoming->{$logic_name};

        my $analysis_id   = $analysis->dbID;

            # fetch at most one existing job, to show what an input_id looks like:
        my ($example_job) = @{ $job_adaptor->fetch_some_by_analysis_id_limit( $analysis_id, 1 ) };
        my $example       = $example_job
                                ? "Example input_id:   '".$example_job->input_id."'"
                                : "[not populated yet]";

        print "\t$logic_name ($analysis_id)\t\t".$example."\n";
    }
}

# Script entry point.
#
# Parses the command line, connects to the Hive database (via -url or the registry options),
# resolves the target analysis (by -logic_name first, otherwise by -analysis_id) and stores
# one new job with the given -input_id (default '{}') in that analysis.
# When no analysis is specified, lists the seedable analyses instead and exits with status 0.
#
# Dies if the analysis cannot be fetched or if -input_id does not destringify into a hash reference.
# Invalid options or leftover command line words are reported via script_usage(1).
sub main {
    my ($url, $reg_conf, $reg_type, $reg_alias, $nosqlvc, $analysis_id, $logic_name, $input_id);

    my $options_ok = GetOptions(
                # connect to the database:
            'url=s'                      => \$url,
            'reg_conf|regfile=s'         => \$reg_conf,
            'reg_type=s'                 => \$reg_type,
            'reg_alias|regname=s'        => \$reg_alias,
            'nosqlvc=i'                  => \$nosqlvc,      # using "=i" instead of "!" for consistency with scripts where it is a propagated option

                # identify the analysis:
            'analysis_id=i'         => \$analysis_id,
            'logic_name=s'          => \$logic_name,

                # specify the input_id (as a string):
            'input_id=s'            => \$input_id,
    );

        # GetOptions returns false when it met an unknown or malformed option; do not seed anything in that case:
    unless($options_ok) {
        warn "\nERROR: Could not parse the command line options\n";
        script_usage(1);
    }

        # Anything left in @ARGV was not consumed by an option, e.g. the pieces of an unquoted -input_id:
    if(@ARGV) {
        warn "\nERROR: Unexpected command line arguments: ".join(' ', @ARGV)."\n";
        script_usage(1);
    }

    my $hive_dba;
    if($url or $reg_alias) {
        $hive_dba = Bio::EnsEMBL::Hive::DBSQL::DBAdaptor->new(
                -url                            => $url,
                -reg_conf                       => $reg_conf,
                -reg_type                       => $reg_type,
                -reg_alias                      => $reg_alias,
                -no_sql_schema_version_check    => $nosqlvc,
        );
    } else {
        warn "\nERROR: Connection parameters (url or reg_conf+reg_alias) need to be specified\n";
        script_usage(1);    # NOTE(review): expected to terminate the script; otherwise $hive_dba stays undefined below
    }

    my $analysis_adaptor = $hive_dba->get_AnalysisAdaptor;
    my $analysis;
    if($logic_name) {           # -logic_name takes precedence when both identifiers are given
        $analysis = $analysis_adaptor->fetch_by_logic_name( $logic_name )
            or die "Could not fetch analysis '$logic_name'";
    } elsif($analysis_id) {
        $analysis = $analysis_adaptor->fetch_by_dbID( $analysis_id )
            or die "Could not fetch analysis with dbID='$analysis_id'";
    } else {
            # no analysis given: help the user to pick one instead of seeding
        show_seedable_analyses($hive_dba);
        exit(0);
    }

    unless($input_id) {
        $input_id = '{}';
        warn "Since -input_id has not been set, assuming input_id='$input_id'\n";
    }

        # The input_id arrives as a string; it has to turn into a hash reference to be acceptable:
    my $dinput_id = destringify($input_id);
    if (!ref($dinput_id)) {
        die "'$input_id' cannot be eval'ed, likely because of a syntax error\n";
    }
    if (ref($dinput_id) ne 'HASH') {
        die "'$input_id' is not a hash\n";
    }

    my $job = Bio::EnsEMBL::Hive::AnalysisJob->new(
        -prev_job_id    => undef,   # this job has been created by the initialization script, not by another job
        -analysis_id    => $analysis->dbID,
        -input_id       => $dinput_id,      # Make sure all job creations undergo re-stringification to avoid alternative "spellings" of the same input_id hash
    );

        # store_jobs_and_adjust_counters is given a list of jobs; only the first returned id is of interest here:
    my ($job_id) = @{ $hive_dba->get_AnalysisJobAdaptor->store_jobs_and_adjust_counters( [ $job ] ) };

    if($job_id) {

        print "Job $job_id [ ".$analysis->logic_name.'('.$analysis->dbID.")] : '$input_id'\n";

    } else {

        warn "Could not create job '$input_id' (it may have been created already)\n";
    }
}

main();

__DATA__

=pod

=head1 NAME

    seed_pipeline.pl

=head1 SYNOPSIS

    seed_pipeline.pl {-url <url> | -reg_conf <reg_conf> [-reg_type <reg_type>] -reg_alias <reg_alias>} [ {-analysis_id <analysis_id> | -logic_name <logic_name>} [ -input_id <input_id> ] ]
131 132 133 134 135

=head1 DESCRIPTION

    seed_pipeline.pl is a generic script that is used to create {initial or top-up} jobs for hive pipelines

=head1 USAGE EXAMPLES

        # find out which analyses may need seeding (with an example input_id):

    seed_pipeline.pl -url "mysql://ensadmin:${ENSADMIN_PSW}@localhost:3306/lg4_long_mult"


        # seed one job into the "start" analysis:

    seed_pipeline.pl -url "mysql://ensadmin:${ENSADMIN_PSW}@localhost:3306/lg4_long_mult" \
                     -logic_name start -input_id '{"a_multiplier" => 2222222222, "b_multiplier" => 3434343434}'

=head1 LICENSE

    Copyright [1999-2015] Wellcome Trust Sanger Institute and the EMBL-European Bioinformatics Institute
    Copyright [2016] EMBL-European Bioinformatics Institute

    Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License.
    You may obtain a copy of the License at

         http://www.apache.org/licenses/LICENSE-2.0

    Unless required by applicable law or agreed to in writing, software distributed under the License
    is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    See the License for the specific language governing permissions and limitations under the License.

=head1 CONTACT

    Please subscribe to the Hive mailing list:  http://listserver.ebi.ac.uk/mailman/listinfo/ehive-users  to discuss Hive-related questions or to be notified of our updates

=cut