=pod

=head1 NAME

    Bio::EnsEMBL::Hive::PipeConfig::ApplyToDatabases_conf

=head1 SYNOPSIS

    init_pipeline.pl Bio::EnsEMBL::Hive::PipeConfig::ApplyToDatabases_conf -password <your_password>

    init_pipeline.pl Bio::EnsEMBL::Hive::PipeConfig::ApplyToDatabases_conf -password <your_password> -only_databases '%'

=head1 DESCRIPTION

    This is an example framework for running queries against databases whose names have been fetched from a server:

    Analysis_1: JobFactory.pm is used to turn the list of databases on a particular MySQL instance into jobs.

    These jobs are sent down branch #2 into the second analysis.

    Analysis_2: Use SqlCmd.pm to run queries directly, or another JobFactory.pm if you need a further fan on tables.

=head1 LICENSE

    Copyright [1999-2015] Wellcome Trust Sanger Institute and the EMBL-European Bioinformatics Institute
    Copyright [2016] EMBL-European Bioinformatics Institute

    Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License.
    You may obtain a copy of the License at

         http://www.apache.org/licenses/LICENSE-2.0

    Unless required by applicable law or agreed to in writing, software distributed under the License
    is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    See the License for the specific language governing permissions and limitations under the License.

=head1 CONTACT

  Please subscribe to the Hive mailing list:  http://listserver.ebi.ac.uk/mailman/listinfo/ehive-users  to discuss Hive-related questions or to be notified of our updates.

=cut

package Bio::EnsEMBL::Hive::PipeConfig::ApplyToDatabases_conf;

use strict;
use warnings;
use base ('Bio::EnsEMBL::Hive::PipeConfig::HiveGeneric_conf');  # All Hive databases configuration files should inherit from HiveGeneric, directly or indirectly

# default_options
#
# Returns a hashref of pipeline-wide options: everything returned by the parent
# class's default_options(), followed by the options specific to this example
# (later keys override inherited ones with the same name).
#
# Both 'source_server*' URLs embed $self->o('password'); the SYNOPSIS shows it
# being supplied to init_pipeline.pl as -password.
sub default_options {
    my ($self) = @_;
    return {
        %{ $self->SUPER::default_options() },               # inherit other stuff from the base class

        'pipeline_name' => 'apply_to_databases',            # name used by the beekeeper to prefix job names on the farm

        # server-level URLs ending in '/': pipeline_analyses() appends '#dbname#' to them to address one database
        'source_server1' => 'mysql://ensadmin:'.$self->o('password').'@127.0.0.1:3306/',
        'source_server2' => 'mysql://ensadmin:'.$self->o('password').'@127.0.0.1:2914/',

        # pattern placed inside SHOW DATABASES LIKE "..." by the 'get_databases' analysis
        'only_databases'   => '%\_core\_%',                 # use '%' to get a list of all available databases
    };
}

# pipeline_analyses
#
# Returns an arrayref describing the two analyses of this example pipeline:
#
#   get_databases - a JobFactory seeded with one job per source server. Each job
#                   runs SHOW DATABASES LIKE "<only_databases>" and names the
#                   single result column 'dbname'.
#   run_sql       - receives one job per database via branch #2 of 'get_databases'.
#                   It is a Dummy placeholder here; see the inline note on what to
#                   substitute for real work.
sub pipeline_analyses {
    my ($self) = @_;
    return [
        {   -logic_name => 'get_databases',
            -module     => 'Bio::EnsEMBL::Hive::RunnableDB::JobFactory',
            -parameters => {
                'inputquery'   => q{SHOW DATABASES LIKE "}.$self->o('only_databases').q{"},
                'column_names' => [ 'dbname' ],
            },
            -input_ids => [
                # one seed job per server to be scanned
                { 'db_conn' => $self->o('source_server1') },
                { 'db_conn' => $self->o('source_server2') },
            ],
            -flow_into => {
                # the server URL ends in '/', so appending the database name yields a per-database URL
                2 => { 'run_sql' => { 'db_conn' => '#db_conn##dbname#' },
                }
            },
        },

        {   -logic_name    => 'run_sql',
            -module        => 'Bio::EnsEMBL::Hive::RunnableDB::Dummy',  # use SqlCmd.pm to run your query or another JobFactory.pm to make another fan on table names
            -parameters    => {
            },
            -analysis_capacity => 10,       # allow several workers to perform identical tasks in parallel
            -input_ids     => [
                # (jobs for this analysis will be flown_into via branch-2 from 'get_databases' jobs above)
            ],
        },
    ];
}

1;