TableDumperZipper_conf.pm 5.38 KB
Newer Older
1 2 3 4
=pod 

=head1 NAME

5
    Bio::EnsEMBL::Hive::PipeConfig::TableDumperZipper_conf
6 7 8

=head1 SYNOPSIS

9
    init_pipeline.pl Bio::EnsEMBL::Hive::PipeConfig::TableDumperZipper_conf -password $ENSADMIN_PSW -db_conn "mysql://ensadmin:${ENSADMIN_PSW}@localhost/lg4_long_mult"
10

11 12 13 14 15
    seed_pipeline.pl -url "mysql://ensadmin:${ENSADMIN_PSW}@localhost:3306/lg4_zip_tables" -logic_name find_tables -input_id "{'only_tables' => '%_result'}"

    runWorker.pl -url mysql://ensadmin:${ENSADMIN_PSW}@localhost:3306/lg4_zip_tables
    runWorker.pl -url mysql://ensadmin:${ENSADMIN_PSW}@localhost:3306/lg4_zip_tables
    runWorker.pl -url mysql://ensadmin:${ENSADMIN_PSW}@localhost:3306/lg4_zip_tables
16 17 18

=head1 DESCRIPTION  

19
    This is an example pipeline put together from three analyses (with pre-existing Runnables) :
20 21 22

    Analysis_1: JobFactory.pm is used to turn the list of tables of the given database into jobs

Leo Gordon's avatar
Leo Gordon committed
23
        these jobs are sent down the branch #2 into the second analysis
24

25
    Analysis_2: SystemCmd.pm is used to dump individual tables; each flows via branch #1 into Analysis_3
26

27
    Analysis_3: another instance of SystemCmd.pm is used to compress an individual table dump file
28

29 30
=head1 LICENSE

31
    Copyright [1999-2015] Wellcome Trust Sanger Institute and the EMBL-European Bioinformatics Institute
Matthieu Muffato's avatar
Matthieu Muffato committed
32
    Copyright [2016-2018] EMBL-European Bioinformatics Institute
33 34 35 36 37 38 39 40 41 42 43 44

    Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License.
    You may obtain a copy of the License at

         http://www.apache.org/licenses/LICENSE-2.0

    Unless required by applicable law or agreed to in writing, software distributed under the License
    is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    See the License for the specific language governing permissions and limitations under the License.

=head1 CONTACT

45
    Please subscribe to the Hive mailing list:  http://listserver.ebi.ac.uk/mailman/listinfo/ehive-users  to discuss Hive-related questions or to be notified of our updates
46

47 48
=cut

49

Leo Gordon's avatar
Leo Gordon committed
50
package Bio::EnsEMBL::Hive::PipeConfig::TableDumperZipper_conf;
51 52 53

use strict;
use warnings;
Leo Gordon's avatar
Leo Gordon committed
54

55 56
use base ('Bio::EnsEMBL::Hive::PipeConfig::HiveGeneric_conf');  # All Hive databases configuration files should inherit from HiveGeneric, directly or indirectly

Leo Gordon's avatar
Leo Gordon committed
57

58 59 60 61
=head2 pipeline_wide_parameters

    Description : Interface method that should return a hash of pipeline_wide_parameter_name->pipeline_wide_parameter_value pairs.
                  The value doesn't have to be a scalar; it can be any Perl structure (it will be stringified and de-stringified automagically).
Leo Gordon's avatar
Leo Gordon committed
62 63 64

=cut

65
sub pipeline_wide_parameters {
    my ($self) = @_;

    # Start from whatever the parent PipeConfig class already provides.
    my %inherited_params = %{ $self->SUPER::pipeline_wide_parameters };

    # Defaults specific to this pipeline.
    my %local_params = (
        # taken from the 'db_conn' option of this configuration
        'db_conn'       => $self->o('db_conn'),

        # '-t' means "dump without table definition"; use '' to dump with the table definition
        'dumping_flags' => '-t',

        # directory where both source and target files are located
        'directory'     => '.',

        # comparison operator used when selecting tables: 'LIKE' or 'NOT LIKE'
        'matching_op'   => 'LIKE',

        # table name pattern; any wildcard understood by MySQL
        'only_tables'   => '%',
    );

    # Local entries are listed last, so they win over inherited ones with the same name.
    return { %inherited_params, %local_params };
}

78

Leo Gordon's avatar
Leo Gordon committed
79 80 81 82 83
=head2 pipeline_analyses

    Description : Implements pipeline_analyses() interface method of Bio::EnsEMBL::Hive::PipeConfig::HiveGeneric_conf that defines the structure of the pipeline: analyses, jobs, rules, etc.
                  Here it defines three analyses:

84 85 86 87
                    * 'find_tables'         generates a list of tables whose names match the pattern #only_tables#
                      Each job of this analysis will dataflow (create jobs) via branch #2 into 'table_dumper' analysis.

                    * 'table_dumper'        dumps table contents (possibly with table definition) and flows via branch #1 into 'file_compressor' analysis.
Leo Gordon's avatar
Leo Gordon committed
88

89
                    * 'file_compressor'     compresses the dump file
Leo Gordon's avatar
Leo Gordon committed
90 91 92

=cut

93 94 95
sub pipeline_analyses {
    my ($self) = @_;

    # Analysis 1: lists the tables of the source database whose names satisfy
    # "#matching_op# #only_tables#"; the results are dataflown via branch #2
    # into 'table_dumper'.
    my $find_tables = {
        -logic_name => 'find_tables',
        -module     => 'Bio::EnsEMBL::Hive::RunnableDB::JobFactory',
        -parameters => {
            'inputquery' => 'SELECT table_name FROM information_schema.tables WHERE table_schema = "#mysql_dbname:db_conn#" AND table_name #matching_op# "#only_tables#"',
        },
        -flow_into  => {
            # Explicit-template alternative, kept for reference:
            #   2 => { 'table_dumper' => { 'table_name' => '#table_name#', 'db_conn' => '#db_conn#' }, },
            2 => [ 'table_dumper' ],
        },
    };

    # Analysis 2: dumps one table into #directory#/#table_name#.sql with mysqldump,
    # then flows via branch #1 into 'file_compressor'.
    my $table_dumper = {
        -logic_name        => 'table_dumper',
        -module            => 'Bio::EnsEMBL::Hive::RunnableDB::SystemCmd',
        -analysis_capacity => 2,
        -parameters        => {
            'filename' => '#directory#/#table_name#.sql',
            'cmd'      => 'mysqldump #mysql_conn:db_conn# #dumping_flags# #table_name# >#filename#',
        },
        -flow_into         => {
            # Explicit-template alternative, kept for reference:
            #   1 => { 'file_compressor' => { 'filename' => '#filename#' }, },
            1 => [ 'file_compressor' ],
        },
    };

    # Analysis 3: compresses the dump file produced for one table.
    # 'filename' is defined again here with the same template as in 'table_dumper'.
    my $file_compressor = {
        -logic_name        => 'file_compressor',
        -module            => 'Bio::EnsEMBL::Hive::RunnableDB::SystemCmd',
        -analysis_capacity => 8,
        -parameters        => {
            'filename' => '#directory#/#table_name#.sql',
            'cmd'      => 'gzip #filename#',
        },
    };

    return [ $find_tables, $table_dumper, $file_compressor ];
}

1;