=pod

=head1 NAME

  Bio::EnsEMBL::Hive::PipeConfig::TableDumperZipper_conf

=head1 SYNOPSIS

    init_pipeline.pl Bio::EnsEMBL::Hive::PipeConfig::TableDumperZipper_conf -password $ENSADMIN_PSW -db_conn "mysql://ensadmin:${ENSADMIN_PSW}@localhost/lg4_long_mult"

    seed_pipeline.pl -url "mysql://ensadmin:${ENSADMIN_PSW}@localhost:3306/lg4_zip_tables" -logic_name find_tables -input_id "{'only_tables' => '%_result'}"

    runWorker.pl -url mysql://ensadmin:${ENSADMIN_PSW}@localhost:3306/lg4_zip_tables
    runWorker.pl -url mysql://ensadmin:${ENSADMIN_PSW}@localhost:3306/lg4_zip_tables
    runWorker.pl -url mysql://ensadmin:${ENSADMIN_PSW}@localhost:3306/lg4_zip_tables

=head1 DESCRIPTION

    This is an example pipeline put together from three analyses (with pre-existing Runnables):

    Analysis_1: JobFactory.pm is used to turn the list of tables of the given database into jobs

        these jobs are sent down the branch #2 into the second analysis

    Analysis_2: SystemCmd.pm is used to dump individual tables; each flows via branch #1 into Analysis_3

    Analysis_3: another instance of SystemCmd.pm is used to compress an individual table dump file

=cut

package Bio::EnsEMBL::Hive::PipeConfig::TableDumperZipper_conf;

use strict;
use warnings;

use base ('Bio::EnsEMBL::Hive::PipeConfig::HiveGeneric_conf');  # All Hive databases configuration files should inherit from HiveGeneric, directly or indirectly

=head2 default_options

    Description : Implements default_options() interface method of Bio::EnsEMBL::Hive::PipeConfig::HiveGeneric_conf that is used to initialize default options.

=cut

sub default_options {
    my ($self) = @_;

    # The base-class defaults are expanded first; any key listed after them
    # overrides the inherited value, because later pairs win in a hash constructor.
    return {
        %{ $self->SUPER::default_options() },               # inherit other stuff from the base class

        'pipeline_name'     => 'zip_tables',                # name used by the beekeeper to prefix job names on the farm
    };
}

=head2 pipeline_wide_parameters

    Description : Interface method that should return a hash of pipeline_wide_parameter_name->pipeline_wide_parameter_value pairs.
                  The value doesn't have to be a scalar, can be any Perl structure (will be stringified and de-stringified automagically).

=cut

sub pipeline_wide_parameters {
    my ($self) = @_;

    # Returns a hashref: the base-class parameters first, then the ones specific
    # to this pipeline (which override any inherited key of the same name).
    return {
        %{$self->SUPER::pipeline_wide_parameters},          # here we inherit anything from the base class, then add our own stuff

        'db_conn'       => $self->o('db_conn'),
        'dumping_flags' => '-t',    # '-t' for "dump without table definition" or '' for "dump with table definition"
        'directory'     => '.',     # directory where both source and target files are located
        'matching_op'   => 'LIKE',  # 'LIKE' or 'NOT LIKE'
        'only_tables'   => '%',     # any wildcard understood by MySQL
    };
}

=head2 pipeline_analyses

    Description : Implements pipeline_analyses() interface method of Bio::EnsEMBL::Hive::PipeConfig::HiveGeneric_conf that defines the structure of the pipeline: analyses, jobs, rules, etc.
                  Here it defines three analyses:

                    * 'find_tables'         generates a list of tables whose names match the pattern #only_tables#
                      Each job of this analysis will dataflow (create jobs) via branch #2 into 'table_dumper' analysis.

                    * 'table_dumper'        dumps table contents (possibly with table definition) and flows via branch #1 into 'file_compressor' analysis.

                    * 'file_compressor'     compresses the dump file

=cut

sub pipeline_analyses {
    my ($self) = @_;

    # Returns an arrayref of three analysis definitions, chained through -flow_into:
    #   find_tables --(branch 2)--> table_dumper --(branch 1)--> file_compressor
    # The #name# placeholders inside the strings are left as-is here; they are
    # substituted by the Hive at run time, not by this method.
    return [
        {   -logic_name => 'find_tables',
            -module     => 'Bio::EnsEMBL::Hive::RunnableDB::JobFactory',
            -parameters => {
                'inputquery'    => 'SELECT table_name FROM information_schema.tables WHERE table_schema = "#mysql_dbname:db_conn#" AND table_name #matching_op# "#only_tables#"',
            },
            -flow_into => {
                # Alternative form with an explicit input_id template, kept for reference:
#                2 => { 'table_dumper' => { 'table_name' => '#table_name#', 'db_conn' => '#db_conn#' }, },
                2 => [ 'table_dumper' ],
            },
        },

        {   -logic_name    => 'table_dumper',
            -module        => 'Bio::EnsEMBL::Hive::RunnableDB::SystemCmd',
            -parameters    => {
                'filename'   => '#directory#/#table_name#.sql',
                'cmd'        => 'mysqldump #mysql_conn:db_conn# #dumping_flags# #table_name# >#filename#',
            },
            -analysis_capacity => 2,
            -flow_into => {
                # Alternative form with an explicit input_id template, kept for reference:
#                1 => { 'file_compressor' => { 'filename' => '#filename#' }, },
                1 => [ 'file_compressor' ],
            },
        },

        {   -logic_name    => 'file_compressor',
            -module        => 'Bio::EnsEMBL::Hive::RunnableDB::SystemCmd',
            -parameters    => {
                # Same path expression as in 'table_dumper', so that gzip is
                # pointed at the file the dump command wrote.
                'filename'   => '#directory#/#table_name#.sql',
                'cmd'        => 'gzip #filename#',
            },
            -analysis_capacity => 8,
        },
    ];
}

1;