DatabaseDumper.pm 7.1 KB
Newer Older
1 2 3 4
=pod 

=head1 NAME

5 6 7 8 9 10
    Bio::EnsEMBL::Hive::RunnableDB::DatabaseDumper

=head1 SYNOPSIS

    standaloneJob.pl RunnableDB/DatabaseDumper.pm -exclude_ehive 1 -exclude_list 1 \
        -table_list "['peptide_align_%']" -src_db_conn mysql://ensro@127.0.0.1:4313/mm14_compara_homology_67 -output_file ~/dump1.sql
11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30

=head1 DESCRIPTION

This is a Runnable to dump the tables of a database (by default,
all of them).

The following parameters are accepted:

 - src_db_conn : the connection parameters to the database to be
    dumped (by default, the current eHive database if available)

 - exclude_ehive [boolean=0] : do we exclude the eHive-specific tables
    from the dump

 - table_list [string or array of strings]: the list of tables
    to include in the dump. The '%' wildcard is accepted.

 - exclude_list [boolean=0] : do we consider 'table_list' as a list
    of tables to be excluded from the dump (instead of included)

31 32
 - output_file [string] : the file to write the dump to. If the filename
    ends with ".gz", the file is compressed with "gzip" (default parameters)
33

34 35
 - output_db [string] : URL of a database to write the dump to. In this
    mode, the Runnable acts like MySQLTransfer
36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56

If "table_list" is undefined or maps to an empty list, the list
of tables to be dumped is decided accordingly to "exclude_list" (EL)
and "exclude_ehive" (EH). "exclude_list" controls the whole list of
non-eHive tables.

EL EH    List of tables to dump

0  0  => all the tables
0  1  => all the tables, except the eHive ones
1  0  => all the tables, except the non-eHive ones = only the eHive tables
1  1  => both eHive and non-eHive tables are excluded = nothing is dumped

If "table_list" is defined to non-empty list T, the table of decision is:

EL EH    List of tables to dump

0  0  => all the tables in T + the eHive tables
0  1  => all the tables in T
1  0  => all the tables, except the ones in T
1  1  => all the tables, except the ones in T and the eHive ones
57

58 59
=head1 LICENSE

60
    Copyright [1999-2014] Wellcome Trust Sanger Institute and the EMBL-European Bioinformatics Institute
61

62 63 64 65 66 67 68 69 70 71 72 73
    Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License.
    You may obtain a copy of the License at

         http://www.apache.org/licenses/LICENSE-2.0

    Unless required by applicable law or agreed to in writing, software distributed under the License
    is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    See the License for the specific language governing permissions and limitations under the License.

=head1 CONTACT

    Please contact ehive-users@ebi.ac.uk mailing list with questions/suggestions.
74 75 76

=cut

77

78 79 80
package Bio::EnsEMBL::Hive::RunnableDB::DatabaseDumper;

use strict;
81 82 83
use warnings;

use Bio::EnsEMBL::Hive::Utils ('go_figure_dbc');
84 85 86 87 88 89

use base ('Bio::EnsEMBL::Hive::Process');

sub fetch_input {
    my $self = shift @_;

90
    # The final list of tables
91
    my @tables = ();
92
    $self->param('tables', \@tables);
93
    my @ignores = ();
94 95
    $self->param('ignores', \@ignores);

96
    # Would be good to have this from eHive
97
    my @ehive_tables = qw(hive_meta worker dataflow_rule analysis_base analysis_ctrl_rule job accu log_message job_file analysis_data resource_description analysis_stats analysis_stats_monitor monitor msg progress resource_class);
98
    $self->param('nb_ehive_tables', scalar(@ehive_tables));
99 100

    # Connection parameters
101
    my $src_db_conn  = $self->param('src_db_conn');
102
    my $src_dbc = $src_db_conn ? go_figure_dbc($src_db_conn) : $self->data_dbc;
103
    $self->param('src_dbc', $src_dbc);
104

105
    $self->input_job->transient_error(0);
106 107
    die 'Only the "mysql" driver is supported.' if $src_dbc->driver ne 'mysql';

108
    # Get the table list in either "tables" or "ignores"
109
    my $table_list = $self->_get_table_list;
110 111
    print "table_list: ", scalar(@$table_list), " ", join('/', @$table_list), "\n" if $self->debug;

112 113
    if ($self->param('exclude_list')) {
        push @ignores, @$table_list;
114
    } else {
115
        push @tables, @$table_list;
116
    }
117

118
    # eHive tables are dumped unless exclude_ehive is defined
119 120
    if ($self->param('exclude_ehive')) {
        push @ignores, @ehive_tables;
121 122 123
    } elsif (scalar(@$table_list) and not $self->param('exclude_list')) {
        push @tables, @ehive_tables;
    } elsif (not scalar(@$table_list) and $self->param('exclude_list')) {
124
        push @tables, @ehive_tables;
125 126
    }

127 128
    # Output file / output database
    $self->param('output_file') || $self->param('output_db') || die 'One of the parameters "output_file" and "output_db" is mandatory';
129
    unless ($self->param('output_file')) {
130
        $self->param('real_output_db', go_figure_dbc( $self->param('output_db') ) );
131
        die 'Only the "mysql" driver is supported.' if $self->param('real_output_db')->driver ne 'mysql';
132 133 134 135 136 137 138 139 140 141 142 143
    }

    $self->input_job->transient_error(1);
}


# Splits a string into a list of strings
# Ask the database for the list of tables that match the wildcard "%"

sub _get_table_list {
    my $self = shift @_;

144
    my $table_list = $self->param('table_list') || '';
145
    my @newtables = ();
146
    my $dbc = $self->param('src_dbc');
147
    foreach my $initable (ref($table_list) eq 'ARRAY' ? @$table_list : split(' ', $table_list)) {
148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166
        if ($initable =~ /%/) {
            $initable =~ s/_/\\_/g;
            my $sth = $dbc->db_handle->table_info(undef, undef, $initable, undef);
            push @newtables, map( {$_->[2]} @{$sth->fetchall_arrayref});
        } else {
            push @newtables, $initable;
        }
    }
    return \@newtables;
}


sub run {
    my $self = shift @_;

    my $src_dbc = $self->param('src_dbc');
    my $tables = $self->param('tables');
    my $ignores = $self->param('ignores');

167 168 169 170 171 172 173
    print "tables: ", scalar(@$tables), " ", join('/', @$tables), "\n" if $self->debug;
    print "ignores: ", scalar(@$ignores), " ", join('/', @$ignores), "\n" if $self->debug;

    # We have to exclude everything
    return if ($self->param('exclude_ehive') and $self->param('exclude_list') and scalar(@$ignores) == $self->param('nb_ehive_tables'));

    # mysqldump command
174 175 176 177 178 179 180 181 182 183 184
    my $output = "";
    if ($self->param('output_file')) {
        if (lc $self->param('output_file') =~ /\.gz$/) {
            $output = sprintf(' | gzip > %s', $self->param('output_file'));
        } else {
            $output = sprintf('> %s', $self->param('output_file'));
        }
    } else {
        $output = sprintf(' | mysql %s', $self->mysql_conn_from_dbc($self->param('real_output_db')));
    };

185 186 187
    my $cmd = join(' ', 
        'mysqldump',
        $self->mysql_conn_from_dbc($src_dbc),
188
        '--skip-lock-tables',
189
        @$tables,
190
        (map {sprintf('--ignore-table=%s.%s', $src_dbc->dbname, $_)} @$ignores),
191
        $output
192
    );
193
    print "$cmd\n" if $self->debug;
194 195 196 197 198 199 200

    # We have to skip the dump
    return if ($self->param('skip_dump'));

    # OK, we can dump
    if(my $return_value = system($cmd)) {
        die "system( $cmd ) failed: $return_value";
201 202 203 204 205 206 207
    }
}


sub mysql_conn_from_dbc {
    my ($self, $dbc) = @_; 

208
    return '--host='.$dbc->host.' --port='.$dbc->port." --user='".$dbc->username."' --password='".$dbc->password."' ".$dbc->dbname;
209 210 211 212
}


1;