Commit da278e1f authored by Matthieu Muffato
Browse files

Merge branch 'version/2.1' into version/2.2

* version/2.1:
  Documentation update to reflect the latest changes
  bugfix: more accurate way of detecting that there is nothing to dump
  Don't die if a table doesn't exist
  bugfix: a file should be created even if there is nothing to dump
  bugfix: die if the input_id is not a valid hash
parents 1bff3ab7 e71b03cb
...@@ -36,13 +36,20 @@ The following parameters are accepted: ...@@ -36,13 +36,20 @@ The following parameters are accepted:
- skip_dump [boolean=0] : set this to 1 to skip the dump - skip_dump [boolean=0] : set this to 1 to skip the dump
The decision process regarding which tables should be dumped is quite complex.
The following sections explain the various scenarios.
1. eHive database
1.a. Hybrid database
If "table_list" is undefined or maps to an empty list, the list If "table_list" is undefined or maps to an empty list, the list
of tables to be dumped is decided according to "exclude_list" (EL) of tables to be dumped is decided according to "exclude_list" (EL)
and "exclude_ehive" (EH). "exclude_list" controls the whole list of and "exclude_ehive" (EH). "exclude_list" controls the whole list of
non-eHive tables. non-eHive tables.
EL EH List of tables to dump EL EH List of tables to dump
0 0 => all the tables 0 0 => all the tables
0 1 => all the tables, except the eHive ones 0 1 => all the tables, except the eHive ones
1 0 => all the tables, except the non-eHive ones = only the eHive tables 1 0 => all the tables, except the non-eHive ones = only the eHive tables
...@@ -51,12 +58,35 @@ EL EH List of tables to dump ...@@ -51,12 +58,35 @@ EL EH List of tables to dump
If "table_list" is defined as a non-empty list T, the decision table is: If "table_list" is defined as a non-empty list T, the decision table is:
EL EH List of tables to dump EL EH List of tables to dump
0 0 => all the tables in T + the eHive tables 0 0 => all the tables in T + the eHive tables
0 1 => all the tables in T 0 1 => all the tables in T
1 0 => all the tables, except the ones in T 1 0 => all the tables, except the ones in T
1 1 => all the tables, except the ones in T and the eHive ones 1 1 => all the tables, except the ones in T and the eHive ones
1.b. eHive-only database
The decision table can be simplified if the database only contains eHive tables.
In particular, the "exclude_list" and "table_list" parameters have no effect.
EH List of tables to dump
0 => All the eHive tables, i.e. the whole database
1 => No eHive tables, i.e. nothing
2. non-eHive database
The "exclude_ehive" parameter is ignored.
empty "table_list":
EL List of tables to dump
0 => all the tables
1 => all the tables are excluded = nothing is dumped
non-empty "table_list" T:
EL List of tables to dump
0 => all the tables in T
1 => all the tables, except the ones in T
=head1 LICENSE =head1 LICENSE
Copyright [1999-2016] Wellcome Trust Sanger Institute and the EMBL-European Bioinformatics Institute Copyright [1999-2016] Wellcome Trust Sanger Institute and the EMBL-European Bioinformatics Institute
...@@ -112,10 +142,6 @@ sub fetch_input { ...@@ -112,10 +142,6 @@ sub fetch_input {
my @ignores = (); my @ignores = ();
$self->param('ignores', \@ignores); $self->param('ignores', \@ignores);
# Would be good to have this from eHive
my @ehive_tables = qw(hive_meta pipeline_wide_parameters worker dataflow_rule analysis_base analysis_ctrl_rule job accu log_message job_file analysis_data resource_description analysis_stats analysis_stats_monitor role msg progress resource_class worker_resource_usage);
$self->param('nb_ehive_tables', scalar(@ehive_tables));
# Connection parameters # Connection parameters
my $src_db_conn = $self->param('src_db_conn'); my $src_db_conn = $self->param('src_db_conn');
my $src_dbc = $src_db_conn ? go_figure_dbc($src_db_conn) : $self->data_dbc; my $src_dbc = $src_db_conn ? go_figure_dbc($src_db_conn) : $self->data_dbc;
...@@ -125,24 +151,43 @@ sub fetch_input { ...@@ -125,24 +151,43 @@ sub fetch_input {
die 'Only the "mysql" driver is supported.' if $src_dbc->driver ne 'mysql'; die 'Only the "mysql" driver is supported.' if $src_dbc->driver ne 'mysql';
# Get the table list in either "tables" or "ignores" # Get the table list in either "tables" or "ignores"
my $table_list = $self->_get_table_list; my $table_list = $self->_get_table_list($self->param('table_list') || '');
print "table_list: ", scalar(@$table_list), " ", join('/', @$table_list), "\n" if $self->debug; print "table_list: ", scalar(@$table_list), " ", join('/', @$table_list), "\n" if $self->debug;
my $nothing_to_dump = 0;
if ($self->param('exclude_list')) { if ($self->param('exclude_list')) {
push @ignores, @$table_list; push @ignores, @$table_list;
$nothing_to_dump = 1 if !$self->param('table_list');
} else { } else {
push @tables, @$table_list; push @tables, @$table_list;
$nothing_to_dump = 1 if $self->param('table_list') and !@$table_list;
}
# Would be good to have this from eHive
my @ref_ehive_tables = qw(hive_meta pipeline_wide_parameters worker dataflow_rule analysis_base analysis_ctrl_rule job accu log_message job_file analysis_data resource_description analysis_stats analysis_stats_monitor role msg progress resource_class worker_resource_usage);
## Only eHive databases have a table named "hive_meta"
my $meta_sth = $src_dbc->db_handle->table_info(undef, undef, 'hive_meta');
my @ehive_tables;
if ($meta_sth->fetchrow_arrayref) {
# The hard-coded list is comprehensive, so some tables may not be
# in this database (which may be on a different version)
push @ehive_tables, @{$self->_get_table_list($_)} for @ref_ehive_tables;
} }
$meta_sth->finish();
# eHive tables are dumped unless exclude_ehive is defined # eHive tables are ignored if exclude_ehive is set
if ($self->param('exclude_ehive')) { if ($self->param('exclude_ehive')) {
push @ignores, @ehive_tables; push @ignores, @ehive_tables;
} elsif (scalar(@$table_list) and not $self->param('exclude_list')) { } elsif (@ehive_tables) {
push @tables, @ehive_tables; if (@tables || $nothing_to_dump) {
} elsif (not scalar(@$table_list) and $self->param('exclude_list')) { push @tables, @ehive_tables;
push @tables, @ehive_tables; $nothing_to_dump = 0;
}
} }
$self->param('nothing_to_dump', $nothing_to_dump);
# Output file / output database # Output file / output database
$self->param('output_file') || $self->param('output_db') || die 'One of the parameters "output_file" and "output_db" is mandatory'; $self->param('output_file') || $self->param('output_db') || die 'One of the parameters "output_file" and "output_db" is mandatory';
unless ($self->param('output_file')) { unless ($self->param('output_file')) {
...@@ -156,21 +201,18 @@ sub fetch_input { ...@@ -156,21 +201,18 @@ sub fetch_input {
# Splits a string into a list of strings # Splits a string into a list of strings
# Ask the database for the list of tables that match the wildcard "%" # Ask the database for the list of tables that match the wildcard "%"
# and also select the tables that actually exist
sub _get_table_list { sub _get_table_list {
my $self = shift @_; my ($self, $table_list) = @_;
my $table_list = $self->param('table_list') || '';
my @newtables = (); my @newtables = ();
my $dbc = $self->param('src_dbc'); my $dbc = $self->param('src_dbc');
foreach my $initable (ref($table_list) eq 'ARRAY' ? @$table_list : split(' ', $table_list)) { foreach my $initable (ref($table_list) eq 'ARRAY' ? @$table_list : split(' ', $table_list)) {
if ($initable =~ /%/) { if ($initable =~ /%/) {
$initable =~ s/_/\\_/g; $initable =~ s/_/\\_/g;
my $sth = $dbc->db_handle->table_info(undef, undef, $initable, undef);
push @newtables, map( {$_->[2]} @{$sth->fetchall_arrayref});
} else {
push @newtables, $initable;
} }
my $sth = $dbc->db_handle->table_info(undef, undef, $initable, undef);
push @newtables, map( {$_->[2]} @{$sth->fetchall_arrayref});
} }
return \@newtables; return \@newtables;
} }
...@@ -186,8 +228,14 @@ sub run { ...@@ -186,8 +228,14 @@ sub run {
print "tables: ", scalar(@$tables), " ", join('/', @$tables), "\n" if $self->debug; print "tables: ", scalar(@$tables), " ", join('/', @$tables), "\n" if $self->debug;
print "ignores: ", scalar(@$ignores), " ", join('/', @$ignores), "\n" if $self->debug; print "ignores: ", scalar(@$ignores), " ", join('/', @$ignores), "\n" if $self->debug;
# We have to exclude everything my @options = qw(--skip-lock-tables);
return if ($self->param('exclude_ehive') and $self->param('exclude_list') and scalar(@$ignores) == $self->param('nb_ehive_tables')); # Without any table names, mysqldump thinks that it should dump
# everything. We need to add special arguments to handle this
if ($self->param('nothing_to_dump')) {
print "everything is excluded, nothing to dump !\n" if $self->debug;
push @options, qw(--no-create-info --no-data);
$ignores = []; # to clean-up the command-line
}
# mysqldump command # mysqldump command
my $output = ""; my $output = "";
...@@ -204,7 +252,7 @@ sub run { ...@@ -204,7 +252,7 @@ sub run {
my $cmd = join(' ', my $cmd = join(' ',
'mysqldump', 'mysqldump',
$self->mysql_conn_from_dbc($src_dbc), $self->mysql_conn_from_dbc($src_dbc),
'--skip-lock-tables', @options,
@$tables, @$tables,
(map {sprintf('--ignore-table=%s.%s', $src_dbc->dbname, $_)} @$ignores), (map {sprintf('--ignore-table=%s.%s', $src_dbc->dbname, $_)} @$ignores),
$output $output
......
...@@ -89,11 +89,18 @@ sub main { ...@@ -89,11 +89,18 @@ sub main {
$input_id = '{}'; $input_id = '{}';
warn "Since -input_id has not been set, assuming input_id='$input_id'\n"; warn "Since -input_id has not been set, assuming input_id='$input_id'\n";
} }
my $dinput_id = destringify($input_id);
if (!ref($dinput_id)) {
die "'$input_id' cannot be eval'ed, likely because of a syntax error\n";
}
if (ref($dinput_id) ne 'HASH') {
die "'$input_id' is not a hash\n";
}
my $job = Bio::EnsEMBL::Hive::AnalysisJob->new( my $job = Bio::EnsEMBL::Hive::AnalysisJob->new(
'prev_job' => undef, # this job has been created by the initialization script, not by another job 'prev_job' => undef, # this job has been created by the initialization script, not by another job
'analysis' => $analysis, 'analysis' => $analysis,
'input_id' => destringify( $input_id ), # Make sure all job creations undergo re-stringification to avoid alternative "spellings" of the same input_id hash 'input_id' => $dinput_id, # Make sure all job creations undergo re-stringification to avoid alternative "spellings" of the same input_id hash
); );
my ($job_id) = @{ $hive_dba->get_AnalysisJobAdaptor->store_jobs_and_adjust_counters( [ $job ] ) }; my ($job_id) = @{ $hive_dba->get_AnalysisJobAdaptor->store_jobs_and_adjust_counters( [ $job ] ) };
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment