diff --git a/misc-scripts/update_mapping_set.pl b/misc-scripts/update_mapping_set.pl index aa324282adbbfad00b6b25500070a0643a35bf69..26c5337e9be01cd0e8b3a6a90fd1e7bffeda7867 100644 --- a/misc-scripts/update_mapping_set.pl +++ b/misc-scripts/update_mapping_set.pl @@ -130,7 +130,6 @@ my $database = 'information_schema'; my $old_dbh = DBI->connect("DBI:mysql:database=$database;host=$oldhost;port=$oldport",$olduser,$oldpass); foreach my $h ($host,$host2) { - my $error; my $dbh = DBI->connect("DBI:mysql:database=$database;host=$h;port=$port",$user,$pass); @@ -150,123 +149,134 @@ foreach my $h ($host,$host2) { foreach my $db_name (@{$dbs}){ my $current_dbname = $db_name->[0]; print STDERR "Going to update mapping for $current_dbname....\n"; - my $mapping_set_id = get_max_mapping_set_id($dbh,$current_dbname) + 1; - my $sth_seq_mapping = $dbh->prepare("INSERT INTO $current_dbname.seq_region_mapping VALUES(?,?,?)"); - my $sth_mapping_set = $dbh->prepare("INSERT INTO $current_dbname.mapping_set VALUES(?,?,?)"); - my $sth_update_build = $dbh->prepare("UPDATE $current_dbname.mapping_set SET internal_schema_build = ?"); - my $sth_update_old = $dbh->prepare("UPDATE $current_dbname.seq_region_mapping SET internal_seq_region_id = ? WHERE internal_seq_region_id = ?"); - my $sth_remove_deprecated = $dbh->prepare("DELETE FROM $current_dbname.seq_region_mapping WHERE internal_seq_region_id = ?"); - my $latest_schema_build = get_latest_schema_build($dbh, $current_dbname); + my $schema_build = get_schema_and_build($current_dbname); + my $latest_schema_build = get_latest_schema_build($dbh, $current_dbname); if ($latest_schema_build eq $schema_build) { print STDERR "$current_dbname already has a mapping for $schema_build, skipping\n"; next; } - my $current_assembly = get_assembly($dbh,$current_dbname) ; - my $count_removed = 0; - my $count_updated = 0; - my $count_added = 0; - $sth_update_build->execute($schema_build) unless $dry_run; if (!$compare_dbname) { $previous_dbname = &get_previous_dbname($old_dbh,$current_dbname,$release); } else { $previous_dbname = $compare_dbname; } -# If there is no previous database, no mapping needed - if (!defined($previous_dbname)) { - print STDERR "First instance known for $current_dbname, no mapping needed\n"; - next; - } - -# If it is a new assembly, no mapping needed - my $old_assembly = get_assembly($old_dbh,$previous_dbname); - if ($old_assembly ne $current_assembly) { - print STDERR "New assembly $current_assembly for $current_dbname, no mapping needed\n" ; - next; - } + update_mapping($dbh, $current_dbname, $previous_dbname, $dry_run); + my $mapping_set_id = get_max_mapping_set_id($dbh,$current_dbname) + 1; + my $sth_mapping_set = $dbh->prepare("INSERT INTO $current_dbname.mapping_set VALUES(?,?,?)"); + my $sth_update_build = $dbh->prepare("UPDATE $current_dbname.mapping_set SET internal_schema_build = ?"); my $previous_schema_build = get_schema_and_build($previous_dbname); my $new_mapping = $sth_mapping_set->execute($mapping_set_id,$schema_build,$previous_schema_build) unless $dry_run; + $sth_update_build->execute($schema_build) unless $dry_run; + } +} - if (!$new_mapping && !$dry_run) { - print STDERR "Mapping already run for this schema_build, please remove entry before proceeding\n" ; - exit; - } + + +sub update_mapping { + my ($dbh, $current_dbname, $previous_dbname, $dry_run) = @_; + my $error; + my $mapping_set_id = get_max_mapping_set_id($dbh,$current_dbname) + 1; + + my $sth_seq_mapping = $dbh->prepare("INSERT INTO $current_dbname.seq_region_mapping VALUES(?,?,?)"); + my $sth_update_old = $dbh->prepare("UPDATE $current_dbname.seq_region_mapping SET internal_seq_region_id = ? WHERE internal_seq_region_id = ?"); + my $sth_remove_deprecated = $dbh->prepare("DELETE FROM $current_dbname.seq_region_mapping WHERE internal_seq_region_id = ?"); + + my $current_assembly = get_assembly($dbh,$current_dbname) ; + + my $count_removed = 0; + my $count_updated = 0; + my $count_added = 0; + +# If there is no previous database, no mapping needed + if (!defined($previous_dbname)) { + print STDERR "First instance known for $current_dbname, no mapping needed\n"; + return; + } + +# If it is a new assembly, no mapping needed + my $old_assembly = get_assembly($old_dbh,$previous_dbname); + if ($old_assembly ne $current_assembly) { + print STDERR "New assembly $current_assembly for $current_dbname, no mapping needed\n" ; + return; + } # If there has been no change in seq_region, no mapping needed - my $cur_seq_region_checksum = &get_seq_region_checksum($dbh,$current_dbname); - my $previous_seq_region_checksum = &get_seq_region_checksum($old_dbh,$previous_dbname); - if ($cur_seq_region_checksum == $previous_seq_region_checksum) { - print STDERR "No change in seq_region for $current_dbname, no mapping needed\n"; - next; - } + my $cur_seq_region_checksum = &get_seq_region_checksum($dbh,$current_dbname); + my $previous_seq_region_checksum = &get_seq_region_checksum($old_dbh,$previous_dbname); + if ($cur_seq_region_checksum == $previous_seq_region_checksum) { + print STDERR "No change in seq_region for $current_dbname, no mapping needed\n"; + return; + } # There has been a seq_region change between releases, add the relation old_seq_region_id->new_seq_region_id - my $current_seq_region = &read_seq_region($dbh,$current_dbname); - my $old_seq_region = &read_seq_region($old_dbh,$previous_dbname); + my $current_seq_region = &read_seq_region($dbh,$current_dbname); + my $old_seq_region = &read_seq_region($old_dbh,$previous_dbname); # Build a hash of currently used seq region ids to ensure we do not map to overlapping IDs # i.e. a database has reused seq region IDs between releases - my $current_seq_region_ids = get_seq_region_ids($dbh, $current_dbname); + my $current_seq_region_ids = get_seq_region_ids($dbh, $current_dbname); # Update the seq_region_mapping table with the old->new seq_region_id relation - foreach my $seq_region_name (keys %{$old_seq_region}){ - my $current_name_hash = $current_seq_region->{$seq_region_name}; - my $old_name_hash = $old_seq_region->{$seq_region_name}; + foreach my $seq_region_name (keys %{$old_seq_region}){ + my $current_name_hash = $current_seq_region->{$seq_region_name}; + my $old_name_hash = $old_seq_region->{$seq_region_name}; # If the seq_region has disappeared, remove previous entries for that id - if (!defined $current_name_hash) { - my $id = get_seq_region_id($old_dbh,$previous_dbname, $seq_region_name); - $count_removed += $sth_remove_deprecated->execute($id) unless $dry_run; + if (!defined $current_name_hash) { + my $id = get_seq_region_id($old_dbh,$previous_dbname, $seq_region_name); + $count_removed += $sth_remove_deprecated->execute($id) unless $dry_run; + next; + } +# while ( my ($length, $hash_length) = each %{$old_name_hash}) + foreach my $length (keys %{$old_name_hash}){ + my $current_length_hash = $current_name_hash->{$length}; + my $old_length_hash = $old_name_hash->{$length}; + +# The seq_region might have a different length + if (!defined $current_length_hash) { next; } - foreach my $length (keys %{$old_name_hash}){ - my $current_length_hash = $current_name_hash->{$length}; - my $old_length_hash = $old_name_hash->{$length}; + foreach my $cs (keys %{$old_length_hash}) { + my $current_cs_hash = $current_length_hash->{$cs}; + my $old_cs_hash = $old_length_hash->{$cs}; -# The seq_region might have a different length - if (!defined $current_length_hash) { +# The coord system might have changed + if (!defined $current_cs_hash) { next; } - foreach my $cs (keys %{$old_length_hash}) { - my $current_cs_hash = $current_length_hash->{$cs}; - my $old_cs_hash = $old_length_hash->{$cs}; + foreach my $id (keys %{$old_cs_hash}) { + my $current_id = $current_cs_hash->{$id}; + my $old_id = $old_cs_hash->{$id}; -# The coord system might have changed - if (!defined $current_cs_hash) { +# If no change, no need to write out + if (!defined $current_id || $old_id == $current_id) { next; } - foreach my $id (keys %{$old_cs_hash}) { - my $current_id = $current_cs_hash->{$id}; - my $old_id = $old_cs_hash->{$id}; - -# If no change, no need to write out - if (!defined $current_id || $old_id == $current_id) { - next; - } - if(exists $current_seq_region_ids->{$old_id}) { - printf STDERR "Skipping the mapping for old id %d to current id %d as the old ID is in use in the DB. This means IDs have been reused. Do not reused seq_region_id primary keys\n", $old_id, $current_id; - $error = 1; - next; - } + if(exists $current_seq_region_ids->{$old_id}) { + printf STDERR "Skipping the mapping for old id %d to current id %d as the old ID is in use in the DB. This means IDs have been reused. Do not reused seq_region_id primary keys\n", $old_id, $current_id; + $error = 1; + next; + } # If there is a change, update any existing entries for this seq_region to the new id # Then, add a new entry to map said id to the old release - $count_updated += $sth_update_old->execute($current_id,$old_id) unless $dry_run; - $count_added += $sth_seq_mapping->execute($old_id,$current_id, $mapping_set_id) unless $dry_run; - if($dry_run) { - $count_updated++; - $count_added++; - } + $count_updated += $sth_update_old->execute($current_id,$old_id) unless $dry_run; + $count_added += $sth_seq_mapping->execute($old_id,$current_id, $mapping_set_id) unless $dry_run; + if($dry_run) { + $count_updated++; + $count_added++; } } } } - print STDERR "For $current_dbname, removed $count_removed, added $count_added, updated $count_updated seq_region_mapping entries\n\n" ; } + print STDERR "For $current_dbname, removed $count_removed, added $count_added, updated $count_updated seq_region_mapping entries\n\n" ; + if($error) { die "Error detected when loading the mapping sets. Check STDERR for more information";