Commit 78b1069f authored by Ian Longden's avatar Ian Longden
Browse files

many changes but the most important is to only add seq_region mapping if the...

many changes but the most important is to only add seq_region mapping if the seq_region_ids are different
parent bb674e0b
...@@ -24,12 +24,20 @@ Optional arguments: ...@@ -24,12 +24,20 @@ Optional arguments:
--host=host server where the core databases are stored --host=host server where the core databases are stored
(default: ens-staging) (default: ens-staging)
--oldhost=oldhost server where the old release databases are stored
--dbname=dbname if you want a single database to update --dbname=dbname if you want a single database to update
the mapping_set information (default: all the mapping_set information (default: all
databases) databases)
--port=port port (default: 3306) --port=port port (default: 3306)
--oldport=port old database server port (default: 5306)
--olduser=user old database server username
--oldpass=pass password for old database server
--help print help (this message) --help print help (this message)
=head1 DESCRIPTION =head1 DESCRIPTION
...@@ -51,7 +59,7 @@ Update mapping_set information for all databases in ens-staging in ...@@ -51,7 +59,7 @@ Update mapping_set information for all databases in ens-staging in
release NN (the usual use case in release process): release NN (the usual use case in release process):
$ ./update_mapping_set.pl --user ensadmin \ $ ./update_mapping_set.pl --user ensadmin \
--pass password --release NN --pass password --release NN --oldhost ensembldb.ensembl.org
Update mapping_set information only for pig database in ens-genomics1: Update mapping_set information only for pig database in ens-genomics1:
...@@ -88,10 +96,14 @@ use constant NEW_MAPPING => 3; ...@@ -88,10 +96,14 @@ use constant NEW_MAPPING => 3;
## Command line options ## Command line options
my $host = 'ens-staging'; my $host = 'ens-staging';
my $oldhost = 'ensembldb.ensembl.org';
my $dbname = undef; my $dbname = undef;
my $user = undef; my $user = undef;
my $pass = undef; my $pass = undef;
my $port = 3306; my $port = 3306;
my $oldport = 5306;
my $olduser = "anonymous";
my $oldpass = undef;
my $help = undef; my $help = undef;
my $release = undef; my $release = undef;
...@@ -101,7 +113,11 @@ GetOptions('host=s' => \$host, ...@@ -101,7 +113,11 @@ GetOptions('host=s' => \$host,
'pass=s' => \$pass, 'pass=s' => \$pass,
'port=s' => \$port, 'port=s' => \$port,
'release=i' => \$release, 'release=i' => \$release,
'help' => \$help 'oldhost=s' => \$oldhost,
'oldport=s' => \$oldport,
'olduser=s' => \$olduser,
'oldpass=s' => \$oldpass,
'help' => \$help
); );
pod2usage(1) if($help); pod2usage(1) if($help);
...@@ -111,65 +127,78 @@ throw("--release argument required") if(!defined($release)); ...@@ -111,65 +127,78 @@ throw("--release argument required") if(!defined($release));
my $database = 'information_schema'; my $database = 'information_schema';
my $dbh = DBI->connect("DBI:mysql:database=$database;host=$host;port=$port",$user,$pass); my $dbh = DBI->connect("DBI:mysql:database=$database;host=$host;port=$port",$user,$pass);
my $old_dbh = DBI->connect("DBI:mysql:database=$database;host=$oldhost;port=$oldport",$olduser,$oldpass);
my $status; my $status;
my $database_name; my $database_name;
#since there is no database defined, will run it against all core databases #since there is no database defined, will run it against all core databases
my $pattern; my $pattern;
if (!defined ($dbname)){ if (!defined ($dbname)){
$pattern = '_core_\$release_'; $pattern = "_core_".$release."_";
} }
else{ else{
$pattern = $dbname; $pattern = $dbname;
} }
#fetch all databases matching the pattern #fetch all databases matching the pattern
print STDERR $pattern."\n";
my $sth = $dbh->prepare("SHOW DATABASES WHERE `database` REGEXP \'$pattern\'"); my $sth = $dbh->prepare("SHOW DATABASES WHERE `database` REGEXP \'$pattern\'");
$sth->execute(); $sth->execute();
my $dbs = $sth->fetchall_arrayref(); my $dbs = $sth->fetchall_arrayref();
my $schema_build; my $schema_build;
foreach my $db_name (@{$dbs}){ foreach my $db_name (@{$dbs}){
print STDERR "Going to update mapping for $db_name->[0]....\n"; print STDERR "Going to update mapping for $db_name->[0]....\n";
my $mapping_set_id; my $mapping_set_id;
my $current_seq_region = (); # hash containing the relation seq_region_name->seq_region_id for the current database my $current_seq_region = (); # hash containing the relation seq_region_name->seq_region_id for the current database
my $old_seq_region = (); #hash containing the previous database relation seq_region_name->seq_region_id my $old_seq_region = (); #hash containing the previous database relation seq_region_name->seq_region_id
my $sth_seq_mapping = $dbh->prepare("INSERT INTO $db_name->[0].seq_region_mapping VALUES(?,?,?)"); my $sth_seq_mapping = $dbh->prepare("INSERT INTO $db_name->[0].seq_region_mapping VALUES(?,?,?)");
my $sth_mapping_set = $dbh->prepare("INSERT INTO $db_name->[0].mapping_set VALUES(?,?)"); my $sth_mapping_set = $dbh->prepare("INSERT INTO $db_name->[0].mapping_set VALUES(?,?)");
$status = &mapping_status($dbh,$db_name->[0],\$mapping_set_id,$release); $status = &mapping_status($dbh,$old_dbh, $db_name->[0],\$mapping_set_id,$release);
$schema_build = get_schema_and_build($db_name->[0]); $schema_build = get_schema_and_build($db_name->[0]);
#add mapping_set information
$sth_mapping_set->execute($mapping_set_id,$schema_build); #add mapping_set information
if ($status == INITIAL_MAPPING){ if ($status == INITIAL_MAPPING){
#first time run the script, create new entry in mapping_set and copy seq_region entries in seq_region_mapping $sth_mapping_set->execute($mapping_set_id,$schema_build);
$current_seq_region = &read_seq_region($dbh,$db_name->[0]); #first time run the script, create new entry in mapping_set and copy seq_region entries in seq_region_mapping
#copy the seq_region_id in the seq_region_mapping
foreach my $seq_region_name (keys %{$current_seq_region}){ ############
#when copying there won't be any ambiguity with coord_systems #Actually NO only store the differences so for the initial one it is NONE.
foreach my $region_id (values %{$current_seq_region->{$seq_region_name}}){ ############
$sth_seq_mapping->execute($region_id,$region_id,$mapping_set_id);
} # $current_seq_region = &read_seq_region($dbh,$db_name->[0]);
} # #copy the seq_region_id in the seq_region_mapping
} # foreach my $seq_region_name (keys %{$current_seq_region}){
elsif ($status == SAME_MAPPING){ # #when copying there won't be any ambiguity with coord_systems
#seq_region_mapping has not change, nothing to do for the moment.... # foreach my $region_id (values %{$current_seq_region->{$seq_region_name}}){
# $sth_seq_mapping->execute($region_id,$region_id,$mapping_set_id);
} # }
elsif ($status == NEW_MAPPING){ # }
#there has been a seq_region change between releases, add a new mapping_set and the relation old_seq_region_id->new_seq_region_id }
my $previous_dbname = &get_previous_dbname($dbh,$db_name->[0],$release); elsif ($status == SAME_MAPPING){
$current_seq_region = &read_seq_region($dbh,$db_name->[0]); #seq_region_mapping has not change, nothing to do for the moment....
$old_seq_region = &read_seq_region($dbh,$previous_dbname);
#update the seq_region_mapping table with the old->new seq_region_id relation }
foreach my $seq_region_name (keys %{$old_seq_region}){ elsif ($status == NEW_MAPPING){
next if (!defined $current_seq_region->{$seq_region_name}); #the seq_region might have disappeared $sth_mapping_set->execute($mapping_set_id,$schema_build);
foreach my $coord_system_id (keys %{$old_seq_region->{$seq_region_name}}){ #there has been a seq_region change between releases, add a new mapping_set and the relation old_seq_region_id->new_seq_region_id
next if (!defined $current_seq_region->{$seq_region_name}->{$coord_system_id}); #the coord_system might have been removed in current database my $previous_dbname = &get_previous_dbname($old_dbh,$db_name->[0],$release);
$sth_seq_mapping->execute($old_seq_region->{$seq_region_name}->{$coord_system_id},$current_seq_region->{$seq_region_name}->{$coord_system_id},$mapping_set_id); $current_seq_region = &read_seq_region($dbh,$db_name->[0]);
} $old_seq_region = &read_seq_region($old_dbh,$previous_dbname);
} #update the seq_region_mapping table with the old->new seq_region_id relation
} my $count = 0;
else{ foreach my $seq_region_name (keys %{$old_seq_region}){
throw("Mapping status not recognized by script: $status \n\n"); next if (!defined $current_seq_region->{$seq_region_name}); #the seq_region might have disappeared
} foreach my $coord_system_id (keys %{$old_seq_region->{$seq_region_name}}){
next if (!defined $current_seq_region->{$seq_region_name}->{$coord_system_id}); #the coord_system might have been removed in current database
next if ($old_seq_region->{$seq_region_name}->{$coord_system_id} == $current_seq_region->{$seq_region_name}->{$coord_system_id}); # if no change no need to write out
$sth_seq_mapping->execute($old_seq_region->{$seq_region_name}->{$coord_system_id},$current_seq_region->{$seq_region_name}->{$coord_system_id},$mapping_set_id);
$count++;
}
}
print STDERR "Added $count seq_region_mapping entry\n\n";
}
else{
throw("Mapping status not recognized by script: $status \n\n");
}
} }
#for a given database, will return the seq_region_name->seq_region_id relation #for a given database, will return the seq_region_name->seq_region_id relation
...@@ -194,43 +223,48 @@ sub read_seq_region{ ...@@ -194,43 +223,48 @@ sub read_seq_region{
#method to check the status of the current core database: INITIAL_MAPPING, SAME_MAPPING and NEW_MAPPING are the possible states #method to check the status of the current core database: INITIAL_MAPPING, SAME_MAPPING and NEW_MAPPING are the possible states
sub mapping_status{ sub mapping_status{
my $dbh = shift; my $dbh = shift;
my $dbname = shift; my $old_dbh = shift;
my $mapping_set_id_ref = shift; my $dbname = shift;
my $release = shift; my $mapping_set_id_ref = shift;
my $release = shift;
my $sth_max_mapping = $dbh->prepare("select max(mapping_set_id) from $dbname.mapping_set");
$sth_max_mapping->execute(); # my $sth_max_mapping = $dbh->prepare("select max(mapping_set_id) from $dbname.mapping_set");
( $$mapping_set_id_ref ) = $sth_max_mapping->fetchrow_array(); # $sth_max_mapping->execute();
if (! $$mapping_set_id_ref){ # ( $$mapping_set_id_ref ) = $sth_max_mapping->fetchrow_array();
#the table is empty, first mapping # if (! $$mapping_set_id_ref){
$$mapping_set_id_ref = 1; # #the table is empty, first mapping
return INITIAL_MAPPING; # $$mapping_set_id_ref = 1;
} # return INITIAL_MAPPING;
else{ # }
#there is information, find out if it is the same mapping as previous release # else{
my $previous_dbname = &get_previous_dbname($dbh,$dbname,$release); #there is information, find out if it is the same mapping as previous release
throw("No previous database present for $dbname\n") if (!defined $previous_dbname);
my $cur_seq_region_size = &get_seq_region_size($dbh,$dbname); my $previous_dbname = &get_previous_dbname($old_dbh,$dbname,$release);
my $previous_seq_region_size = &get_seq_region_size($dbh,$previous_dbname); if(!defined($previous_dbname)){
if ($cur_seq_region_size == $previous_seq_region_size){ print "No previous database present for $dbname so cannot do diff so will initialise with this as the first version of the database\n";
#if both tables have same size, SAME_MAPPING $$mapping_set_id_ref = 1;
return SAME_MAPPING; return INITIAL_MAPPING;
} }
else{ my $cur_seq_region_size = &get_seq_region_size($dbh,$dbname);
#if tables have different size, NEW_MAPPING my $previous_seq_region_size = &get_seq_region_size($old_dbh,$previous_dbname);
$$mapping_set_id_ref++; if ($cur_seq_region_size == $previous_seq_region_size){
return NEW_MAPPING; #if both tables have same size, SAME_MAPPING
} return SAME_MAPPING;
} }
else{
#if tables have different size, NEW_MAPPING
$$mapping_set_id_ref++;
return NEW_MAPPING;
}
#}
} }
#for a given database, returns the size of the seq_region_table #for a given database, returns the size of the seq_region_table
sub get_seq_region_size{ sub get_seq_region_size{
my $dbh = shift; my $dbh = shift;
my $dbname = shift; my $dbname = shift;
my $sth_status = $dbh->prepare("show table status from $dbname like 'seq_region'"); my $sth_status = $dbh->prepare("show table status from $dbname like 'seq_region'") ;
$sth_status->execute(); $sth_status->execute();
my @table_status = $sth_status->fetchrow_array(); my @table_status = $sth_status->fetchrow_array();
return $table_status[6]; #return the size of the table return $table_status[6]; #return the size of the table
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment