From e5f562c4483b763dc1bb9df9506278f00b1fbcd7 Mon Sep 17 00:00:00 2001
From: Glenn Proctor <>
Date: Wed, 9 Aug 2006 12:10:49 +0000
Subject: [PATCH] Slight change in procedure: a 'master' database must now be
 specified. The external_dbs.txt file is loaded into this, and this in turn is
 compared (id and name) with the contents of the external_db table in each of
 the databases to be updated.

If the database to be updated has any extra rows, a warning is given and the database is skipped.

This should help prevent unilateral changes to external_db that have not been committed to the external_dbs.txt file in CVS from being silently overwritten and causing problems down the line.
 .../external_db/        | 107 ++++++++++++++----
 1 file changed, 88 insertions(+), 19 deletions(-)

diff --git a/misc-scripts/external_db/ b/misc-scripts/external_db/
index 96da96b55c..9793c32dea 100644
--- a/misc-scripts/external_db/
+++ b/misc-scripts/external_db/
@@ -9,22 +9,27 @@ use Getopt::Long;
 use DBI;
 use IO::File;
-my ( $host, $user, $pass, $port,@dbnames, $file, $release_num);
-GetOptions( "host=s", \$host,
-	    "user=s", \$user,
-	    "pass=s", \$pass,
-	    "port=i", \$port,
-	    "file=s", \$file,
-            "dbnames=s@", \@dbnames,
-	    "release_num=i", \$release_num
+my ( $host, $user, $pass, $port,@dbnames, $file, $release_num, $master);
+GetOptions( "host=s",        \$host,
+	    "user=s",        \$user,
+	    "pass=s",        \$pass,
+	    "port=i",        \$port,
+	    "file=s",        \$file,
+            "dbnames=s@",    \@dbnames,
+	    "release_num=i", \$release_num,
+	    "master=s",      \$master
 #both host and file are required
 usage() if(!$host || !$file);
 #release num XOR dbname are required
 usage() if(($release_num && @dbnames) || (!$release_num && !@dbnames));
+# master database is required
+usage() if (!$master);
 $port ||= 3306;
 my $dsn = "DBI:mysql:host=$host;port=$port";
@@ -33,7 +38,7 @@ my $db = DBI->connect( $dsn, $user, $pass, {RaiseError => 1} );
 if($release_num) {
   @dbnames = map {$_->[0] } @{ $db->selectall_arrayref( "show databases" ) };
   # filter out all non-core databases
@@ -44,13 +49,13 @@ if($release_num) {
 # make sure the user wishes to continue
-print STDERR "The following databases will be external_db updated:\n  ";
+print STDERR "The following databases will have their external_db tables updated if necessary:\n  ";
 print join("\n  ", @dbnames);
-print "\ncontinue with update (yes/no)>  ";
+print "\nContinue with update (yes/no)>  ";
 my $input = lc(<STDIN>);
-if($input ne 'yes') {
+if ($input ne 'yes') {
   print "external_db conversion aborted\n";
@@ -59,10 +64,10 @@ if($input ne 'yes') {
 # read all of the new external_db entries from the file
 my $fh = IO::File->new();
-$fh->open($file) or die("could not open input file $file");
+$fh->open($file) or die("Could not open input file $file");
 my @rows;
 my $row;
-while($row = <$fh>) {
+while ($row = <$fh>) {
   my @a = split(/\t/, $row);
   push @rows, {'external_db_id'         => $a[0],
@@ -76,8 +81,38 @@ while($row = <$fh>) {
+# Load into master database
+load_database($db, $master, @rows);
+# Check each other database in turn
+# Load if no extra rows in db that aren't in master
+# Warn and skip if there are
 foreach my $dbname (@dbnames) {
-  print STDERR "updating $dbname\n";
+  print STDERR "Looking at $dbname ... \n";
+  if (compare_external_db($db, $master, $dbname)) {
+    print STDERR "$dbname has no additional rows. Overwriting external_db table from $file\n";
+    load_database($db, $dbname, @rows);
+  } else {
+    print STDERR "$dbname has extra rows that are not in $file, skipping\n";
+  }
+print STDERR "Updates complete\n";
+sub load_database {
+  my ($db, $dbname, @rows) = @_;
   $db->do("use $dbname");
   my $sth = $db->prepare('DELETE FROM external_db');
@@ -101,10 +136,40 @@ foreach my $dbname (@dbnames) {
-print STDERR "updates complete\n";
+# return true if the tables are the same, undef if not
+sub compare_external_db {
+  my ($db, $master, $dbname) = @_;
+  my $same = 1;
+  # check each row in $dbname against each row in $master
+  # only compare ID and name since we're only aiming to catch extra rows in $dbname
+  $db->do("use $dbname");
+  my $sth = $db->prepare(qq {SELECT d.external_db_id, d.db_name
+			     FROM $dbname.external_db d
+			     LEFT JOIN $master.external_db m
+			     ON (d.external_db_id=m.external_db_id AND d.db_name=m.db_name)
+			     WHERE m.external_db_id IS NULL OR m.db_name IS NULL });
+  $sth->execute();
+  while (my ($id, $external_db_name) = $sth->fetchrow_array) {
+    print "$dbname has external_db entry for $external_db_name (ID $id) which is not present in $master\n";
+    $same = undef;
+  }
+  $sth->finish();
+  return $same;
 sub usage {
   print STDERR <<EOF
@@ -114,6 +179,7 @@ sub usage {
                     -user username 
                     -pass password 
                     -port port_of_server optional
+                    -master the name of the master database to load the file into
                     -release the release of the database to update used to 
                              match database names.  e.g. 13
                     -file the path of the file containing the insert statements
@@ -127,12 +193,15 @@ sub usage {
-  #update 2 databases
+  # update 2 databases
   perl -host ecs1c -file external_dbs.txt -user ensadmin -pass secret -dbnames homo_sapiens_core_14_33 -dbnames mus_musculus_core_14_30
-  #update all core databases for release 14
+  # update all core databases for release 14
   perl -host ecs2d -file external_dbs.txt -user ensadmin -pass secret -release 14
+  If the databases to be updated contain rows that are not in the file, a warning will
+  be given and the database in question skipped.