push_master_tables.pl 14.4 KB
Newer Older
1 2 3 4 5 6 7
#!/usr/bin/env perl

use strict;
use warnings;

use Data::Dumper;
use DBI qw( :sql_types );
8
use File::Spec::Functions;
9 10 11 12
use Getopt::Long qw( :config no_ignore_case );
use IO::File;
use POSIX qw( floor ceil );

13 14
my $outdir = 'fix-master_tables';

15 16 17 18 19 20 21
sub usage {
  my $padding = ' ' x length($0);

  print <<USAGE_END;
Usage:
  $0 --release NN --master master_server \\
  $padding --server server1 --server server2 [...] \\
22
  $padding --dbport 3306 --dbuser user --dbpass passwd
23 24 25 26 27 28 29 30 31 32

or
  $0 --help

or
  $0 --about

where

  --release/-r  The current release (required).
33

34 35 36 37
  --master/-m   The master server where the production database lives
                (optional, default is 'ens-staging1').
  --server/-s   A database server (optional, may occur several times,
                default is 'ens-staging1' and 'ens-staging2').
38

39
  --dbport/-P   The port to connect to (optional, default is '3306').
40 41 42

  --dbuser/-u   The (read-only) user to connect as (optional,
                default is 'ensro').
43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59
  --dbpass/-p   The password to connect with (optional, no default).

  --help/-h     Displays this help text.

  --about/-a    Display a text about this program (what it does etc.).
USAGE_END
} ## end sub usage

sub about {
  print <<ABOUT_END;
Run the program with --help to get information about available command
line switches.

This program takes the master tables from the production database
and compares it to the corresponding tables on the given servers (by
default, the staging servers).

60 61
The program will display any discrepancies on the display while
writing SQL to files in a subdirectory of the current directory called
62
"$outdir" that will correct the discrepancies.
63 64 65

Each SQL patch file will have the generic name "fix-DBNAME.sql"
where "DBNAME" is the name of the database, e.g.,
66
"$outdir/fix-oryctolagus_cuniculus_otherfeatures_60_3.sql".
67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133

A discrepancy is patched by

1)  Insertion into the master table in the production database in the
    case where a new entry has been added to a database without being
    added to the master table.

2)  Insertion into the database table in the case where a new master
    entry is missing in the database.

3)  Updating the database entry in the case where an entry (identified by
    its primary key only) differs in any of its fields.

The SQL patch files may then be used to patch the databases:

  \$ mysql -h server -u user -ppass < fix-DBNAME.sql




                    BE SURE TO REVIEW THESE SQL PATCH FILES
                        (along with the program output)

                            WITH YOUR EYE AND BRAIN

                              BEFORE APPLYING THEM


ABOUT_END
} ## end sub about

sub fetch_table {
  my ( $dbh, $dbname, $table ) = @_;

  my $sth = $dbh->prepare(
             sprintf( 'SELECT * FROM %s',
                      $dbh->quote_identifier( undef, $dbname, $table ) )
  );

  $sth->execute();

  my %table_hash;

  $table =~ s/^master_//;

  my $pk = sprintf( '%s_id', $table );
  while ( my $row = $sth->fetchrow_hashref() ) {
    if ( !exists( $row->{$pk} ) ) {
      die( sprintf( "Can not find expected primary key '%s'", $pk ) );
    }

    $table_hash{ $row->{$pk} } = $row;
  }

  return \%table_hash;
} ## end sub fetch_table

sub display_banner {
  my ( $char, $text ) = @_;

  printf( "%s %s %s\n",
          $char x ( 39 - floor( length($text)/2 ) ),
          $text, $char x ( 39 - ceil( length($text)/2 ) ) );
}

my $release;
my @servers = ( 'ens-staging1', 'ens-staging2' );
134
my $master = 'ens-staging1';
135 136

my $dbport = '3306';
Andreas Kusalananda Kähäri's avatar
Andreas Kusalananda Kähäri committed
137
my ( $dbuser, $dbpass ) = ( 'ensro', undef );
138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162

my $opt_help  = 0;
my $opt_about = 0;

if ( !GetOptions( 'release|r=i' => \$release,
                  'master|m=s'  => \$master,
                  'server|s=s@' => \@servers,
                  'dbuser|u=s'  => \$dbuser,
                  'dbpass|p=s'  => \$dbpass,
                  'dbport|P=s'  => \$dbport,
                  'help|h!'     => \$opt_help,
                  'about!'      => \$opt_about )
     || $opt_help )
{
  usage();
  exit();
} elsif ($opt_about) {
  about();
  exit();
} elsif ( !defined($release) ) {
  print("ERROR: Release was not specified! (use -r or --release)\n");
  usage();
  exit();
}

163 164
my @tables =
  ( 'attrib_type', 'external_db', 'misc_set', 'unmapped_reason' );
165 166 167 168
my @dbtypes = ( 'core', 'otherfeatures', 'cdna', 'vega' );

my %master;
{
Andreas Kusalananda Kähäri's avatar
Andreas Kusalananda Kähäri committed
169
  my $dsn = sprintf( 'DBI:mysql:host=%s;port=%d', $master, $dbport );
170 171 172 173 174 175 176 177 178 179 180 181
  my $dbh =
    DBI->connect( $dsn, $dbuser, $dbpass, { 'PrintError' => 1 } );

  foreach my $table (@tables) {
    my $master_table = sprintf( 'master_%s', $table );
    $master{$table} = fetch_table( $dbh,
                                   sprintf( 'ensembl_production_%d',
                                            $release ),
                                   $master_table );
  }
}

182
my %db_handles;
183
foreach my $server (@servers) {
Andreas Kusalananda Kähäri's avatar
Andreas Kusalananda Kähäri committed
184
  my $dsn = sprintf( 'DBI:mysql:host=%s;port=%d', $server, $dbport );
185 186 187
  my $dbh =
    DBI->connect( $dsn, $dbuser, $dbpass, { 'PrintError' => 1 } );

188
  $db_handles{$server} = $dbh;
189 190 191
}

my %sql;
192 193 194 195
foreach my $server (@servers) {
  printf( "###> Looking at '%s'\n", $server );
  my $dbh = $db_handles{$server};

196 197 198 199 200 201 202 203 204 205 206 207 208
  my $sth = $dbh->prepare('SHOW DATABASES LIKE ?');

  foreach my $dbtype (@dbtypes) {
    $sth->bind_param( 1,
                      sprintf( '%%\\_%s\\_%d\\_%%', $dbtype, $release ),
                      SQL_VARCHAR );

    $sth->execute();

    my $dbname;
    $sth->bind_col( 1, \$dbname );

    while ( $sth->fetch() ) {
209 210
      printf( "##> Processing '%s'\n", $dbname );

211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228
      foreach my $table (@tables) {
        my $csth = $dbh->column_info( undef, $dbname, $table, '%' );
        my $colinfo = $csth->fetchall_hashref( ['COLUMN_NAME'] );

        my %table = %{ fetch_table( $dbh, $dbname, $table ) };

        foreach
          my $pk ( sort { $a <=> $b } keys( %{ $master{$table} } ) )
        {
          if ( !exists( $table{$pk} ) ) {
            my $row    = $master{$table}{$pk};
            my @fields = sort( keys( %{$row} ) );

            push(
              @{ $sql{$dbname} },
              sprintf( "-- insert %s_id=%d in %s\n",
                       $table, $pk, $table ),
              sprintf(
229
                "INSERT INTO %s (\n\t%s\n) VALUES (\n\t%s\n);\n",
230
                $dbh->quote_identifier( undef, $dbname, $table ),
231 232
                join( ",\n\t",
                      map { $dbh->quote_identifier($_) } @fields ),
233
                join(
234
                  ",\n\t",
235 236 237 238 239 240
                  map {
                    $dbh->quote( $row->{$_},
                                 $colinfo->{$_}{'DATA_TYPE'} )
                    } @fields ) ) );

          }
241
        } ## end foreach my $pk ( sort { $a ...})
242 243 244 245 246 247

        foreach my $pk ( sort { $a <=> $b } keys(%table) ) {
          my $master_row = $master{$table}{$pk};
          my $row        = $table{$pk};

          if ( $pk == 0 ) {
Andreas Kusalananda Kähäri's avatar
Andreas Kusalananda Kähäri committed
248
            display_banner( '-', sprintf( '%s.%s', $dbname, $table ) );
249 250 251 252 253 254 255 256 257 258

            print( "==> Primary key is ZERO "
                     . "for the following row in DATABASE:\n",
                   Dumper($row),
                   "\n" );
          } else {
            my @fields = sort( keys( %{$row} ) );

            if ( !defined($master_row) ) {
              display_banner( '=',
Andreas Kusalananda Kähäri's avatar
Andreas Kusalananda Kähäri committed
259
                              sprintf( '%s.%s', $dbname, $table ) );
260

261 262
              # Find other row in master table that is the same as
              # database table row, but with different primary key.
263

264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287
              my $is_missing = 1;

              foreach my $master_pk ( keys( %{ $master{$table} } ) ) {
                my $master_row = $master{$table}{$master_pk};
                my $is_same    = 1;

                foreach my $field ( sort( keys( %{$master_row} ) ) ) {
                  if ( $field eq sprintf( '%s_id', $table ) ) {
                    # Skip the primary key.
                    next;
                  }

                  if ( $master_row->{$field} ne $row->{$field} ) {
                    $is_same = 0;
                    last;
                  }
                }

                if ($is_same) {
                  printf( "==> Entry with primary key %d "
                          . "is same as entry with primary key %d:\n%s",
                        $pk, $master_pk, Dumper($master_row) );

                  push( @{ $sql{$dbname} },
288 289 290 291 292 293 294 295
                        sprintf(
                               "-- Entries with %s_id = %d "
                                 . "should change this to %d\n"
                                 . "-- Useful SQL:\n"
                                 . "-- UPDATE <table> "
                                 . "SET %s_id = %d WHERE %s_id = %s;\n",
                               $table,     $pk,    $master_pk, $table,
                               $master_pk, $table, $pk ) );
296 297 298 299 300 301 302 303 304 305 306

                  $is_missing = 0;
                }
              } ## end foreach my $master_pk ( keys...)

              if ($is_missing) {
                print( "==> The following row is MISSING IN MASTER:\n",
                       Dumper($row) );

                push(
                  @{ $sql{$dbname} },
307
                  sprintf( "#HEADS_UP!# -- MASTER: insert from %s.%s\n",
308 309
                           $dbname, $table ),
                  sprintf(
310 311
                    "#HEADS_UP!# INSERT INTO %s (\n\t%s\n) "
                      . "VALUES (\n\t%s\n);\n",
312
                    $dbh->quote_identifier(
313 314 315
                           undef,
                           sprintf( 'ensembl_production_%d', $release ),
                           sprintf( 'master_%s',             $table ) ),
316
                    join( ",\n#HEADS_UP!# \t",
317 318
                          map { $dbh->quote_identifier($_) } @fields ),
                    join(
319
                      ",\n#HEADS_UP!# \t",
320 321 322 323 324 325 326
                      map {
                        $dbh->quote( $row->{$_},
                                     $colinfo->{$_}{'DATA_TYPE'} )
                        } @fields ) ) );

                print("\n");
              } ## end if ($is_missing)
327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350
            } else {
              my %diff_fields;

              foreach my $field (@fields) {
                if (    defined( $master_row->{$field} )
                     || defined( $row->{$field} ) )
                {
                  if ( (   !defined( $master_row->{$field} )
                         && defined( $row->{$field} ) )
                       || ( defined( $master_row->{$field} )
                            && !defined( $row->{$field} ) )
                       || ( $master_row->{$field} ne $row->{$field} ) )
                  {
                    if ( !(    $table eq 'external_db'
                            && $field eq 'db_release' ) )
                    {
                      $diff_fields{$field} = $master_row->{$field};
                    }
                  }
                }
              }

              if ( scalar( keys(%diff_fields) ) > 0 ) {
                display_banner( '=',
Andreas Kusalananda Kähäri's avatar
Andreas Kusalananda Kähäri committed
351
                                sprintf( '%s.%s', $dbname, $table ) );
352

353 354
                # Find other row in master table that is the same as
                # database table row, but with different primary key.
355

356
                my $is_missing = 1;
357

358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379
                foreach my $master_pk ( keys( %{ $master{$table} } ) ) {
                  my $master_row = $master{$table}{$master_pk};
                  my $is_same    = 1;

                  foreach my $field ( sort( keys( %{$master_row} ) ) ) {
                    if ( $field eq sprintf( '%s_id', $table ) ) {
                      # Skip the primary key.
                      next;
                    }

                    if ( $master_row->{$field} ne $row->{$field} ) {
                      $is_same = 0;
                      last;
                    }
                  }

                  if ($is_same) {
                    printf( "==> Entry with primary key %d "
                          . "is same as entry with primary key %d:\n%s",
                        $pk, $master_pk, Dumper($master_row) );

                    push( @{ $sql{$dbname} },
380 381 382 383 384 385 386 387
                          sprintf(
                               "-- Entries with %s_id = %d "
                                 . "should change this to %d\n"
                                 . "-- Useful SQL:\n"
                                 . "-- UPDATE <table> "
                                 . "SET %s_id = %d WHERE %s_id = %s;\n",
                               $table,     $pk,    $master_pk, $table,
                               $master_pk, $table, $pk ) );
388 389 390 391 392

                    $is_missing = 0;
                  }
                } ## end foreach my $master_pk ( keys...)

393
                if ($is_missing) {
394 395 396 397 398 399 400 401 402 403 404 405 406 407 408
                  printf( "==> The following row differs in %s.\n",
                          join( ', ', keys(%diff_fields) ) );
                  print( "==> MASTER row:\n",   Dumper($master_row),
                         "==> DATABASE row:\n", Dumper($row) );

                  push(
                    @{ $sql{$dbname} },
                    sprintf( "-- update %s in %s\n",
                             join( ', ', keys(%diff_fields) ), $table ),
                    sprintf(
                      "UPDATE %s\nSET %s\nWHERE %s_id = %d;\n",
                      $dbh->quote_identifier( undef, $dbname, $table ),
                      join(
                        ', ',
                        map {
Andreas Kusalananda Kähäri's avatar
Andreas Kusalananda Kähäri committed
409
                          sprintf( '%s = %s',
410 411 412 413 414 415 416 417 418
                                   $_,
                                   $dbh->quote( $diff_fields{$_} ),
                                   $colinfo->{$_}{'DATA_TYPE'} )
                          }
                          keys(%diff_fields) ),
                      $table,
                      $pk ) );

                  print("\n");
419
                } ## end if ($is_missing)
420 421 422 423 424 425 426 427 428 429 430 431

              } ## end if ( scalar( keys(%diff_fields...)))
            } ## end else [ if ( !defined($master_row...))]

          } ## end else [ if ( $pk == 0 ) ]

        } ## end foreach my $pk ( sort { $a ...})
      } ## end foreach my $table (@tables)

    } ## end while ( $sth->fetch() )
  } ## end foreach my $dbtype (@dbtypes)

Andreas Kusalananda Kähäri's avatar
Andreas Kusalananda Kähäri committed
432
} ## end foreach my $server (@servers)
433 434

if ( scalar( keys(%sql) ) > 0 ) {
435 436 437 438
  if ( !-d $outdir ) {
    mkdir($outdir);
  }

439
  foreach my $db_name ( keys(%sql) ) {
440 441
    my $filename =
      catfile( $outdir, sprintf( 'fix-%s.sql', $db_name ) );
442 443 444 445 446 447 448 449 450 451
    printf( "==> Writing SQL to '%s'\n", $filename );
    my $out = IO::File->new( $filename, 'w' );
    $out->print( @{ $sql{$db_name} } );
    $out->close();
  }
} else {
  print("Nothing to do, all seems ok\n");
}

END {
452
  foreach my $dbh ( values(%db_handles) ) {
453 454 455
    $dbh->disconnect();
  }
}