#!/usr/local/ensembl/bin/perl -w

use strict;
use Getopt::Long;
use DBI;
use Cwd 'chdir';
use Sys::Hostname qw(hostname);

$| = 1;

my $usage = "\nUsage: $0 -pass XXXXX [-noflush -dbflush -xref] input_file

Copy MySQL databases between different servers and run myisamchk on the indices once copied.

-dbflush       Read-locks and flushes only those DBs to be copied; the default is to flush WITH READ LOCK across the entire instance.
-noflush       Skips table flushing.
-xref          Only copies the xref tables, to enable running the transcript probe mapping with output to an isolated xref DB.

#no longer implemented
#-probe_mapping Only copies the tables relevant to running genomic and transcript probe mapping in isolation


The input file should have the following format:

source_server\\tsource_port\\tsource_db\\tdestination_server\\tdestination_port\\tdestination_db

e.g.

#source_server\\tsource_port\\tsource_db\\tdestination_server\\tdestination_port\\tdestination_db
ecs3.internal.sanger.ac.uk 3307 homo_sapiens_core_13_31 ecs2.internal.sanger.ac.uk 3364 homo_sapiens_core_14_31

Lines starting with # are treated as comments and ignored.
Blank (or whitespace-only) lines are ignored.

Note that fields can be separated by any number of tabs or spaces.

RESTRICTIONS:
============
1- The destination_server has to match the generic server you are running the script on,
   either ecs1, ecs2, ecs3, ecs4 or ia64[ef]; otherwise the copying process for the corresponding database
   is skipped.
2- This script only works for copy processes from and to ecs/ia64 nodes, namely
   ecs1[abcdefgh] port 3306
   ecs2 port 336[1-6]
   ecs3 port 300[47]
   ecs4 port 335[0-3]
   ia64[efgh] port 3306
3- -pass is compulsory and is expected to be the MySQL password to connect as ensadmin.
4- If you add a new instance, remember to check there is a tmp directory alongside the databases
   directory (the copy is staged there first), otherwise rcp will complain.

This script MUST be run as the mysqlens Unix user.

It is also best to run it on the same node as the destination MySQL instance, e.g. for ecs2:3365 run

caa_stat mysql_3365

... which will reveal that it is running on ecs2e (caa_stat alone will show all instances).
Note that this only works for Tru64 CAA instances, i.e. ecs2 & ecs4.
\n\n";


my ($pass, $xref, $probe_mapping, $help, $noflush, $dbflush);


GetOptions(
           'h'             => \$help,
           'pass=s'        => \$pass,
           'noflush'       => \$noflush,
           #'probe_mapping' => \$probe_mapping,
           'xref'          => \$xref,
           'dbflush'       => \$dbflush,
          );

if ($help || scalar @ARGV == 0 || ! defined $pass) {
  print $usage;
  exit 0;
}

if($dbflush && $noflush){
  die('Cannot specify mutually exclusive options -noflush and -dbflush');
}

if($probe_mapping && $xref){
  die('Cannot specify mutually exclusive options -probe_mapping and -xref');
}


my ($input_file) = @ARGV;
my @dbs_to_copy;

# server:port => MySQL data directory on that instance
my %mysql_directory_per_svr = ('genebuild1:3306'     => "/mysql/data_3306/databases",
                               'genebuild2:3306'     => "/mysql/data_3306/databases",
                               'genebuild3:3306'     => "/mysql/data_3306/databases",
                               'genebuild4:3306'     => "/mysql/data_3306/databases",
                               'genebuild5:3306'     => "/mysql/data_3306/databases",
                               'genebuild6:3306'     => "/mysql/data_3306/databases",
                               'genebuild7:3306'     => "/mysql/data_3306/databases",
                               'genebuild7:5306'     => "/mysql/data_3306/databases",
                               'mart1:3306'          => "/mysql/data_3306/databases",
                               'mart2:3306'          => "/mysql/data_3306/databases",
                               'compara1:3306'       => "/mysql/data_3306/databases",
                               'compara2:3306'       => "/mysql/data_3306/databases",
                               'compara2:5316'       => "/mysql/data_5316/databases",
                               'compara3:3306'       => "/mysql/data_3306/databases",
                               'ens-genomics1:3306'  => "/mysql/data_3306/databases",
                               'ens-genomics2:3306'  => "/mysql/data_3306/databases",
                               'ens-staging:3306'    => "/mysql/data_3306/databases",
                               'ens-staging1:3306'   => "/mysql/data_3306/databases",
                               'ens-staging2:3306'   => "/mysql/data_3306/databases",
                               'ens-livemirror:3306' => "/mysql/data_3306/databases",
                               'ensdb-archive:3304'  => "/mysql/data_3304/databases",
                               'ens-research:3306'   => "/mysql/data_3306/databases",
                               'ens-research:3309'   => "/mysql/data_3309/databases",
                               #'ensdb-1-11:5317'    => '/mysql/data_5317/databases',
                               #mysqlens doesn't have reload privileges here
                               'ensdb-2-12:5106'     => '/mysqlv5.1-test/data_5106/databases');

my %tables = (
              #xref tables are required for transcript/gene generation
              xref => [('xref', 'object_xref', 'identity_xref', 'go_xref',
                        'external_db', 'external_synonym', 'unmapped_reason', 'unmapped_object')],

              #probe_mapping => [('oligo_array', 'oligo_probe', 'oligo_feature', 'coord_system',
              #                   'seq_region', 'seq_region_attrib', 'seq_region_mapping', 'mapping_set', 'assembly_exception', 'attrib_type', 'analysis',
              #                   'exon', 'exon_stable_id', 'exon_transcript', 'assembly', 'dna',
              #                   'analysis_description', 'transcript', 'transcript_attrib', 'transcript_stable_id',
              #                   'translation', 'translation_stable_id', 'meta', 'meta_coord')],

              #translation & dna required for generating annotated UTRs
             );

#add xref tables to probe mapping tables
#push @{$tables{'probe_mapping'}}, @{$tables{'xref'}};

#Set table subset
my $table_type = '';
$table_type = 'xref' if $xref;
#$table_type = 'probe_mapping' if $probe_mapping;

#Currently this fails if xref or probe_mapping is specified for a non-core DB
#We need to default to normal copy if dbname !~ _core_


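# With -dbflush the lock/flush below is applied per source database; otherwise a
# single global 'flush tables with read lock' is issued per source server.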
my $flush_scope = (defined $dbflush) ? 'src_db' : 'src_srv';
my ($generic_working_host) = (gethostbyname(hostname));
$generic_working_host =~ s/\..*//;
my $working_dir = $ENV{'PWD'};
my (%already_flushed, %db_handles);

# parsing/checking the input file

# Use the three-argument open and 'or' so missing/unreadable files are caught
open(F, '<', $input_file)
  or die "Cannot open $input_file, $!\n";

while (my $line = <F>) {

  next if ($line =~ /^\#.*$/);
  next if ($line =~ /^\s*$/);

  if ($line =~ /^(\S+)\s+(\d+)\s+(\S+)\s+(\S+)\s+(\d+)\s+(\S+)\s*$/) {
    my ($src_srv,$src_port,$src_db,$dest_srv,$dest_port,$dest_db) = ($1,$2,$3,$4,$5,$6);
    my ($dest_srv_host) = (gethostbyname($dest_srv));
    $dest_srv_host =~ s/\..*//;

    unless ($generic_working_host =~ /^$dest_srv_host/) {
      warn "// skipped copy of $src_db from $src_srv to $dest_srv
// this script should be run on the generic destination host ($dest_srv)\n";
      next;
    }
    my $src_srv_ok = 0;
    my $dest_srv_ok = 0;
    foreach my $available_srv_port (keys %mysql_directory_per_svr) {
      my ($srv,$port) = split ":", $available_srv_port;
      if ($src_srv =~ /^$srv.*$/ && $src_port == $port) {
	$src_srv_ok = 1;
      }
      if ($dest_srv =~ /^$srv.*$/ && $dest_port == $port) {
	$dest_srv_ok = 1;
      }
    }
    unless ($src_srv_ok && $dest_srv_ok) {
      warn "// skipped copy of $src_db from $src_srv to $dest_srv
185
// this script works only to copy dbs between certain nodes:mysql_port" .
186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209
join(", ", keys %mysql_directory_per_svr) ."\n";
      next;
    }
    my %hash = ('src_srv' => $src_srv,
		'src_db' => $src_db,
		'src_port' => $src_port,
		'dest_srv' => $dest_srv,
		'dest_db' => $dest_db,
		'dest_port' => $dest_port,
		'status' => "FAILED");
    push @dbs_to_copy, \%hash;
  } else {
    warn "
The input file has the wrong format,
$line
source_server\\tsource_port\\tsource_db\\tdestination_server\\tdestination_port\\tdestination_db
EXIT 1
";
    exit 1;
  }
}

close F;

my $copy_executable;

if (-e "/usr/bin/cp") {
  $copy_executable = "/usr/bin/cp";
} elsif (-e "/bin/cp") {
  $copy_executable = "/bin/cp";
}
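
# Defensive check (safe to drop): if neither path above exists, $copy_executable
# stays undefined and the local cp branch below can never run (remote rcp copies
# are unaffected), so flag it early rather than relying on the later
# "uninitialized value" warnings from -w.
warn "// No local cp found in /usr/bin or /bin; only rcp copies will work\n"
  unless defined $copy_executable;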

#change STDERR to STDOUT?

# starting copy processes
foreach my $db_to_copy (@dbs_to_copy) {

  print STDERR "//\n// Starting new copy process\n//\n";

  my ($time, $dbh, $sql, @tables);
  my $source_srv = $db_to_copy->{src_srv};
  $source_srv =~ s/\..*//;
  
  my $source_port = $db_to_copy->{src_port};

  my $source_db = $mysql_directory_per_svr{$source_srv . ":" . $source_port} . "/" . $db_to_copy->{src_db};

  my $destination_srv = $db_to_copy->{dest_srv};
  $destination_srv =~ s/\..*//;
  my $destination_port = $db_to_copy->{dest_port};

  my $destination_directory = $mysql_directory_per_svr{$destination_srv . ":" . $destination_port};
  
  my $destination_tmp_directory = $destination_directory;
  $destination_tmp_directory =~ s/\/var//;
  $destination_tmp_directory =~ s/\/databases//;
  $destination_tmp_directory .= "/tmp";

  # checking that destination db does not exist
  if (-e "$destination_directory/$db_to_copy->{dest_db}") {
    print STDERR "// $destination_directory/$db_to_copy->{dest_db} already exists, make sure to
// delete it or use another destination name for the database
// Skipped copy of $db_to_copy->{src_db} from $db_to_copy->{src_srv} to $db_to_copy->{dest_srv}
";
    next;
  }
  
  my $myisamchk_executable = "/usr/local/ensembl/mysql/bin/myisamchk";
  
  $source_srv =~ s/(ecs[1234]).*/$1/;
  $destination_srv =~ s/(ecs[1234]).*/$1/;

  if ($source_srv ne $destination_srv) {
    $copy_executable = "/usr/bin/rcp";
  }

  $source_srv = undef;
  $destination_srv = undef;

  # flush tables; in the source server
  #Need to change this $already_flushed functionality
  #if we choose to release locks after each copy.
  #This will happen anyway as we redefine $dbh for each copy, so we need to keep
  #dbh's in a global hash?
 

  unless (exists $already_flushed{$db_to_copy->{$flush_scope}} || $noflush) {
	#Get DBI for session READ locks
	my $dsn = sprintf("DBI:%s:database=%s;host=%s;port=%s",
					  'mysql', $db_to_copy->{src_db}, $db_to_copy->{src_srv}, $source_port);	
	$dbh = DBI->connect($dsn, 'ensadmin', $pass,{ RaiseError => 1, AutoCommit => 0 });

	#Store in hash so locks persist and we don't get errors when they go out of scope
	$db_handles{$db_to_copy->{src_srv}} = $dbh;

	#Lock tables
	if($dbflush){
	  @tables = map { $_->[0] } @{$dbh->selectall_arrayref('show tables')}; #Could use the DBI tables() method here?
	  print STDERR "// Acquiring READ LOCK in $db_to_copy->{$flush_scope} (".&get_time.")...";
	  $sql = 'LOCK TABLES '.join(' READ, ', @tables).' READ';
	  $dbh->do($sql);
	  print STDERR ".DONE (".&get_time.")\n";
	  $sql = 'flush tables';
	  print STDERR "// Flushing tables in $db_to_copy->{$flush_scope} (".&get_time.")...";
	}
	else{
	  print STDERR "// Flushing tables WITH READ LOCK in $db_to_copy->{$flush_scope} (".&get_time.")...";
	  #This is actually a global lock across all DBs, so no table names are required
	  $sql = 'flush tables WITH READ LOCK';
	}

	#Flush tables
	$dbh->do($sql);#Need to catch error here?
	print STDERR ".DONE (".&get_time.")\n";


	#OLD NON LOCKED FLUSH METHOD
	#print STDERR "// Flushing tables in $db_to_copy->{$flush_scope} (".&get_time.")...\t";
	#my $flush_cmd;
	#if($dbflush){
	#  my $tables_cmd = "echo 'show tables;' | mysql -h $db_to_copy->{src_srv} -u ensadmin -p$pass -P$source_port $db_to_copy->{src_db}";	  
	#	  my @tables = split/\n/, `$tables_cmd`;
	#	  shift @tables;#remove field header
	#	  $flush_cmd = "echo 'flush tables  ".join(', ', @tables).";' | mysql -h $db_to_copy->{src_srv} -u ensadmin -p$pass -P$source_port $db_to_copy->{src_db}";
	#}
	#else{
	#  $flush_cmd = "echo 'flush tables;' | mysql -h $db_to_copy->{src_srv} -u ensadmin -p$pass -P$source_port";
	###$flush_cmd .= ' '.$db_to_copy->{src_db} if $dbflush;
	#}
	#Now flushes only the db-specific tables
	#This was introduced to enable copying an unused DB when others are being heavily used
	#flush tables still resets the query cache, so will this provide an appreciable speed up?
	#Apparently so. But it is still slow due to the burden on the server from other queries
    #if (system($flush_cmd) == 0) {
    #  print STDERR "DONE (".&get_time.")\n";
    #} else {
    #  print STDERR "FAILED skipped copy of ".$db_to_copy->{src_db}." from ".$db_to_copy->{src_srv}." to ". $db_to_copy->{dest_srv} . "\n";
    #  next;
    #}


	#Can we capture a CTRL-C here to exit the whole script?
	#Otherwise we exit the flush and the script carries on trying to copy, which can mess up tables.
	#Need to use sub INT_handler and $SIG{'INT'} = 'INT_handler';
	#Or is the problem that we are interrupting the flush/rcp process and not the perl process?
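
	# A rough sketch of such a handler (an addition, untested here); releasing any
	# outstanding locks before exiting would stop an interrupted run from leaving
	# the source servers read-locked:
	#
	#   $SIG{'INT'} = sub {
	#     warn "// Interrupted, releasing locks\n";
	#     foreach my $h (values %db_handles) {
	#       eval { $h->do('UNLOCK TABLES'); $h->disconnect; };
	#     }
	#     exit 1;
	#   };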

    $already_flushed{$db_to_copy->{$flush_scope}} = 1;
  }

  # cp the db to $destination_tmp_directory in the destination server
  my $copy_cmd = '';
  #Need to make the tmp dir first for file copying
  if($table_type && ! -e "$destination_tmp_directory/$db_to_copy->{dest_db}"){
	$copy_cmd = "mkdir $destination_tmp_directory/$db_to_copy->{dest_db};";
  }

  if ($copy_executable =~ /\/bin\/cp$/) {

	if($table_type && ($db_to_copy->{src_db} =~ /_core_/)){#Copy core table subset
	
	  foreach my $table(@{$tables{$table_type}}){
		$copy_cmd .= "$copy_executable  $source_db/$table.* $destination_tmp_directory/$db_to_copy->{dest_db};";
	  }
	  
	}#Full copy
	else{
	  $copy_cmd = "$copy_executable -r $source_db $destination_tmp_directory/$db_to_copy->{dest_db}";
	}
	
	print STDERR "// cp Copying $table_type $db_to_copy->{src_srv}:$source_db...";
 
  } # OR rcp the db to $destination_tmp_directory in the destination server
  elsif ($copy_executable eq "/usr/bin/rcp") {

	if($table_type && ($db_to_copy->{src_db} =~ /_core_/)){#Copy core table subset
	
	  foreach my $table(@{$tables{$table_type}}){
		$copy_cmd .= "$copy_executable -r $db_to_copy->{src_srv}:$source_db/$table.* $destination_tmp_directory/$db_to_copy->{dest_db};";
	  }
	  
	}#Full copy
	else{
	  $copy_cmd = "$copy_executable -r $db_to_copy->{src_srv}:$source_db $destination_tmp_directory/$db_to_copy->{dest_db}";
	}

	print STDERR "// rcp Copying $table_type $db_to_copy->{src_srv}:$source_db...";

  }

  if (system("$copy_cmd") == 0) {
    print STDERR "DONE (".&get_time.")\n";
  } else {
    print STDERR "FAILED
skipped copy of $db_to_copy->{src_db} from $db_to_copy->{src_srv} to $db_to_copy->{dest_srv}\n";
    print "$copy_cmd\n";
    next;
  }

  
  #release locks on individual DB tables
  if($dbflush){
	$db_handles{$db_to_copy->{src_srv}}->do('UNLOCK TABLES');
	#Also remove from the flush hash just in case we are copying this DB twice
	#i.e. we re-lock and flush
	$db_handles{$db_to_copy->{src_srv}}->disconnect;
	delete $db_handles{$db_to_copy->{src_srv}};
	delete $already_flushed{$db_to_copy->{$flush_scope}};
  }
 
  # checks/fixes the indices
  print STDERR "// Checking $destination_tmp_directory/$db_to_copy->{dest_db}/*.MYI in progress...
//\n";
  chdir "$destination_tmp_directory/$db_to_copy->{dest_db}";
  my $myisamchk_cmd = "ls | grep MYI | xargs $myisamchk_executable -F -f -s --key_buffer_size=2000000000 --sort_buffer_size=2000000000 --read_buffer_size=2000000 --write_buffer_size=2000000";
  if (system("$myisamchk_cmd") == 0) {
    print STDERR "//
// Checking $destination_tmp_directory/$db_to_copy->{dest_db}/*.MYI DONE\n";
    chdir "$working_dir";
  } else {
    print STDERR "//
// Checking $destination_tmp_directory/$db_to_copy->{dest_db}/*.MYI FAILED
skipped checking/copying of $db_to_copy->{dest_db}\n";
    system("rm -rf $destination_tmp_directory/$db_to_copy->{dest_db}");
    chdir "$working_dir";
    next;
  }

  # moves db to mysql directory if checking went fine, skip otherwise
  if (system("mv $destination_tmp_directory/$db_to_copy->{dest_db} $destination_directory") == 0) {
    print STDERR "// moving $destination_tmp_directory/$db_to_copy->{dest_db} to $destination_directory DONE\n";
  } else {
    print STDERR "// moving $destination_tmp_directory/$db_to_copy->{dest_db} to $destination_directory FAILED\n";
    system("rm -rf $destination_tmp_directory/$db_to_copy->{dest_db}");
    next;
  }
  $db_to_copy->{status} = "SUCCEEDED";

}

#Now unlock any global read locks we have left on servers
foreach my $dbh(values(%db_handles)){
  $dbh->do('UNLOCK TABLES');
  $dbh->disconnect;
}

print STDERR "//\n// End of all copy processes (".&get_time.")\n//\n// Processes summary\n";


foreach  my $db_to_copy (@dbs_to_copy) {
  print STDERR "// $db_to_copy->{status} copy of $db_to_copy->{src_db} on $db_to_copy->{src_srv} to $db_to_copy->{dest_db} on $db_to_copy->{dest_srv} \n";
}

print STDERR "\n";

sub get_time{

  my ($sec, $min, $hour) = localtime;

  return $hour.':'.$min.':'.$sec;
}

1;