Commit e5684f41 authored by cvs2git's avatar cvs2git
Browse files

This commit was manufactured by cvs2svn to create branch 'branch-ensembl-59'.

Cherrypick from master 2010-10-18 13:34:05 UTC Andreas Kusalananda Kähäri <ak4@sanger.ac.uk> 'Insert yet another progress message.':
    misc-scripts/load_databases/load_database_from_ftp_site.pl
    misc-scripts/load_databases/load_multiple_databases.pl
    misc-scripts/mouse_knockouts/IKMC_get_data.pl
    misc-scripts/production_database/scripts/push_master_tables.pl
    misc-scripts/production_database/scripts/update_database_list.pl
    misc-scripts/production_database/sql/bootstrap_master.pl
    misc-scripts/production_database/sql/tables.sql
    misc-scripts/xref_mapping/XrefMapper/culex_quinquefasciatus.pm
    misc-scripts/xref_mapping/XrefParser/IKMCParser.pm
    modules/Bio/EnsEMBL/ApiVersion.pm
    modules/Bio/EnsEMBL/CircularSlice.pm
    modules/Bio/EnsEMBL/OntologyXref.pm
    modules/t/test-genome-DBs/homo_sapiens/core/ontology_xref.sql
    modules/t/test-genome-DBs/homo_sapiens/core/ontology_xref.txt
    sql/patch_59_60_a.sql
    sql/patch_59_60_b.sql
    sql/patch_59_60_c.sql
parent 5f610bb0
#!/usr/bin/perl
use strict;
use warnings;
use Getopt::Long;
# download and import the database
# Command line configuration for downloading one database dump and loading
# it into a MySQL instance.
my $database;               # name of the database dump to fetch (-database)
my $root;                   # rsync location holding the dumps (-root)
my $new_database;           # name of the database to create (-new_database)
my $user;
my $pass;
my $port;
my $host;
my $cleanup     = undef;    # if set, remove the downloaded files at the end
my $force       = undef;    # if set, checksum mismatches only produce warnings
my $mysqltmpdir = undef;    # value of -mysqltempdir, later passed on as --tmpdir
my $quiet       = 0;

# GetOptions returns false (after printing its own diagnostics) when it meets
# an unknown or malformed option; stop there instead of carrying on with a
# partially parsed command line.
GetOptions(
    'root=s'         => \$root,
    'database=s'     => \$database,
    'new_database=s' => \$new_database,
    'host=s'         => \$host,
    'force'          => \$force,
    'cleanup'        => \$cleanup,
    'port=s'         => \$port,
    'user=s'         => \$user,
    'pass=s'         => \$pass,
    'mysqltempdir=s' => \$mysqltmpdir,
    'quiet'          => \$quiet,
    'help'           => sub { usage(); exit(0); }
) or do { usage(); exit(1); };
# Validate the command line and fill in the values that can be derived.

# Without a source database name there is nothing to download, and every
# later command would be built from an empty name, so stop immediately.
if ( !defined($database) ) {
    die "No database name given (use -database) so there is nothing to download and no root to guess\n";
}

if ( !defined($root) ) {

    # Try to guess the root from the release number embedded in the database
    # name. The capture is taken from the match itself so that a failed match
    # yields undef rather than a stale $1.
    my ($release) = $database =~ /\S+_\S+_\S+_(\d+)_/;
    if ( defined($release) ) {
        $root = "//ftp.ensembl.org/ensembl/pub/release-" . $release . "/mysql";
        print "Using $root as the root obtained from the database name\n" unless $quiet;
    }
    else {
        die "No root given i.e. ftp.ensembl.org/pub/release-54/mysql and could not guess from the database name $database";
    }
}

# Default target name: the login name followed by the source database name.
if ( !defined($new_database) ) {
    $new_database = $ENV{"USER"} . "_" . $database;
    print "will create new database $new_database\n" unless $quiet;
}

if ( !defined $user or !defined $pass or !defined $host ) {
    die "Need user, password and host for mysql instance to create new database on\n";
}

# Connection options shared by the mysql and mysqlimport command lines.
my $mysql_options = "-h$host -u$user -p$pass";
if ( defined($port) ) {
    $mysql_options .= " -P$port";
}
# Download the dump directory for $database into the current directory.
print "rsync --recursive rsync:$root/$database .\n" unless $quiet;
#goto SKIP;

# rsync is only asked to be verbose when its output will actually be shown;
# in quiet mode it is told to stay quiet as well.
my $rsync_verbosity = $quiet ? "--quiet" : "--verbose";
my $line            = `rsync --recursive $rsync_verbosity rsync:$root/$database .`;
my $rsync_status    = $?;
print $line if ( defined($line) and !$quiet );

# Everything below works on the downloaded files, so a failed transfer is
# fatal rather than something to discover later from missing files.
if ($rsync_status) {
    die "rsync of rsync:$root/$database failed (status $rsync_status)\n";
}
#SKIP:
#if it does not exist then so be it, just ignore the error code
#my $com = "mysql $mysql_options -e'drop database ".$new_database."'";
#$line = `$com`;
# no need to check here: if the database does not exist the drop just reports an error;
# it is only done to delete the database if it exists already
##
## generate error to test
##
#$mysql_options =~ s/-uensadmin/-uensro/g;
# Create the new database; any failure reported by the mysql client is fatal.
my $com = "mysql $mysql_options -e'create database $new_database'";
$line = `$com`;
if ( $? or $line =~ /Error/ or $line =~ /ERROR/ ) {
    print $line;
    die "Error during mysql\n";
}
else {
    print "Created new database $new_database on host $host\n" unless $quiet;
}

# From here on every mysql/mysqlimport command line names the new database.
$mysql_options .= " $new_database";

#get the database schema and load it.
print "now creating the schema\n" unless $quiet;
if ( system("gunzip -f $database/$database.sql.gz") != 0 ) {
    die "Could not uncompress $database/$database.sql.gz (status $?)\n";
}

# The table imports that follow cannot work without the schema, so a failed
# schema load stops the run here.
if ( system("mysql $mysql_options < $database/$database.sql") != 0 ) {
    die "Could not load the schema $database/$database.sql into $new_database (status $?)\n";
}

# Re-compressing the schema is housekeeping only, so a failure is just reported.
if ( system("gzip $database/$database.sql") != 0 ) {
    warn "Could not re-compress $database/$database.sql (status $?)\n";
}

# A missing CHECKSUMS file is caught when it is opened for reading later on.
if ( system("gunzip -f $database/CHECKSUMS.gz") != 0 ) {
    warn "Could not uncompress $database/CHECKSUMS.gz (status $?)\n";
}

print "now parse the checksum\n" unless $quiet;
if ( defined($mysqltmpdir) ) {
    $mysql_options = " --tmpdir $mysqltmpdir " . $mysql_options;
}
# Import every table dump listed in the CHECKSUMS file.
#
# Each line is split on whitespace into two numbers and a file name. Only
# files named <table>.txt.gz are imported. The first two fields printed by
# `sum` for the file are compared with the two numbers from the line; a
# mismatch aborts the run unless -force was given. The file is then
# uncompressed, loaded with mysqlimport and compressed again.
open( my $chk, '<', "$database/CHECKSUMS" )
  or die "Could not open $database/CHECKSUMS for reading: $!\n";
while ( my $entry = <$chk> ) {
    chomp $entry;
    my ( $e1, $e2, $file ) = split( ' ', $entry );

    # Blank or short lines carry no file name and are skipped.
    next if ( !defined($file) );

    # Anchored, with literal dots: only real "<table>.txt.gz" names qualify.
    my $table;
    if ( $file =~ /\A(\S+)\.txt\.gz\z/ ) {
        $table = $1;
    }
    else {
        print "ignore $file\n" unless $quiet;
        next;
    }

    if ( !-e "$database/$file" ) {
        print STDERR "$database/$file does not exist. It is specified in the CHECKSUM file but cannot be found?\n";
        cleanup(1);
    }

    $com  = "sum $database/$file";
    $line = `$com`;
    if ($?) {
        print STDERR "$com failed\n";
        print STDERR "with output:" . $line . "\n";
        print STDERR "and error code $?\n";
        print STDERR "Ending as no checksum could be obtained\n";
        cleanup(1);
    }

    # split ' ' ignores any leading padding in the output of sum.
    my ( $s1, $s2 ) = split( ' ', $line );
    if ( $s1 != $e1 or $s2 != $e2 ) {
        print STDERR "Warning: checksums do not match for file $database/$file\n" unless $quiet;
        print STDERR " from checksum we have $e1 and $e2\n" unless $quiet;
        print STDERR " but from sum we have $s1 and $s2\n" unless $quiet;
        if ( defined($force) ) {
            print " Force set so carrying on\n" unless $quiet;
        }
        else {
            print STDERR "Checksums do not match which can be a problem.\n";
            print STDERR "But the CHECKSUM file can sometimes be wrong as the database may have been\n";
            print STDERR "updated without the CHECKSUM file being updated\n";
            print STDERR "To continue with just warning use the -force flag in the options\n";
            cleanup(1);
        }
    }

    if ( system("gunzip -f $database/$file") != 0 ) {
        print STDERR "Could not uncompress $database/$file (status $?)\n";
        cleanup(1);
    }

    # mysqlimport is handed the absolute path of the uncompressed dump.
    my $str = "mysqlimport --fields_escaped_by=\\\\ $mysql_options " . $ENV{"PWD"} . "/$database/$table.txt";
    print "$str\n" unless $quiet;
    $line = `$str`;
    if ( $line =~ /Error/ or $? ) {
        print STDERR $line;
        print STDERR "error code $?\n";
        print STDERR "Error during mysqlimport\n";
        cleanup(1);
    }
    print $line unless $quiet;
    system("gzip $database/$table.txt");
    print "\n\n" unless $quiet;
}
close($chk);
cleanup();
# Finish the run and exit the process; this sub never returns.
#
#   $died - when true the run failed: the CHECKSUMS file is compressed again
#           and the exit status is 1.
#
# Otherwise the download directory is removed if -cleanup was given and the
# exit status is 0.
#
# The external commands are run in list form so the database name is passed
# as a single argument and is never interpreted by a shell; "--" stops a name
# starting with "-" from being read as an option.
sub cleanup {
    my $died = shift;
    if ( defined($died) and $died ) {
        system( "gzip", "--", "$database/CHECKSUMS" );
        exit 1;
    }

    # Never run a recursive delete with an empty directory name.
    if ( defined($cleanup) and defined($database) and length($database) ) {
        system( "rm", "-Rf", "--", $database );
    }
    exit 0;
}
# Print the help text for load_database_from_ftp_site.pl to the currently
# selected output handle. The option names shown are the ones registered
# with GetOptions.
sub usage {
    print << "EOH";
This perl script will download (rsync) the necessary ftp files and load them into a new local
ensembl mysql database. It will check that the checksums match and do all the zipping and
unzipping of the files.
load_database_from_ftp_site.pl -root {root} -database {database} -new_database {database2}
-force -cleanup -quiet -help
-host {host} -port {port} -user {user} -pass {password}
-mysqltempdir {dir}
-root Root directory for ftp files
-database Database name to get data for
-new_database Name of the new database
-user User name to access database. Must allow writing.
-pass Password for user.
-host Database host.
-port Database port.
-force import data even if the checksums do not match
-cleanup remove the downloaded files at the end
-quiet No output except for serious error messages
-mysqltempdir Mysql may not have enough tmp space so this can be set to another directory
-help print this help text
examples:-
1) perl load_database_from_ftp_site.pl -database homo_sapiens_core_54_36p -host mysqlhostname
-user mysqluser -pass mysqlpassword -force
This will download the ftp files for the 54 release of the human core database and create a database
called <userid>_homo_sapiens_core_54_36p where userid is the login name of the user. To choose your
own database name use the -new_database option.
2) load_database_from_ftp_site.pl -database homo_sapiens_core_57_37d -new_database homo_sapiens_core_59_37d
-host mysqlhostname -user mysqluser -pass mysqlpassword -quiet -cleanup -mysqltempdir /scratch/
Will load the human core database into the mysql instance on mysqlhostname and use the directory
/scratch/ to use as the tmp directory for mysql.
EOH
}
use strict;
use warnings;
use Getopt::Long;
use Bio::EnsEMBL::Registry;
# Command line configuration for loading several databases in one go.
my $reg = "Bio::EnsEMBL::Registry";    # class name used for the registry calls

# download and import the database
my $root;
my $prefix = "";              # prepended to the name of every new database
my $release;
my $specieslist;              # comma separated list given with -species
my $grouplist;                # comma separated list given with -groups
my $user;
my $pass;
my $port = 3306;
my $host;
my $cleanup      = undef;
my $force        = undef;     # if set, checksum mismatches only produce warnings
my $mysqltempdir = undef;
my $quiet        = 0;
my $run          = undef;     # without -run the databases are only listed

# GetOptions returns false (after printing its own diagnostics) when it meets
# an unknown or malformed option; stop there instead of carrying on with a
# partially parsed command line.
GetOptions(
    'root=s'         => \$root,
    'prefix=s'       => \$prefix,
    'release=s'      => \$release,
    'species=s'      => \$specieslist,
    'groups=s'       => \$grouplist,
    'host=s'         => \$host,
    'force'          => \$force,
    'cleanup'        => \$cleanup,
    'port=s'         => \$port,
    'user=s'         => \$user,
    'pass=s'         => \$pass,
    'mysqltempdir=s' => \$mysqltempdir,
    'quiet'          => \$quiet,
    'run'            => \$run,
    'help'           => sub { usage(); exit(0); }
) or do { usage(); exit(1); };
# -species and -groups are both mandatory; each is a comma separated list.
# If either is missing the help text is shown before giving up.
if ( !defined($specieslist) ) {
    usage();
    die "No species set?\n";
}
my @names = split( ",", $specieslist );

if ( !defined($grouplist) ) {
    usage();
    die "No groups set?\n";
}
my @types = split( ",", $grouplist );
my $db_version = undef;

#
# Load the registry from the public server and turn each name given on the
# command line into the species part of its core database name.
#
$reg->no_version_check(1);
$reg->load_registry_from_db(
    -host       => "ensembldb.ensembl.org",
    -user       => "anonymous",
    -db_version => 59,                        # comment out later.
);

my @species;
foreach my $alias (@names) {
    my $slice_adaptor = $reg->get_adaptor( $alias, "core", "slice" );

    # Names the registry does not know are reported and skipped.
    if ( !defined($slice_adaptor) ) {
        print "Could not find species $alias so ignoring\n";
        next;
    }

    my $dbname = $slice_adaptor->dbc->dbname;
    next unless defined($dbname);

    # Keep whatever precedes "_core" in the database name.
    push @species, $1 if $dbname =~ /(\S+_\S+)_core/;
}
# Settle on the release to copy: an explicit -release must be a whole number,
# otherwise the version reported by the registry is used.
if ( !defined($release) ) {
    $release = $reg->software_version();
}
elsif ( $release !~ /^\d+$/ ) {
    die "release must be an integer\n";
}
else {
    $db_version = $release;
}

# Releases before 47 are queried on port 3306, all others on port 5306.
my $sqlport = ( $release < 47 ) ? 3306 : 5306;
# Build the list of databases to copy by asking the public server which
# database names match each species/group/release combination.
my @database_list;
my $sqltemplate = 'mysql -hensembldb.ensembl.org -uanonymous -PPORT --skip-column-names -e\'show databases like "SPECIES%TYPE%RELEASE%"\'';
$sqltemplate =~ s/PORT/$sqlport/;

foreach my $sp (@species) {
    foreach my $ty (@types) {

        # $ty is an alias for the element of @types, so the "\_" suffix is
        # added to a copy. Appending to $ty itself would grow the entry on
        # every pass and break the pattern from the second species onwards.
        # The group "all" means no restriction on the database type.
        my $type_pattern = ( $ty eq "all" ) ? "" : $ty . "\\_";

        my $sql = $sqltemplate;
        $sql =~ s/SPECIES/$sp/;
        $sql =~ s/RELEASE/$release/;
        $sql =~ s/TYPE/$type_pattern/;

        my $line = `$sql`;
        if ( $? or !defined($line) ) {
            warn "Could not list the databases for $sp ($ty) with: $sql\n";
            next;
        }

        # One database name per output line.
        push @database_list, split( /\n/, $line );
    }
}
# A target host and user are required before anything can be copied.
unless ( defined($host) and defined($user) ) {
    usage();
    die " No host or user\n";
}

#
# Run a harmless query against the target mysql instance to make sure the
# connection details work before any copying starts.
#
my $com = join( "",
    "mysql -h$host -u$user -P$port ",
    ( defined($pass) ? "-p$pass " : "" ),
    "-e'show databases like \"justatest\"' " );

my $line = `$com`;
if ($?) {
    print "$com fails\n";
    die "$line";
}
die "problem with mysql information\n$line\n" if $line =~ /ERROR/;
use FindBin '$Bin';

# Common part of the command line used to run load_database_from_ftp_site.pl
# (expected in the same directory as this script) once per database.
# The port is passed on so the load goes to the same mysql instance that the
# checks in this script talk to.
my $com_init = "perl " . $Bin . "/load_database_from_ftp_site.pl -host $host -user $user -port $port ";
if ( defined($force) ) {
    $com_init .= "-force ";
}
if ( defined($cleanup) ) {
    $com_init .= "-cleanup ";
}
if ( defined($pass) ) {
    $com_init .= "-pass $pass ";
}
if ( defined($root) ) {
    $com_init .= "-root $root ";
}
if ( defined($mysqltempdir) ) {
    $com_init .= "-mysqltempdir $mysqltempdir ";
}

# $quiet starts out as 0, so it is always defined; test its value so that
# -quiet is only passed on when it was actually requested.
if ($quiet) {
    $com_init .= "-quiet ";
}
# Go through the databases found: collect the ones that already exist on the
# target server in $prob, copy the others when -run is set, and otherwise
# collect what would be copied in $okay.
my $okay = "";
my $prob = "";
foreach my $db (@database_list) {
    my $com = "mysql -h$host -u$user -P$port ";
    if ( defined($pass) ) {
        $com .= "-p$pass ";
    }
    $com .= "-e'show databases like \"$prefix$db\"'";
    $line = `$com`;

    # The name is matched literally, not as a regular expression.
    if ( defined($line) and $line =~ /\Q$db\E/ and !defined($force) ) {
        $prob .= "\t$prefix$db\n";
        next;
    }
    elsif ( defined($run) ) {
        my $cmd = $com_init . "-database $db -new_database $prefix$db ";
        print STDERR "Copying $db to $host as $prefix$db\n";
        my $output = `$cmd`;
        if ($?) {
            print STDERR "Copying $db failed (status $?), see $db.OUTPUT\n";
        }

        # Keep what the load script printed (not the "show databases" result
        # from above) in a per-database log file.
        if ( open( my $out, '>', "$db.OUTPUT" ) ) {
            print {$out} ( defined($output) ? $output : "" );
            close($out) or warn "Could not finish writing $db.OUTPUT: $!\n";
        }
        else {
            warn "Could not open $db.OUTPUT for writing: $!\n";
        }
    }
    else {
        $okay .= "\t$db to $host $prefix$db\n";
    }
}
# Final report. After a real run only the skipped databases are listed; in
# listing mode the planned copies and a reminder about -run are shown too.
my $have_problems = ( length($prob) > 1 );
if ( defined($run) ) {
    if ($have_problems) {
        print "Problem with the following databases as they already exist on $host so not copied\n", $prob;
    }
}
else {
    if ($have_problems) {
        print "Problem with the following databases as they already exist on $host\n", $prob;
    }
    if ( length($okay) > 1 ) {
        print "The following would be copied:-\n", $okay;
    }
    print "\nYou need to set the flag -run to actually do the data copy\n",
      "By default it is not done so that this list can be checked first\n";
}
# Print the help text for load_multiple_databases.pl to the currently
# selected output handle. The option names shown are the ones registered
# with GetOptions.
sub usage {
    print << "EOH";
It uses the Registry from the core API to get the species name to pass on to the script
load_database_from_ftp_site.pl.
load_multiple_databases.pl -root {root} -prefix {prefix} -release {number}
-species {s1,s2,s3} -groups {type1,type2} -force -cleanup -quiet -help
-host {host} -port {port} -user {user} -pass {password}
-mysqltempdir {dir} -run
-root Root directory for ftp files
-prefix Prefix added to the name of each new database
-release Release version of the database to get
-species Comma separated list of species to get
-groups Comma separated list of database types to get
( from core,variation,funcgen,otherfeatures,vega etc or all)
-user User name to access database. Must allow writing.
-pass Password for user.
-host Database host.
-port Database port.
-force import data even if the checksums do not match
or the new database already exists.
-cleanup remove the downloaded files at the end
-quiet No output except for serious error messages
-mysqltempdir Mysql may not have enough tmp space so this can be set to another directory
-run If set will start the download etc else it will just list the databases.
NOTE: Not default as this script does a lot so we want to make sure everything
is correct first before starting.
-help print this help text
examples:-
1) perl load_multiple_databases.pl -release 54 -groups core -species human -host mysqlhostname
-user mysqluser -pass mysqlpassword -force -run -prefix "copy_"
This will download the ftp files for the 54 release of the human core database and create a database
called copy_homo_sapiens_core_54_36p.
2) perl load_multiple_databases.pl -release 59 -species mouse -groups all -run
-host mysqlhostname -user mysqluser -pass mysqlpassword -quiet -cleanup -mysqltempdir /scratch/
Will load the mouse databases for release 59 into the mysql instance on mysqlhostname and use the directory
/scratch/ to use as the tmp directory for mysql.
This will load the databases:-
mus_musculus_cdna_59_37l
mus_musculus_core_59_37l
mus_musculus_funcgen_59_37l
mus_musculus_otherfeatures_59_37l
mus_musculus_variation_59_37l
mus_musculus_vega_59_37l
EOH
}
#!/ebi/extserv/bin/perl/bin/perl
# an example script demonstrating the use of BioMart webservice
#
# NOTE this could have implemented in the parser itself but the data is needed
# for the simple features so
#
use strict;
use LWP::UserAgent;
my $xml = (<<XXML);
<?xml version="1.0" encoding="UTF-8"?>