Skip to content
Snippets Groups Projects
Commit e5684f41 authored by cvs2git's avatar cvs2git
Browse files

This commit was manufactured by cvs2svn to create branch 'branch-

ensembl-59'.

Cherrypick from master 2010-10-18 13:34:05 UTC Andreas Kusalananda Kähäri <ak4@sanger.ac.uk> 'Insert yet another progress message.':
    misc-scripts/load_databases/load_database_from_ftp_site.pl
    misc-scripts/load_databases/load_multiple_databases.pl
    misc-scripts/mouse_knockouts/IKMC_get_data.pl
    misc-scripts/production_database/scripts/push_master_tables.pl
    misc-scripts/production_database/scripts/update_database_list.pl
    misc-scripts/production_database/sql/bootstrap_master.pl
    misc-scripts/production_database/sql/tables.sql
    misc-scripts/xref_mapping/XrefMapper/culex_quinquefasciatus.pm
    misc-scripts/xref_mapping/XrefParser/IKMCParser.pm
    modules/Bio/EnsEMBL/ApiVersion.pm
    modules/Bio/EnsEMBL/CircularSlice.pm
    modules/Bio/EnsEMBL/OntologyXref.pm
    modules/t/test-genome-DBs/homo_sapiens/core/ontology_xref.sql
    modules/t/test-genome-DBs/homo_sapiens/core/ontology_xref.txt
    sql/patch_59_60_a.sql
    sql/patch_59_60_b.sql
    sql/patch_59_60_c.sql
parent 5f610bb0
No related branches found
Tags cvs/release/vega/59-1
No related merge requests found
Showing
with 6002 additions and 0 deletions
#!/usr/bin/perl
# load_database_from_ftp_site.pl
#
# Download an Ensembl database dump from an FTP/rsync root and load it
# into a new database on a local MySQL instance, verifying the CHECKSUMS
# file on the way.  See usage() at the bottom of this script for details.
use strict;
use warnings;
use Getopt::Long;
# download and import the database
my $database;            # name of the dump to fetch, e.g. homo_sapiens_core_54_36p
my $root;                # rsync/FTP root directory holding the dump
my $new_database;        # name of the database to create locally
my $user;                # MySQL user (must have write access)
my $pass;                # MySQL password
my $port;                # MySQL port (optional)
my $host;                # MySQL host
my $cleanup = undef;     # if set, remove the downloaded files at the end
my $force = undef; # if set, ignore checksum mismatches and just write warnings.
my $mysqltmpdir = undef; # alternative tmp directory passed to mysqlimport
my $quiet = 0;           # suppress progress output
GetOptions ('root=s' => \$root,
'database=s' => \$database,
'new_database=s' => \$new_database,
'host=s' => \$host,
'force' => \$force,
'cleanup' => \$cleanup,
'port=s' => \$port,
'user=s' => \$user,
'pass=s' => \$pass,
'mysqltempdir=s' => \$mysqltmpdir,
'quiet' => \$quiet,
'help' => sub { usage(); exit(0);}
);
# Derive the rsync root from the database name when -root was not given:
# the release number embedded in the name (e.g. ..._54_36p) selects the
# matching release-NN directory on the Ensembl FTP site.
if(defined($database)){
if(!defined($root)){
#query database to try and guess root;
$database =~ /\S+_\S+_\S+_(\d+)_/;
my $release = $1;  # undef when the name does not match the pattern
if(defined($release)){
$root = "//ftp.ensembl.org/ensembl/pub/release-".$release."/mysql";
print "Using $root as the root obtained from the database name\n" unless $quiet;
}
else{
die "No root given i.e. ftp.ensembl.org/pub/release-54/mysql and could not guess from the database name $database";
}
}
}
if(!defined($root)){
die "No root given i.e. ftp.ensembl.org/pub/release-54/mysql and no database name given to try and guess root from";
}
# Default the new database name to <unix user>_<source database name>.
if(!defined($new_database)){
$new_database = $ENV{"USER"}."_".$database;
print "will create new database $new_database\n" unless $quiet;
}
if(!defined $user or !defined $pass or !defined $host){
die "Need user, password and host for mysql instance to create new database on\n";
}
# Base mysql client options; -P<port> is appended only when a port was given.
my $mysql_options = "-h$host -u$user -p$pass";
if(defined($port)){
$mysql_options .= " -P$port";
}
# Mirror the database dump directory from the rsync root into the current
# working directory.
print "rsync --recursive rsync:$root/$database .\n" unless $quiet;
my $line;
#goto SKIP;
# BUG FIX: the two rsync flags were swapped in the original code
# (--verbose was used when -quiet was given, and --quiet otherwise).
if($quiet){
$line = `rsync --recursive --quiet rsync:$root/$database .`;
}
else{
$line = `rsync --recursive --verbose rsync:$root/$database .`;
}
print $line unless $quiet;
#SKIP:
#if it does not exist then so be it, just ignore the error code
#my $com = "mysql $mysql_options -e'drop database ".$new_database."'";
#$line = `$com`;
# no need to check here as if the database does not exist it should get an error
# just done to delete it if it exists already
##
## generate error to test
##
#$mysql_options =~ s/-uensadmin/-uensro/g;
# Create the empty target database; any "Error"/"ERROR" in the output or
# a non-zero exit status ($?) is fatal.
my $com = "mysql $mysql_options -e'create database $new_database'";
$line = `$com`;
if($? or $line =~ /Error/ or $line =~ /ERROR/){
print $line;
die "Error during mysql\n";
}
else{
print "Created new database $new_database on host $host\n" unless $quiet;
}
# From here on every mysql invocation targets the new database.
$mysql_options .= " $new_database";
#get the database schema and load it.
print "now creating the schema\n" unless $quiet;
system("gunzip -f $database/$database.sql.gz");
system("mysql $mysql_options < $database/$database.sql");
system("gzip $database/$database.sql");
system("gunzip -f $database/CHECKSUMS.gz");
print "now parse the checksum\n" unless $quiet;
# mysqlimport may need a larger tmp area than the server default provides.
if(defined($mysqltmpdir)){
$mysql_options = " --tmpdir $mysqltmpdir ".$mysql_options;
}
# Walk the CHECKSUMS file: each line is "<sum1> <sum2> <filename>".  For
# every table dump listed there, verify the checksum against sum(1), then
# gunzip, mysqlimport and re-gzip the file.  Any failure goes through
# cleanup(1), which re-compresses CHECKSUMS and exits non-zero.
open(CHK,"<$database/CHECKSUMS") or die "COuld not open CHECKSUMS for reading???\n";
while (<CHK>){
chomp;
my ($e1, $e2, $file) = split;
my $table;
my $index = "";
# Table dumps are named <table>.txt.gz (possibly with a numeric part
# captured into $index); anything else (e.g. the .sql.gz schema) is skipped.
if($file =~ /(\S+)(.*\d*).txt.gz/){
$table = $1;
$index = $2;
}
else{
print "ignore $file\n" unless $quiet;
next;
}
if(!-e "$database/$file"){
print STDERR "$database/$file does not exist. It is specified in the CHECKSUM file but cannot be found?";
cleanup(1)
}
# Compare the recorded checksum against sum(1) of the downloaded file.
$com = "sum $database/$file";
$line = `$com`;
if($?){
print STDERR "$com failed\n";
print STDERR "with output:".$line."\n";
print STDERR "and error code $?\n";
print STDERR "Ending as no checksum could be obtained";
cleanup(1);
}
my ($s1, $s2, @junk) = split (/\s+/,$line);
if($s1 != $e1 or $s2 != $e2){
print STDERR "Warning: checksums do not match for file $database/$file\n" unless $quiet;
print STDERR " from checksum we have $e1 and $e2\n" unless $quiet;
print STDERR " but from sum we have $s1 and $s2\n" unless $quiet;
# A mismatch is fatal unless -force was given.
if(defined($force)){
print " Force set so carrying on\n" unless $quiet;
}
else{
print STDERR "Checksums do not match which can be a problem.\n";
print STDERR "But the CHECKSUM file can sometimes be wrong as the database may have been\n";
print STDERR "updated without the CHECKSUM file being updated\n";
print STDERR "To continue with just warning use the -force flag in the options\n";
cleanup(1);
}
}
# Load the table data, then re-compress the dump to save space.
system("gunzip -f $database/$file");
my $str= "mysqlimport --fields_escaped_by=\\\\ $mysql_options ".$ENV{"PWD"}."/$database/$table$index.txt";
print "$str\n" unless $quiet;
$line = `$str`;
if($line =~ /Error/ or $?){
print STDERR $line;
print STDERR "error code $?\n";
print STDERR "Error during mysqlimport\n";
cleanup(1);
}
print $line unless $quiet;
system("gzip $database/$table$index.txt");
print "\n\n" unless $quiet;
}
close CHK;
cleanup();
# Terminate the script.  cleanup(1) means failure: re-compress the
# CHECKSUMS file (so a retry starts from the downloaded state) and exit
# with status 1.  cleanup() means success: remove the downloaded dump
# directory when -cleanup was requested, then exit 0.
sub cleanup{
my ($failed) = @_;
if($failed){
system("gzip $database/CHECKSUMS");
exit 1;
}
system("rm -Rf $database") if defined($cleanup);
exit 0;
}
# Print the command line help text.
# DOC FIXES vs the original text: the example used "-databases" (the real
# option is -database), the option list said "-mysqltmpdir" (the real
# option is -mysqltempdir), example 1 named a release-59 database for a
# release-54 dump, plus several spelling fixes.
sub usage{
print << "EOH";
This perl script will download (rsync) the necessary ftp files and load them into a new local
ensembl mysql database. It will check that the checksums match and do all the zipping and
unzipping of the files.
load_database_from_ftp_site.pl -root {root} -database {database} -new_database {database2}
-force -cleanup -quiet -help
-host {host} -port {port} -user {user} -pass {password}
-mysqltempdir {dir}
-root Root directory for ftp files
-database Database name to get data for
-new_database Name of the new database
-user User name to access database. Must allow writing.
-pass Password for user.
-host Database host.
-port Database port.
-force import data even if the checksums do not match
-cleanup remove the downloaded files at the end
-quiet No output except for serious error messages
-mysqltempdir Mysql may not have enough tmp space so this can be set to another directory
-help print this help text
examples:-
1) perl load_database_from_ftp_site.pl -database homo_sapiens_core_54_36p -host mysqlhostname
-user mysqluser -pass mysqlpassword -force
This will download the ftp files for the 54 release of the human core database and create a database
called <userid>_homo_sapiens_core_54_36p where userid is the login name of the user. To choose your
own database name use the -new_database option.
2) load_database_from_ftp_site.pl -database homo_sapiens_core_57_37d -new_database homo_sapiens_core_59_37d
-host mysqlhostname -user mysqluser -pass mysqlpassword -quiet -cleanup -mysqltempdir /scratch/
Will load the human core database into the mysql instance on mysqlhostname and use the directory
/scratch/ to use as the tmp directory for mysql.
EOH
}
# load_multiple_databases.pl
#
# Driver script: works out the full database names for a set of species
# and database groups on the public Ensembl MySQL server, then calls
# load_database_from_ftp_site.pl for each one.
use strict;
use warnings;
use Getopt::Long;
use Bio::EnsEMBL::Registry;
my $reg = "Bio::EnsEMBL::Registry";
# download and import the database
my $root;        # rsync/FTP root, passed through to the loader script
my $prefix="";   # prefix prepended to every new database name
my $release;     # Ensembl release number to fetch
my $specieslist; # comma separated species aliases (e.g. "human,mouse")
my $grouplist;   # comma separated groups (core,vega,... or "all")
my $user;
my $pass;
my $port=3306;
my $host;
my $cleanup = undef;
my $force = undef; # if set, ignore checksum mismatches and just write warnings.
my $mysqltempdir = undef;
my $quiet = 0;
my $run = undef; # unless set, only list what would be copied
GetOptions ('root=s' => \$root,
'prefix=s' => \$prefix,
'release=s' => \$release,
'species=s' => \$specieslist,
'groups=s' => \$grouplist,
'host=s' => \$host,
'force' => \$force,
'cleanup' => \$cleanup,
'port=s' => \$port,
'user=s' => \$user,
'pass=s' => \$pass,
'mysqltempdir=s' => \$mysqltempdir,
'quiet' => \$quiet,
'run' => \$run,
'help' => sub { usage(); exit(0);}
);
# Split the mandatory -species and -groups comma separated lists.
my @names;
if(defined($specieslist)){
@names = split(",",$specieslist);
}
else{
usage();
die "No species set?\n";
}
my @types;
if(defined($grouplist)){
@types = split(",",$grouplist);
}
else{
usage();
die "No groups set?\n";
}
my $db_version = undef;
#
#connect to latest databases to get species name
#
$reg->no_version_check(1);
$reg->load_registry_from_db(
-host => "ensembldb.ensembl.org",
-user => "anonymous",
-db_version => 59, # NOTE(review): hard-coded release pin; the original comment said "comment out later" -- confirm before release.
);
# Resolve each species alias to its binomial database prefix (e.g.
# "human" -> "homo_sapiens") by inspecting the core database name that
# the registry returns for that alias.
my @species;
foreach my $sp (@names){
my $adap = $reg->get_adaptor($sp, "core", "slice");
if(defined($adap)){
my $name = $adap->dbc->dbname;
# print $name."\n";
if(defined($name)){
if($name =~ /(\S+_\S+)_core/){
push @species, $1;
# print "sp is $1\n";
}
}
}
else{
print "Could not find species $sp so ignoring\n";
}
}
# Use the requested release (integers only), or default to the API's
# software version.
if(defined($release)){
if($release =~ /^\d+$/){
$db_version = $release;
}
else{
die "release must be an integer\n";
}
}
else{
$release = $reg->software_version();
}
# ensembldb.ensembl.org serves newer releases on port 5306, older ones on 3306.
my $sqlport = 5306;
if($release < 47){
$sqlport = 3306;
}
# Build the list of databases matching <species>_<group>_<release> on the
# public server using SHOW DATABASES LIKE.  The PORT/SPECIES/TYPE/RELEASE
# placeholders in the template are substituted per query; the group "all"
# turns TYPE into a bare wildcard.
my @database_list;
my $sqltemplate = 'mysql -hensembldb.ensembl.org -uanonymous -PPORT --skip-column-names -e\'show databases like "SPECIES%TYPE%RELEASE%"\'';
$sqltemplate =~ s/PORT/$sqlport/;
#print $sqltemplate."\n";
foreach my $sp (@species){
# print $sp."\n";
foreach my $ty (@types){
# print "\t$ty\n";
my $sql = $sqltemplate;
$sql =~ s/SPECIES/$sp/;
$sql =~ s/RELEASE/$release/;
if($ty eq "all"){
$sql =~ s/TYPE//;
}
else{
$ty .= "\\_";
$sql =~ s/TYPE/$ty/;
}
# print $sql."\n";
my $line = `$sql`;
my @vals = split(/\n/,$line);
foreach my $db (@vals){
# print "\t".$db."\n";
push @database_list, $db;
}
}
}
# Sanity checks: we need a target host/user, and the mysql client must be
# able to talk to the target instance before any downloads start.
if(!defined($host) or !defined $user){
usage();
die " No host or user\n";
}
#
# check the mysql instance the data is to be copied to.
#
my $com = "mysql -h$host -u$user -P$port ";
if(defined($pass)){
$com .= "-p$pass ";
}
$com .= "-e'show databases like \"justatest\"' ";
#print $com."\n";
my $line = `$com`;
if($?){
print $com." fails\n";
die "$line";
}
if($line =~ /ERROR/){
die "problem with mysql information\n$line\n";
}
# Build the common part of the loader command line, propagating only the
# options that were actually supplied.
use FindBin '$Bin';
my $com_init = "perl ".$Bin."/load_database_from_ftp_site.pl -host $host -user $user ";
if(defined($force)){
$com_init .= "-force ";
}
if(defined($cleanup)){
$com_init .= "-cleanup ";
}
if(defined($pass)){
$com_init .= "-pass $pass ";
}
if(defined($root)){
$com_init .= "-root $root ";
}
if(defined($mysqltempdir)){
$com_init .= "-mysqltempdir $mysqltempdir ";
}
# BUG FIX: $quiet defaults to 0 and is therefore ALWAYS defined, so the
# original "if(defined($quiet))" passed -quiet to the loader even when
# the flag was not given.  Test its truth instead.
if($quiet){
$com_init .= "-quiet ";
}
my $okay="";  # report: databases that would be / were copied
my $prob ="";  # report: databases that already exist on the target
foreach my $db (@database_list){
# Does the (prefixed) database already exist on the target?
my $com = "mysql -h$host -u$user -P$port ";
if(defined($pass)){
$com .= "-p$pass ";
}
$com .= "-e'show databases like \"$prefix$db\"'";
# print $db."\n";
$line = `$com`;
# print $line;
if($line =~ /$db/ and !defined($force)){
# Already present and -force not given: record it as a problem and skip.
$prob .= "\t$prefix$db\n";
next;
}
elsif(defined($run)){
my $cmd = $com_init."-database $db -new_database $prefix$db ";
print STDERR "Copying $db to $host as $prefix$db\n";
my $output = `$cmd`;
# BUG FIX: the loader's output was being discarded -- the original code
# wrote $line (the SHOW DATABASES result) to the log file instead of
# $output.  Also report a failed open instead of printing to an
# unopened handle.
if(open(OUT,">$db.OUTPUT")){
print OUT $output;
close OUT;
}
else{
print STDERR "Could not open $db.OUTPUT for writing: $!\n";
}
}
else{
$okay .= "\t$db to $host $prefix$db\n";
}
}
# Final report.  Without -run nothing has been copied: print what would
# happen so the list can be checked before committing to the copy.
if(!defined($run)){
if(length($prob) > 1){
print "Problem with the following databases as they already exist on $host\n";
print $prob;
}
if(length($okay) > 1){
print "The following would be copied:-\n";
print $okay;
}
print "\nYou need to set the flag -run to actually do the data copy\n";
print "By default it is not done so that this list can be checked first\n";
}
else{
if(length($prob) > 1){
print "Problem with the following databases as they already exist on $host so not copied\n";
print $prob;
}
}
# Print the command line help text.
# DOC FIXES vs the original text: the -prefix description had been copied
# from the other script's -database option, the synopsis advertised a
# nonexistent -list flag, the option list said "-mysqltmpdir" (the real
# option is -mysqltempdir), example 1 named a release-59 database for a
# release-54 dump, plus spelling fixes (dtaabase/serous/alot).
sub usage{
print << "EOH";
It uses the Registry from the core API to get the species name to pass on to the script
load_database_from_ftp.pl.
load_multiple_databases.pl -root {root} -prefix {prefix} -release {number}
-species {s1,s2,s3} -groups {type1,type2} -force -cleanup -quiet -help
-host {host} -port {port} -user {user} -pass {password}
-mysqltempdir {dir}
-root Root directory for ftp files
-prefix Prefix to add to each new database name
-release Release version of the database to get
-species Comma separated list of species to get
-groups Comma separated list of database types to get
( from core,variation,funcgen,otherfeatures,vega etc or all)
-user User name to access database. Must allow writing.
-pass Password for user.
-host Database host.
-port Database port.
-force import data even if the checksums do not match
or the new database already exists.
-cleanup remove the downloaded files at the end
-quiet No output except for serious error messages
-mysqltempdir Mysql may not have enough tmp space so this can be set to another directory
-run If set will start the download etc else it will just list the databases.
NOTE: Not default as this script does a lot so we want to make sure everything
is correct first before starting.
-help print this help text
examples:-
1) perl load_multiple_databases.pl -release 54 -groups core -species human -host mysqlhostname
-user mysqluser -pass mysqlpassword -force -run -prefix "copy_"
This will download the ftp files for the 54 release of the human core database and create a database
called copy_homo_sapiens_core_54_36p.
2) perl load_multiple_databases.pl -release 59 -species mouse -groups all -run
-host mysqlhostname -user mysqluser -pass mysqlpassword -quiet -cleanup -mysqltempdir /scratch/
Will load the mouse databases for release 59 into the mysql instance on mysqlhostname and use the directory
/scratch/ to use as the tmp directory for mysql.
This will load the databases:-
mus_musculus_cdna_59_37l
mus_musculus_core_59_37l
mus_musculus_funcgen_59_37l
mus_musculus_otherfeatures_59_37l
mus_musculus_variation_59_37l
mus_musculus_vega_59_37l
EOH
}
#!/ebi/extserv/bin/perl/bin/perl
# an example script demonstrating the use of the BioMart webservice
#
# NOTE this could have been implemented in the parser itself but the data
# is needed for the simple features, so it is dumped standalone here.
#
use strict;
use LWP::UserAgent;
# BioMart query: for every IKMC (dcc) record fetch the MGI accession,
# marker symbol, the three product-availability flags and the Ensembl
# gene id, as headerless unique-row TSV.
my $xml = (<<XXML);
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE Query>
<Query virtualSchemaName = "default" formatter = "TSV" header = "0" uniqueRows = "1" count = "" datasetConfigVersion = "0.6" >
<Dataset name = "dcc" interface = "default" >
<Attribute name = "mgi_accession_id" />
<Attribute name = "marker_symbol" />
<Attribute name = "vector_available" />
<Attribute name = "escell_available" />
<Attribute name = "mouse_available" />
<Attribute name = "ensembl_gene_id" />
</Dataset>
</Query>
XXML
# Stream the POSTed query result straight into ensembl_ikmc_initial.txt
# in 1000-byte chunks via the callback form of LWP's request().
# NOTE(review): HTTP::Request/HTTP::Headers are not use'd explicitly;
# they are assumed to be loaded as a side effect of LWP::UserAgent --
# confirm before reusing this in isolation.
open (OUT,">ensembl_ikmc_initial.txt");
my $path="http://www.i-dcc.org/biomart/martservice?";
my $request = HTTP::Request->new("POST",$path,HTTP::Headers->new(),'query='.$xml."\n");
my $ua = LWP::UserAgent->new;
my $response;
$ua->request($request,
sub{
my($data, $response) = @_;
if ($response->is_success) {
print OUT "$data";
}
else {
warn ("Problems with the web server: ".$response->status_line);
}
},1000);
close OUT;
# Parse the BioMart dump: one row per IKMC product, tab separated columns
#   0: MGI accession  1: marker symbol  2: vector available
#   3: ES cells available  4: mouse available  5: Ensembl gene id
# For each MGI accession keep the best availability status seen across
# all of its rows:  4 = mice > 3 = ES cells > 2 = vector > 1 = none yet.
my %symbols;
my %ensembl_ids;
my %status;
open (IN,"ensembl_ikmc_initial.txt") or die "Could not open ensembl_ikmc_initial.txt: $!\n";
#nb [9] is now cell_line_bg and [10] is backcross
while (<IN>){
my @line = split(/\t/,$_);
# BUG FIX: was "chop $line[5]" -- chop unconditionally removes the last
# character, truncating the gene id on a final line that lacks a
# trailing newline.  chomp only removes a newline when one is present.
chomp $line[5];
my $mgi_id = $line[0];
$symbols{$mgi_id}=$line[1];
$ensembl_ids{$mgi_id}=$line[5];
# First row for this accession: start at status 1 (nothing available).
$status{$mgi_id} = 1 unless defined $status{$mgi_id};
if ($status{$mgi_id} < 4 && $line[4] == 1){
$status{$mgi_id} = 4;
}
elsif ($status{$mgi_id} < 3 && $line[3] == 1){
$status{$mgi_id} = 3;
}
elsif ($status{$mgi_id} < 2 && $line[2] == 1){
$status{$mgi_id} = 2;
}
}
close IN;
# Write one xref line per gene with a human readable status description.
open (OUT,">ensembl_ikmc_xref.txt") or die "Could not open ensembl_ikmc_xref.txt: $!\n";
foreach my $mgi_id(keys %symbols){
my $description;
$description = 'No products available yet' if $status{$mgi_id} == 1;
$description = 'Vector available' if $status{$mgi_id} == 2;
$description = 'ES cells available' if $status{$mgi_id} == 3;
$description = 'Mice available' if $status{$mgi_id} == 4;
print OUT "$mgi_id\t$symbols{$mgi_id}\t$description\t$ensembl_ids{$mgi_id}\n";
}
close OUT;
#!/usr/bin/env perl
# push_master_tables.pl
#
# Compare the master_% tables in the production database against the
# corresponding tables on a set of servers, and write SQL patch files
# (fix-DBNAME.sql) that reconcile any discrepancies.  See about() below.
use strict;
use warnings;
use Data::Dumper;
use DBI qw( :sql_types );
use Getopt::Long qw( :config no_ignore_case );
use IO::File;
use POSIX qw( floor ceil );
# Print the command line synopsis.
sub usage {
my $padding = ' ' x length($0);
print <<USAGE_END;
Usage:
$0 --release NN --master master_server \\
$padding --server server1 --server server2 [...] \\
$padding --dbport 3306 --dbuser user --dbpass passwd
or
$0 --help
or
$0 --about
where
--release/-r The current release (required).
--master/-m The master server where the production database lives
(optional, default is 'ens-staging1').
--server/-s A database server (optional, may occur several times,
default is 'ens-staging1' and 'ens-staging2').
--dbport/-P The port to connect to (optional, default is '3306').
--dbuser/-u The (read-only) user to connect as (optional,
default is 'ensro').
--dbpass/-p The password to connect with (optional, no default).
--help/-h Displays this help text.
--about/-a Display a text about this program (what it does etc.).
USAGE_END
} ## end sub usage
# Print a longer description of what the program does and how the SQL
# patch files it writes should be reviewed and applied.
sub about {
print <<ABOUT_END;
Run the program with --help to get information about available command
line switches.
This program takes the master tables from the production database
and compares it to the corresponding tables on the given servers (by
default, the staging servers).
The program will display any discrepancies on the display
while writing SQL to files in the current directory that will
correct the discrepancies.
Each SQL patch file will have the generic name "fix-DBNAME.sql"
where "DBNAME" is the name of the database, e.g.,
"fix-oryctolagus_cuniculus_otherfeatures_60_3.sql".
A discrepancy is patched by
1) Insertion into the master table in the production database in the
case where a new entry has been added to a database without being
added to the master table.
2) Insertion into the database table in the case where a new master
entry is missing in the database.
3) Updating the database entry in the case where an entry (identified by
its primary key only) differs in any of its fields.
The SQL patch files may then be used to patch the databases:
\$ mysql -h server -u user -ppass < fix-DBNAME.sql
BE SURE TO REVIEW THESE SQL PATCH FILES
(along with the program output)
WITH YOUR EYE AND BRAIN
BEFORE APPLYING THEM
ABOUT_END
} ## end sub about
sub fetch_table {
# Fetch every row of <schema>.<table> into a hash keyed by the table's
# primary key, which is assumed to be named "<table>_id" (after any
# leading "master_" prefix has been stripped from the table name).
# Dies if a fetched row has no such column.
my ( $dbh, $schema, $table_name ) = @_;

my $qualified = $dbh->quote_identifier( undef, $schema, $table_name );
my $sth = $dbh->prepare( sprintf( 'SELECT * FROM %s', $qualified ) );
$sth->execute();

# Work out the primary key column name from the (prefix-stripped) table name.
( my $base_name = $table_name ) =~ s/^master_//;
my $pk_column = sprintf( '%s_id', $base_name );

my %rows;
while ( my $record = $sth->fetchrow_hashref() ) {
if ( !exists( $record->{$pk_column} ) ) {
die( sprintf( "Can not find expected primary key '%s'", $pk_column ) );
}
$rows{ $record->{$pk_column} } = $record;
}

return \%rows;
} ## end sub fetch_table
sub display_banner {
# Print one banner line: TEXT centred within a 79-character rule drawn
# with CHAR, e.g. "----- my.table -----".  Odd-length text puts the
# extra fill character on the left.
my ( $fill, $label ) = @_;
my $half = length($label) / 2;
my $left = $fill x ( 39 - floor($half) );
my $right = $fill x ( 39 - ceil($half) );
printf( "%s %s %s\n", $left, $label, $right );
}
# Command line defaults.
my $release;
my @servers = ( 'ens-staging1', 'ens-staging2' );
my $master = 'ens-staging1';
my $dbport = '3306';
my ( $dbuser, $dbpass ) = ( 'ensro', undef );
my $opt_help = 0;
my $opt_about = 0;
# NOTE(review): usage() advertises -a as a short form of --about, but the
# spec below is 'about!' with no alias -- confirm which is intended.
if ( !GetOptions( 'release|r=i' => \$release,
'master|m=s' => \$master,
'server|s=s@' => \@servers,
'dbuser|u=s' => \$dbuser,
'dbpass|p=s' => \$dbpass,
'dbport|P=s' => \$dbport,
'help|h!' => \$opt_help,
'about!' => \$opt_about )
|| $opt_help )
{
usage();
exit();
} elsif ($opt_about) {
about();
exit();
} elsif ( !defined($release) ) {
print("ERROR: Release was not specified! (use -r or --release)\n");
usage();
exit();
}
# The master tables to compare and the database types to scan per server.
my @tables =
( 'attrib_type', 'external_db', 'misc_set', 'unmapped_reason' );
my @dbtypes = ( 'core', 'otherfeatures', 'cdna', 'vega' );
my @db_handles;
my %master;
# Load every master_% table from the production database on the master
# server into %master, keyed by table name and then by primary key.
{
my $dsn = sprintf( "DBI:mysql:host=%s;port=%d", $master, $dbport );
my $dbh =
DBI->connect( $dsn, $dbuser, $dbpass, { 'PrintError' => 1 } );
foreach my $table (@tables) {
my $master_table = sprintf( 'master_%s', $table );
$master{$table} = fetch_table( $dbh,
sprintf( 'ensembl_production_%d',
$release ),
$master_table );
}
}
# Open one connection per server to compare against.
foreach my $server (@servers) {
my $dsn = sprintf( "DBI:mysql:host=%s;port=%d", $server, $dbport );
my $dbh =
DBI->connect( $dsn, $dbuser, $dbpass, { 'PrintError' => 1 } );
push( @db_handles, $dbh );
}
# %sql accumulates, per database name, the SQL patch statements that will
# be written to fix-DBNAME.sql at the end of the run.
my %sql;
foreach my $dbh (@db_handles) {
my $sth = $dbh->prepare('SHOW DATABASES LIKE ?');
foreach my $dbtype (@dbtypes) {
# Match databases of this type and release, e.g. %_core_60_%.
$sth->bind_param( 1,
sprintf( '%%\\_%s\\_%d\\_%%', $dbtype, $release ),
SQL_VARCHAR );
$sth->execute();
my $dbname;
$sth->bind_col( 1, \$dbname );
while ( $sth->fetch() ) {
printf( "##> Processing '%s'\n", $dbname );
foreach my $table (@tables) {
# Column metadata is needed so values can be quoted per type.
my $csth = $dbh->column_info( undef, $dbname, $table, '%' );
my $colinfo = $csth->fetchall_hashref( ['COLUMN_NAME'] );
my %table = %{ fetch_table( $dbh, $dbname, $table ) };
# Pass 1: rows present in the master table but missing from the
# database get an INSERT into the database.
foreach
my $pk ( sort { $a <=> $b } keys( %{ $master{$table} } ) )
{
if ( !exists( $table{$pk} ) ) {
my $row = $master{$table}{$pk};
my @fields = sort( keys( %{$row} ) );
push(
@{ $sql{$dbname} },
sprintf( "-- insert %s_id=%d in %s\n",
$table, $pk, $table ),
sprintf(
"INSERT INTO %s (\n\t%s\n) VALUES (\n\t%s\n);\n",
$dbh->quote_identifier( undef, $dbname, $table ),
join( ",\n\t",
map { $dbh->quote_identifier($_) } @fields ),
join(
",\n\t",
map {
$dbh->quote( $row->{$_},
$colinfo->{$_}{'DATA_TYPE'} )
} @fields ) ) );
}
} ## end foreach my $pk ( sort { $a ...})
# Pass 2: examine every database row -- zero primary keys are only
# reported; rows missing from master get a master INSERT (or an
# UPDATE hint when an identical row exists under another key);
# rows present in both but differing get an UPDATE.
foreach my $pk ( sort { $a <=> $b } keys(%table) ) {
my $master_row = $master{$table}{$pk};
my $row = $table{$pk};
if ( $pk == 0 ) {
display_banner( '-', sprintf( "%s.%s", $dbname, $table ) );
print( "==> Primary key is ZERO "
. "for the following row in DATABASE:\n",
Dumper($row),
"\n" );
} else {
my @fields = sort( keys( %{$row} ) );
if ( !defined($master_row) ) {
display_banner( '=',
sprintf( "%s.%s", $dbname, $table ) );
# Find other row in master table that is the same as
# database table row, but with different primary key.
my $is_missing = 1;
foreach my $master_pk ( keys( %{ $master{$table} } ) ) {
my $master_row = $master{$table}{$master_pk};
my $is_same = 1;
foreach my $field ( sort( keys( %{$master_row} ) ) ) {
if ( $field eq sprintf( '%s_id', $table ) ) {
# Skip the primary key.
next;
}
if ( $master_row->{$field} ne $row->{$field} ) {
$is_same = 0;
last;
}
}
if ($is_same) {
printf( "==> Entry with primary key %d "
. "is same as entry with primary key %d:\n%s",
$pk, $master_pk, Dumper($master_row) );
push( @{ $sql{$dbname} },
sprintf(
"-- Entries with %s_id = %d "
. "should change this to %d\n"
. "-- Useful SQL:\n"
. "-- UPDATE <table> "
. "SET %s_id = %d WHERE %s_id = %s;\n",
$table, $pk, $master_pk, $table,
$master_pk, $table, $pk ) );
$is_missing = 0;
}
} ## end foreach my $master_pk ( keys...)
if ($is_missing) {
# No equivalent row anywhere in master: propose an insert
# into the master table (flagged #HEADS_UP!# so it is not
# run blindly).
print( "==> The following row is MISSING IN MASTER:\n",
Dumper($row) );
push(
@{ $sql{$dbname} },
sprintf( "#HEADS_UP!# -- MASTER: insert from %s.%s\n",
$dbname, $table ),
sprintf(
"#HEADS_UP!# INSERT INTO %s (\n\t%s\n) "
. "VALUES (\n\t%s\n);\n",
$dbh->quote_identifier(
undef,
sprintf( 'ensembl_production_%d', $release ),
sprintf( 'master_%s', $table ) ),
join( ",\n#HEADS_UP!# \t",
map { $dbh->quote_identifier($_) } @fields ),
join(
",\n#HEADS_UP!# \t",
map {
$dbh->quote( $row->{$_},
$colinfo->{$_}{'DATA_TYPE'} )
} @fields ) ) );
print("\n");
} ## end if ($is_missing)
} else {
# Row exists in both: collect the fields whose values differ
# (external_db.db_release differences are deliberately ignored).
my %diff_fields;
foreach my $field (@fields) {
if ( defined( $master_row->{$field} )
|| defined( $row->{$field} ) )
{
if ( ( !defined( $master_row->{$field} )
&& defined( $row->{$field} ) )
|| ( defined( $master_row->{$field} )
&& !defined( $row->{$field} ) )
|| ( $master_row->{$field} ne $row->{$field} ) )
{
if ( !( $table eq 'external_db'
&& $field eq 'db_release' ) )
{
$diff_fields{$field} = $master_row->{$field};
}
}
}
}
if ( scalar( keys(%diff_fields) ) > 0 ) {
display_banner( '=',
sprintf( "%s.%s", $dbname, $table ) );
# Find other row in master table that is the same as
# database table row, but with different primary key.
my $is_missing = 1;
foreach my $master_pk ( keys( %{ $master{$table} } ) ) {
my $master_row = $master{$table}{$master_pk};
my $is_same = 1;
foreach my $field ( sort( keys( %{$master_row} ) ) ) {
if ( $field eq sprintf( '%s_id', $table ) ) {
# Skip the primary key.
next;
}
if ( $master_row->{$field} ne $row->{$field} ) {
$is_same = 0;
last;
}
}
if ($is_same) {
printf( "==> Entry with primary key %d "
. "is same as entry with primary key %d:\n%s",
$pk, $master_pk, Dumper($master_row) );
push( @{ $sql{$dbname} },
sprintf(
"-- Entries with %s_id = %d "
. "should change this to %d\n"
. "-- Useful SQL:\n"
. "-- UPDATE <table> "
. "SET %s_id = %d WHERE %s_id = %s;\n",
$table, $pk, $master_pk, $table,
$master_pk, $table, $pk ) );
$is_missing = 0;
}
} ## end foreach my $master_pk ( keys...)
if ($is_missing) {
printf( "==> The following row differs in %s.\n",
join( ', ', keys(%diff_fields) ) );
print( "==> MASTER row:\n", Dumper($master_row),
"==> DATABASE row:\n", Dumper($row) );
# NOTE(review): the inner sprintf below passes THREE arguments
# for a two-%s format; $colinfo->{$_}{'DATA_TYPE'} is silently
# ignored and looks intended as the second argument of
# $dbh->quote(...) as done in the INSERT branches -- confirm.
push(
@{ $sql{$dbname} },
sprintf( "-- update %s in %s\n",
join( ', ', keys(%diff_fields) ), $table ),
sprintf(
"UPDATE %s\nSET %s\nWHERE %s_id = %d;\n",
$dbh->quote_identifier( undef, $dbname, $table ),
join(
', ',
map {
sprintf( "%s = %s",
$_,
$dbh->quote( $diff_fields{$_} ),
$colinfo->{$_}{'DATA_TYPE'} )
}
keys(%diff_fields) ),
$table,
$pk ) );
print("\n");
} ## end if ($is_missing)
} ## end if ( scalar( keys(%diff_fields...)))
} ## end else [ if ( !defined($master_row...))]
} ## end else [ if ( $pk == 0 ) ]
} ## end foreach my $pk ( sort { $a ...})
} ## end foreach my $table (@tables)
} ## end while ( $sth->fetch() )
} ## end foreach my $dbtype (@dbtypes)
} ## end foreach my $dbh (@db_handles)
# Write one fix-DBNAME.sql file per database that needs patching.
if ( scalar( keys(%sql) ) > 0 ) {
foreach my $db_name ( keys(%sql) ) {
my $filename = sprintf( "fix-%s.sql", $db_name );
printf( "==> Writing SQL to '%s'\n", $filename );
my $out = IO::File->new( $filename, 'w' );
$out->print( @{ $sql{$db_name} } );
$out->close();
}
} else {
print("Nothing to do, all seems ok\n");
}
# Close all server connections on exit, however the script terminates.
END {
foreach my $dbh (@db_handles) {
$dbh->disconnect();
}
}
#!/usr/bin/env perl
# update_database_list.pl
#
# Scan a set of database servers for per-species databases of the current
# release and register them (species rows plus one db row per database)
# in the production database.
use strict;
use warnings;
use Getopt::Long qw( :config no_ignore_case );
use DBI qw( :sql_types );
# Print the command line synopsis.
sub usage {
my $padding = ' ' x length($0);
print <<USAGE_END;
Usage:
$0 --release NN --master master-server \\
$padding --server server1 --server server2 [...] \\
$padding --dbport 3306 --dbuser user --dbpass passwd \\
$padding --dbwuser write_user --dbwpass write_passwd
or
$0 --help
or
$0 --about
where
--release/-r The current release (required).
--master/-m The master server where the production database lives
(optional, default is 'ens-staging1').
--server/-s A database server (optional, may occur several times,
default is 'ens-staging1', and 'ens-staging2').
--dbport/-P The port to connect to (optional, default is '3306').
--dbuser/-u The (read only) user to connect as (optional,
default is 'ensro').
--dbpass/-p The password to connect with as the above user
(optional, no default).
--dbwuser/-wu The user (with write permissions) to connect as
(optional, default is 'ensadmin').
--dbwpass/-wp The password to connect with as the above user
(optional, no default).
--help/-h Displays this help text.
--about/-a Displays a text about this program (what it does etc.).
USAGE_END
} ## end sub usage
# Command line defaults.
my $release;
my @servers = ( 'ens-staging1', 'ens-staging2' );
my $master = 'ens-staging1';
my $dbport = '3306';
my ( $dbwuser, $dbwpass ) = ( 'ensadmin', undef );
# BUG FIX: these were declared as ($duser, $dbass), leaving $dbuser and
# $dbpass (used by GetOptions below and by the DBI connects) undeclared;
# under "use strict" the script did not even compile.
my ( $dbuser, $dbpass ) = ( 'ensro', undef );
my $opt_help = 0;
my $opt_about = 0;
# BUG FIX: the write-user options were spelled 'dbrouser|wu' and
# 'dbropass|wp' with no '=s', so the --dbwuser/--dbwpass switches that
# usage() documents never existed and no value could be supplied anyway.
if ( !GetOptions( 'release|r=i' => \$release,
'master|m=s' => \$master,
'server|s=s@' => \@servers,
'dbuser|u=s' => \$dbuser,
'dbpass|p=s' => \$dbpass,
'dbport|P=s' => \$dbport,
'dbwuser|wu=s' => \$dbwuser,
'dbwpass|wp=s' => \$dbwpass,
'help|h!' => \$opt_help,
'about!' => \$opt_about )
|| $opt_help )
{
usage();
exit();
} elsif ($opt_about) {
# NOTE(review): no about() sub is defined in this script, so running
# with --about dies with an undefined-subroutine error -- confirm
# whether the sub from push_master_tables.pl was meant to be copied in.
about();
exit();
} elsif ( !defined($release) ) {
print("ERROR: Release was not specified! (use -r or --release)\n");
usage();
exit();
}
use Carp;  # BUG FIX: carp() is called below but Carp was never loaded.
# Scan every server for species databases of the known types and record,
# per species and type, the release / assembly / suffix / host plus the
# common name read from the database's meta table.
my %databases;
foreach my $server (@servers) {
my $dsn = sprintf( 'DBI:mysql:host=%s;port=%d', $server, $dbport );
my $dbh = DBI->connect( $dsn, $dbuser, $dbpass,
{ 'PrintError' => 1, 'RaiseError' => 0 } );
foreach my $dbtype ( 'cdna', 'core',
'coreexpressionatlas', 'coreexpressionest',
'coreexpressiongnf', 'funcgen',
'otherfeatures', 'variation',
'vega' )
{
my $sth = $dbh->prepare(
sprintf( "SHOW DATABASES LIKE '%%\\_%s\\_%%'", $dbtype ) );
$sth->execute();
my $dbname;
$sth->bind_col( 1, \$dbname );
while ( $sth->fetch() ) {
# Only names of the form
# <genus>_<species>_<type>_<release>_<assembly><suffix>
# are handled, and only the first database per species/type.
if ( $dbname !~
/^([a-z]+_[a-z]+)_([a-z]+)_([0-9]+)_([0-9]+)([a-z]?)$/
|| exists( $databases{$1}{$2} ) )
{
next;
}
if ( $2 ne $dbtype ) {
carp( sprintf( "Strange database type '%s', expected '%s'",
$2, $dbtype ) );
next;
}
# Pull the species common name out of the meta table (if present).
my $cn_sth = $dbh->prepare(
sprintf(
"SELECT meta_value "
. "FROM %s.meta "
. "WHERE meta_key = 'species.common_name'",
$dbh->quote_identifier($dbname) ) );
$cn_sth->execute();
my $common_name;
if ( !$cn_sth->err() ) {
$cn_sth->bind_col( 1, \$common_name );
while ( $cn_sth->fetch() ) { }
}
$databases{$1}{$2} = { 'db_release' => $3,
'db_assembly' => $4,
'db_suffix' => $5,
'db_host' => $server,
'common_name' => $common_name };
} ## end while ( $sth->fetch() )
} ## end foreach my $dbtype ( 'cdna'...)
$dbh->disconnect();
} ## end foreach my $server (@servers)
# NOTE(review): this bare die aborts the script here, so the inserts into
# the production database below never run.  It looks like a debugging
# leftover; remove it once the code below has been verified against a
# test production database.
die;
# Connect (as the write user) to the production database and insert any
# missing species plus one "db" row per database discovered above.
my $dsn = sprintf( 'DBI:mysql:host=%s;port=%s;database=%s',
$master, $dbport,
sprintf( 'ensembl_production_%d', $release ) );
my $dbh = DBI->connect( $dsn, $dbwuser, $dbwpass,
{ 'PrintError' => 0, 'RaiseError' => 0 } );
# Look up an existing species by its db_name ...
my $sp_sel_sth = $dbh->prepare(
q(
SELECT species_id
FROM species
WHERE db_name = ?
) );
# ... create it when it does not exist yet ...
my $sp_sth = $dbh->prepare(
q(
INSERT INTO species
(db_name, common_name)
VALUES (?, ?)
) );
# ... and register each discovered database against its species.
my $db_sth = $dbh->prepare(
q(
INSERT INTO db
(species_id, db_type, db_release, db_assembly, db_suffix, db_host)
VALUES (?, ?, ?, ?, ?, ?)
) );
foreach my $db_name ( sort( keys(%databases) ) ) {
foreach my $db_type ( sort ( keys( %{ $databases{$db_name} } ) ) ) {
my $entry = $databases{$db_name}{$db_type};
$sp_sel_sth->bind_param( 1, $db_name, SQL_VARCHAR );
$sp_sel_sth->execute();
my $species_id;
$sp_sel_sth->bind_col( 1, \$species_id );
while ( $sp_sel_sth->fetch() ) { }
if ( !defined($species_id) ) {
$sp_sth->bind_param( 1, $db_name, SQL_VARCHAR );
$sp_sth->bind_param( 2, $entry->{'common_name'}, SQL_VARCHAR );
printf( "Inserting '%s' ('%s') into species_list table... ",
$db_name, $entry->{'common_name'} );
$sp_sth->execute();
if ( $sp_sth->err() ) {
print("failed\n");
next;
} else {
print("ok\n");
}
# mysql_insertid holds the AUTO_INCREMENT id of the new species row.
$species_id = $dbh->{'mysql_insertid'};
}
if ( !defined($species_id) ) {
die( sprintf( "No species_id for '%s'.", $db_name ) );
}
$db_sth->bind_param( 1, $species_id, SQL_INTEGER );
$db_sth->bind_param( 2, $db_type, SQL_VARCHAR );
$db_sth->bind_param( 3, $entry->{'db_release'}, SQL_INTEGER );
$db_sth->bind_param( 4, $entry->{'db_assembly'}, SQL_INTEGER );
$db_sth->bind_param( 5, $entry->{'db_suffix'}, SQL_VARCHAR );
$db_sth->bind_param( 6, $entry->{'db_host'}, SQL_VARCHAR );
printf( "Inserting database '%s_{%s}_%d_%d%s'... ",
$db_name, $db_type,
$entry->{'db_release'},
$entry->{'db_assembly'},
$entry->{'db_suffix'} );
$db_sth->execute();
if ( $db_sth->err() ) {
print("failed\n");
} else {
print("ok\n");
}
} ## end foreach my $db_type ( sort ...)
} ## end foreach my $db_name ( sort(...))
$dbh->disconnect();
#!/usr/bin/env perl
# This little script will bootstrap the master tables in the production
# database (and nothing else). This means copying the relevant tables
# from a known correct database (the "template database") into the
# master_% tables in the production database.
#
# The template database needs to live on the same server as the
# production database.
#
# The output of this script is SQL written to standard output. The SQL
# needs to be run against the MySQL server manually.
#
use strict;
use warnings;
use Carp;
use DBI (':sql_types');
# The single argument is the Core database to copy the tables from.
my $template_db = $ARGV[0];
if ( !defined($template_db) ) {
print STDERR <<END_USAGE;
Usage:
$0 template_database_name >output_file.sql
"template_database_name" should be the name of a Core database.
END_USAGE
exit;
}
# NOTE(review): these connection settings (and the DBI/Carp imports) are
# unused in the visible code -- the script only prints SQL; presumably
# kept for a later direct-load variant.  Confirm before removing.
my $dbport = '3306';
my $dbuser = 'ensro';
my $dbpass = '';
#-----------------------------------------------------------------------
# The "simple" tables.
{
my @simple_tables =
( 'attrib_type', 'external_db', 'misc_set', 'unmapped_reason' );
# For each table, emit SQL that rebuilds master_<table> as an exact copy
# (structure via CREATE TABLE ... LIKE, then data) of the template
# database's <table>.
foreach my $table (@simple_tables) {
print(
qq(
DROP TABLE IF EXISTS master_${table};
CREATE TABLE master_${table}
LIKE ${template_db}.${table};
INSERT INTO master_${table}
SELECT *
FROM ${template_db}.${table};
) );
}
}
-- Schema of tables not added by the bootstrap_master.pl script.
-- NB: Additional tables are added by the web team to support storing
-- declarations of intentions etc. Those tables are not even mentioned
-- here.
-- The 'species' table.
-- Lists the species for which there is a Core database.
CREATE TABLE species (
  species_id  INTEGER UNSIGNED NOT NULL AUTO_INCREMENT,
  db_name     VARCHAR(32) NOT NULL, -- Name used in database names.
  common_name VARCHAR(32) NOT NULL, -- What we often refer to it as.
  web_name    VARCHAR(32) NOT NULL, -- Name that the web site is using.
  -- NOTE(review): presumably cleared when a species is dropped from a
  -- release; confirm the intended use of is_current.
  is_current  BOOLEAN NOT NULL DEFAULT true,

  PRIMARY KEY (species_id),
  UNIQUE INDEX db_name_idx (db_name)
);
-- The 'db' table.
-- This table contains all species-specific databases for this release.
CREATE TABLE db (
  db_id       INTEGER UNSIGNED NOT NULL AUTO_INCREMENT,
  species_id  INTEGER UNSIGNED NOT NULL, -- FK into 'species'.
  db_type     ENUM('cdna', 'core', 'coreexpressionatlas',
                   'coreexpressionest', 'coreexpressiongnf',
                   'funcgen', 'otherfeatures', 'variation', 'vega')
              NOT NULL DEFAULT 'core',
  db_release  INTEGER NOT NULL,
  db_assembly INTEGER NOT NULL,
  db_suffix   CHAR(1) DEFAULT '',
  db_host     VARCHAR(32) DEFAULT NULL,

  PRIMARY KEY (db_id),
  -- At most one database of a given type per species and release
  -- (assembly and suffix are not part of the key).
  UNIQUE INDEX species_release_idx (species_id, db_type, db_release)
);
-- The 'biotype' table.
-- Contains all the valid biotypes used for genes and transcripts.
CREATE TABLE biotype (
  biotype_id  INTEGER UNSIGNED NOT NULL AUTO_INCREMENT,
  name        VARCHAR(64) NOT NULL,
  is_current  BOOLEAN NOT NULL DEFAULT true,
  is_dumped   BOOLEAN NOT NULL DEFAULT true,
  object_type ENUM('gene', 'transcript') NOT NULL DEFAULT 'gene',
  -- A SET, not an ENUM: one biotype row may apply to several database
  -- types at once.
  db_type     SET('cdna', 'core', 'coreexpressionatlas',
                  'coreexpressionest', 'coreexpressiongnf', 'funcgen',
                  'otherfeatures', 'variation', 'vega')
              NOT NULL DEFAULT 'core',
  description TEXT,

  PRIMARY KEY (biotype_id),
  UNIQUE INDEX name_type_idx (name, object_type, db_type)
);
-- The 'meta_key' table.
-- Contains the meta keys that may or must be available in the 'meta'
-- table in the Core databases.
CREATE TABLE meta_key (
  meta_key_id      INTEGER UNSIGNED NOT NULL AUTO_INCREMENT,
  name             VARCHAR(64) NOT NULL,
  is_optional      BOOLEAN NOT NULL DEFAULT false,
  is_current       BOOLEAN NOT NULL DEFAULT true,
  -- NOTE(review): unlike db.db_type and biotype.db_type, this SET
  -- omits the 'coreexpression*' types -- confirm that is intentional.
  db_type          SET('cdna', 'core', 'funcgen', 'otherfeatures',
                       'variation', 'vega') NOT NULL DEFAULT 'core',
  only_for_species TEXT,
  description      TEXT,

  PRIMARY KEY (meta_key_id),
  UNIQUE INDEX name_type_idx (name, db_type)
);
-- The 'analysis_description' table.
-- TODO: ANY DATA FOUND IN THIS TABLE IS NOT YET "REAL".
-- DEVELOPMENT IS STILL UNDERWAY.
-- Contains the analysis logic name along with the data that should
-- be available in the 'analysis_description' table, except for the
-- 'web_data' column.
CREATE TABLE analysis_description (
  analysis_description_id INTEGER UNSIGNED NOT NULL AUTO_INCREMENT,
  analysis_id             INTEGER UNSIGNED NOT NULL,
  logic_name              VARCHAR(128) NOT NULL,
  description             TEXT,
  display_label           VARCHAR(256) NOT NULL,
  displayable             BOOLEAN,

  PRIMARY KEY (analysis_description_id),
  -- Both the analysis id and the logic name map one-to-one onto a
  -- description row.
  UNIQUE INDEX analysis_id_idx (analysis_id),
  UNIQUE INDEX logic_name_idx (logic_name)
);
-- The 'web_data' table.
-- TODO: ANY DATA FOUND IN THIS TABLE IS NOT YET "REAL".
-- DEVELOPMENT IS STILL UNDERWAY.
-- Contains the data for the 'web_data' column in the
-- 'analysis_description' table.
-- The 'web_data' is a hash and we store this as key-value pairs
-- ('hash_key' and 'hash_value'). The 'hash_key' might contain double
-- colons ('::') to distinguish sub-hash keys, e.g. 'default::MultiTop'.
CREATE TABLE web_data (
  web_data_id INTEGER UNSIGNED NOT NULL AUTO_INCREMENT,
  hash_key    VARCHAR(32) NOT NULL,  -- may contain '::' sub-hash separators
  hash_value  VARCHAR(128),

  PRIMARY KEY (web_data_id)
);
-- The 'analysis_web_data' table.
-- TODO: ANY DATA FOUND IN THIS TABLE IS NOT YET "REAL".
-- DEVELOPMENT IS STILL UNDERWAY.
-- This table connects the 'analysis_description' table with the
-- 'web_data' and 'db' tables.
CREATE TABLE analysis_web_data (
  analysis_description_id INTEGER UNSIGNED NOT NULL,
  web_data_id             INTEGER UNSIGNED NOT NULL,
  db_id                   INTEGER UNSIGNED NOT NULL,

  -- Pure link table: no surrogate key, just uniqueness of the triple.
  UNIQUE KEY analysis_web_data_db_idx
    (analysis_description_id, web_data_id, db_id)
);
-- VIEWS
-- NOTE(review): CONCAT() yields NULL when any argument is NULL, so a
-- row whose db_suffix was explicitly set to NULL (the column is
-- nullable) produces a NULL full_db_name.
CREATE VIEW db_list AS
SELECT CONCAT(
         CONCAT_WS('_', db_name, db_type, db_release, db_assembly),
         db_suffix) AS full_db_name
  FROM species
  JOIN db USING (species_id);
-- CREATE VIEW readable_web_data AS
-- SELECT CONCAT('{',
-- GROUP_CONCAT(data SEPARATOR ','),
-- '}') AS web_data
-- FROM analysis_web_data awd
-- JOIN web_data wd USING (web_data_id)
-- GROUP BY
package XrefMapper::culex_quinquefasciatus;

use strict;
use warnings;

use XrefMapper::BasicMapper;
use XrefMapper::VBCoordinateMapper;

# 'use vars' is obsolete; declare the inheritance list with 'our' so
# the package compiles cleanly under 'use strict'.
our @ISA = qw( XrefMapper::BasicMapper );

# Which mapping-method / species-list sets to run for this species.
# NOTE(review): the species named here is "culex_pipiens" rather than
# "culex_quinquefasciatus" -- presumably deliberate (shared source
# data), but worth confirming.
sub get_set_lists {
  return [ [ "ExonerateGappedBest1_culex", [ "culex_pipiens", "*" ] ] ];
}

# Transcript and gene display_xrefs can use the defaults, since
# anopheles_symbol is "before" Uniprot.

# Sources used to assign gene descriptions.
# Reverse order: the latest one in the list has higher precedence!
sub gene_description_sources {
  return (
    "VB_External_Description",
    "VB_RNA_Description",
    "Uniprot/SWISSPROT",
    "VB_Community_Annotation"
  );
}

# Sources considered when picking transcript display xrefs, plus a
# (currently empty) hash of xrefs to ignore.
sub transcript_display_xref_sources {
  my @list = qw(RFAM
                miRBase
                Uniprot/SWISSPROT
                VB_Community_Annotation
               );

  my %ignore;

  return [ \@list, \%ignore ];
}

# Regexps matching any descriptions we want to filter out (none for
# this species).
sub gene_description_filter_regexps {
  return ();
}

# Sources whose label should not be displayed.
sub no_source_label_list {
  my $self = shift;
  my @list;

  print "Using no_source_label_list :-)\n";

  push @list, "VB_RNA_Description";
  push @list, "VB_External_Description";

  return \@list;
}

1;
package XrefParser::IKMCParser;
use strict;
use LWP::UserAgent;
use base qw( XrefParser::BaseParser );
# Plain constructor: bless an empty hash into the requested class.
# Accepts either a class name or an existing object as the invocant.
sub new {
  my ($proto) = @_;
  my $class = ref($proto) ? ref($proto) : $proto;
  return bless {}, $class;
}
# Fetch IKMC knockout-product availability from the I-DCC BioMart
# (streamed over HTTP in 1000-byte chunks), rank each MGI accession by
# the best product seen (1 = none yet, 2 = vector, 3 = ES cells,
# 4 = mice), then load one xref per accession under the matching
# IKMC_* source, plus a direct xref to the Ensembl gene where known.
#
# Positional args: $file ("type:args" spec), $source_id
# (NOTE(review): unused -- the per-status ids in %type2id are used
# instead), $species_id, $verbose (NOTE(review): unused).
# Returns 0 on completion.
sub run_script {
  # Drop the invocant only when called from inside another sub, so
  # both method-style and plain function-style calls work.
  my $self = shift if (defined(caller(1)));
  my $file = shift;
  my $source_id = shift;
  my $species_id = shift;
  my $verbose = shift;

  my ($type, $my_args) = split(/:/,$file);

  # Resolve the xref source id for each availability category; source
  # names are the category text with "IKMC_" prefixed and spaces
  # replaced by underscores.
  my %type2id;
  foreach my $t ("No products available yet", "Vector available", "ES cells available", "Mice available"){
    my $ikmc = "IKMC_".$t;
    $ikmc =~ s/ /_/g;
    $type2id{$t} = XrefParser::BaseParser->get_source_id_for_source_name($ikmc);
    # print $ikmc."\t".$type2id{$t}."\n";
    if(!defined( $type2id{$t})){
      die "Could not get source id for $ikmc\n";
    }
  }

  # BioMart query: TSV output, one row per gene, columns in the order
  # assumed by the $fields indexing below.  The heredoc is sent to the
  # server verbatim.
  my $xml = (<<XXML);
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE Query>
<Query virtualSchemaName = "default" formatter = "TSV" header = "0" uniqueRows = "1" count = "" datasetConfigVersion = "0.6" >
<Dataset name = "dcc" interface = "default" >
<Attribute name = "mgi_accession_id" />
<Attribute name = "marker_symbol" />
<Attribute name = "vector_available" />
<Attribute name = "escell_available" />
<Attribute name = "mouse_available" />
<Attribute name = "ensembl_gene_id" />
</Dataset>
</Query>
XXML

  # print $xml."\nYO\n";
  my %symbols;      # MGI accession -> marker symbol
  my %ensembl_ids;  # MGI accession -> Ensembl gene stable id
  my %status;       # MGI accession -> best availability rank (1..4)
  my $path="http://www.i-dcc.org/biomart/martservice?";
  my $request = HTTP::Request->new("POST",$path,HTTP::Headers->new(),'query='.$xml."\n");
  my $ua = LWP::UserAgent->new;
  my $response;
  # print "getting data from url\n";
  my $line_count=0;
  my $old_data="";  # possibly-incomplete trailing line carried between chunks
  my $chunks = 0;
  my $before;       # NOTE(review): unused.

  # Stream the response in 1000-byte chunks.  A chunk may end in the
  # middle of a line, so the last line of every chunk is stashed in
  # $old_data and prepended to the next chunk before splitting.
  $ua->request($request,
    sub{
      my($data, $response) = @_;
      if ($response->is_success) {
        chomp $data;
        # If the new chunk starts a fresh record, the stashed line was
        # actually complete; restore its newline so split() keeps the
        # two records apart.
        if($data =~ /^MGI:/ and $chunks){
          $old_data .= "\n";
        }
        my $data_line= $old_data.$data;
        my @lines = split(/\n/,$data_line);
        if(length($lines[-1]) == 0){
          pop @lines;
        }
        $old_data = "";
        my $count=0;
        $chunks++;
        my $max= scalar(@lines);
        foreach my $entry (@lines){
          $count++;
          my @fields = split(/\t/,$entry);
          next if (!length($entry));
          if($count == $max){ # possible incomplete line
            $old_data = $entry;
            next;
          }
          elsif($count > $max){
            die "What the celery is going on here";
          }
          else{
            $line_count++;
            my $mgi_id = $fields[0];
            if(!($mgi_id =~ /MGI:/)){
              print "PROB1:$data_line\n";
              print "PROB2:".join(', ',@fields)."\n";
            }
            $symbols{$mgi_id}=$fields[1];
            $ensembl_ids{$mgi_id}=$fields[5];
            # Upgrade-only ranking: keep the best product seen so far
            # for this accession.
            $status{$mgi_id} = 1 if ($status{$mgi_id} eq '');
            if ($status{$mgi_id} < 4 && $fields[4] == 1){
              $status{$mgi_id} = 4;
            }
            elsif ($status{$mgi_id} < 3 && $fields[3] == 1){
              $status{$mgi_id} = 3;
            }
            elsif ($status{$mgi_id} < 2 && $fields[2] == 1){
              $status{$mgi_id} = 2;# print "$data";
            }
          }
        }
      }
      else {
        warn ("Problems with the web server: ".$response->status_line);
        return 1;
      }
    },1000);
  # print "Number of chunks is $chunks\n";

  # The chunk loop above always defers the final line of a chunk, so
  # one complete record is still waiting in $old_data; process it the
  # same way as the records inside the callback.
  if($old_data){
    my @fields = split(/\t/,$old_data);
    $line_count++;
    # chop $line[5];
    my $mgi_id = $fields[0];
    if(!($mgi_id =~ /MGI:/)){
      print "PROB3:$old_data\n";
      print "PROB4:".join(', ',@fields)."\n";
    }
    $symbols{$mgi_id}=$fields[1];
    $ensembl_ids{$mgi_id}=$fields[5];
    $status{$mgi_id} = 1 if ($status{$mgi_id} eq '');
    if ($status{$mgi_id} < 4 && $fields[4] == 1){
      $status{$mgi_id} = 4;
    }
    elsif ($status{$mgi_id} < 3 && $fields[3] == 1){
      $status{$mgi_id} = 3;
    }
    elsif ($status{$mgi_id} < 2 && $fields[2] == 1){
      $status{$mgi_id} = 2;# print "$data";
    }
  }
  # print "obtained $line_count lines\n";

  # Store one xref per accession under the source matching its final
  # availability rank, and a direct xref to the Ensembl gene if known.
  my $parsed_count = 0;
  my $direct_count = 0;
  foreach my $acc (keys %symbols){
    my $source_id;    # shadows the (unused) parameter of the same name
    $source_id = $type2id{'No products available yet'} if $status{$acc} == 1;
    $source_id = $type2id{'Vector available'} if $status{$acc} == 2;
    $source_id = $type2id{'ES cells available'} if $status{$acc} == 3;
    $source_id = $type2id{'Mice available'} if $status{$acc} == 4;
    my $label = $symbols{$acc} || $acc;
    my $ensembl_id = $ensembl_ids{$acc};
    # print OUT "$acc\t$symbols{$acc}\t$description\t$ensembl_ids{$acc}\n";
    my $type = 'gene';
    ++$parsed_count;
    # Reuse an existing xref for this accession/source if present,
    # otherwise create one.
    my $xref_id =
      XrefParser::BaseParser->get_xref( $acc, $source_id, $species_id );
    if ( !defined($xref_id) || $xref_id eq '' ) {
      $xref_id =
        XrefParser::BaseParser->add_xref(
          $acc, undef, $label,
          '', $source_id, $species_id, "DIRECT"
        );
    }
    next if(!defined($ensembl_ids{$acc}));
    $direct_count++;
    XrefParser::BaseParser->add_direct_xref( $xref_id, $ensembl_id,
      $type, $acc );
  }
  # NOTE(review): "succesfully" is a typo in this runtime message;
  # deliberately left unchanged in this documentation-only pass.
  printf( "%d xrefs succesfully parsed and %d direct xrefs added\n", $parsed_count, $direct_count );
  return 0;
} ## end sub run
1;
=head1 LICENSE
Copyright (c) 1999-2010 The European Bioinformatics Institute and
Genome Research Limited. All rights reserved.
This software is distributed under a modified Apache license.
For license details, please see
http://www.ensembl.org/info/about/code_licence.html
=head1 CONTACT
Please email comments or questions to the public Ensembl
developers list at <dev@ensembl.org>.
Questions may also be sent to the Ensembl help desk at
<helpdesk@ensembl.org>.
=cut
=head1 NAME
Bio::EnsEMBL::ApiVersion
=head1 SYNOPSIS
use Bio::EnsEMBL::ApiVersion;
printf( "The API version used is %s\n", software_version() );
=head1 DESCRIPTION
The module exports the software_version() subroutine which returns the
release version of the Ensembl Core API.
=cut
package Bio::EnsEMBL::ApiVersion;

use strict;
use warnings;

# 'use base' both loads Exporter and sets up @ISA, so the separate
# (redundant) 'use Exporter;' line has been removed.
use base qw( Exporter );

our @EXPORT = qw( software_version );

# The Ensembl Core API release this code corresponds to.
my $API_VERSION = 60;

# Return the release version (integer) of the Ensembl Core API.
sub software_version { return $API_VERSION }

1;
This diff is collapsed.
=head1 LICENSE
Copyright (c) 1999-2010 The European Bioinformatics Institute and
Genome Research Limited. All rights reserved.
This software is distributed under a modified Apache license.
For license details, please see
http://www.ensembl.org/info/about/code_licence.html
=head1 CONTACT
Please email comments or questions to the public Ensembl
developers list at <dev@ensembl.org>.
Questions may also be sent to the Ensembl help desk at
<helpdesk@ensembl.org>.
=cut
=head1 NAME
Bio::EnsEMBL::OntologyXref
=head1 DESCRIPTION
This class extends the DBEntry in order to associate Evidence Tags
to the relationship between EnsEMBL objects and ontology accessions
(primarily GO accessions).
The relationship to GO that is stored in the database is actually
derived through the relationship of EnsEMBL peptides to SwissProt
peptides, i.e. the relationship is derived like this:
ENSP -> SWISSPROT -> GO
And the evidence tag describes the relationship between the SwissProt
Peptide and the GO entry.
In reality, however, we store this in the database like this:
ENSP -> SWISSPROT
ENSP -> GO
and the evidence tag hangs off of the relationship between the ENSP and
the GO identifier. Some ENSPs are associated with multiple closely
related Swissprot entries which may both be associated with the same GO
identifier but with different evidence tags. For this reason a single
'OntologyXref' can have multiple evidence tags.
=head1 SYNOPSIS
my $ontology_xref = Bio::EnsEMBL::OntologyXref->new();
$ontology_xref->add_linkage_type('IEA');
foreach my $evtag ( @{ $ontology_xref->get_all_linkage_types() } ) {
print "$evtag\n";
}
=head1 METHODS
=cut
package Bio::EnsEMBL::OntologyXref;
use strict;
use base qw( Bio::EnsEMBL::DBEntry );
=head2 add_linkage_type
Arg [1] : string $value
allowed values:
'IC', 'IDA', 'IEA', 'IEP', 'IGI', 'IMP', 'IPI',
'ISS', 'NAS', 'ND', 'TAS', 'NR', 'RCA'
Arg [2] : (optional) Bio::EnsEMBL::DBEntry $source
Example : $ontology_xref->add_linkage_type('IGI');
Description: Associates a linkage type and source DBEntry with
this ontology_xref
Returntype : integer; number of linkages
Exceptions : thrown if $linkage_type argument not supplied or
the optional DBEntry is not a DBEntry object.
Caller : DBEntryAdaptor
Status : Experimental
=cut
# Record an evidence-tag / optional source-DBEntry pair on this
# ontology_xref.  Returns the new number of stored linkages (the
# result of the final push).
sub add_linkage_type {
  my ( $self, $linkage_type, $source_dbentry ) = @_;

  $self->throw("linkage type argument required")
    if !defined($linkage_type);

  if ( defined($source_dbentry) ) {
    $self->throw("source_dbentry must be a Bio::EnsEMBL::DBEntry")
      unless $source_dbentry->isa('Bio::EnsEMBL::DBEntry');
  }

  # Lazily create the storage list, then append either [tag] or
  # [tag, dbentry], exactly as the accessors expect.
  my $links = $self->{'linkage_types'} ||= [];
  my @pair = ($linkage_type);
  push @pair, $source_dbentry if $source_dbentry;
  push @{$links}, \@pair;
}
=head2 get_all_linkage_info
Arg [1] : none
Example :
foreach ( @{ $ontology_xref->get_all_linkage_info() } ) {
print "evidence: $_->[0] via $_->[1]->display_id";
}
Description: Retrieves a list of evidence-tag/source-DBEntry pairs
associated with this ontology_xref
Returntype : listref of listrefs
Exceptions : none
Caller : geneview? general.
Status : Experimental
=cut
# Return the stored list of [evidence-tag, source-DBEntry] pairs, or a
# fresh empty listref when nothing has been added yet.
sub get_all_linkage_info {
  my ($self) = @_;
  my $links = $self->{'linkage_types'};
  return $links ? $links : [];
}
=head2 get_all_linkage_types
Arg [1] : none
Example :
print( join( ' ', @{ $ontology_xref->get_all_linkage_types() } ),
"\n" );
Description: Retrieves a unique list of evidence tags associated with
this ontology_xref
Returntype : listref of evidence-tag strings
Exceptions : none
Caller : geneview? general
Status : Stable
=cut
# Return a listref of the unique evidence tags stored on this
# ontology_xref, in first-seen order.
sub get_all_linkage_types {
  my ($self) = @_;

  my %seen;

  # The '|| []' guard matches get_all_linkage_info(): without it,
  # dereferencing an undef 'linkage_types' slot (no tags added yet)
  # dies under strict refs.
  return [ grep { !$seen{$_}++ }
             map { $_->[0] } @{ $self->{'linkage_types'} || [] } ];
}
=head2 flush_linkage_types
Arg [1] : none
Example : $ontology_xref->flush_linkage_types();
Description: Removes any associated evidence tags
Returntype : none
Exceptions : none
Caller : general
Status : Stable
=cut
# Discard every evidence-tag / source pair recorded so far by
# resetting the storage to a fresh empty list.
sub flush_linkage_types {
  my ($self) = @_;
  $self->{'linkage_types'} = [];
}
1;
-- Test-fixture schema for the ontology_xref table (formerly go_xref).
CREATE TABLE `ontology_xref` (
  `object_xref_id` int(10) unsigned NOT NULL default '0',
  `linkage_type` enum('IC','IDA','IEA','IEP','IGI','IMP','IPI','ISS','NAS','ND','TAS','NR','RCA') NOT NULL,
  `source_xref_id` int(10) unsigned default NULL,
  UNIQUE KEY `object_xref_id_2` (`object_xref_id`,`source_xref_id`,`linkage_type`),
  -- NOTE(review): this single-column key is a leading prefix of the
  -- UNIQUE key above and is therefore redundant for lookups.
  KEY `object_xref_id` (`object_xref_id`),
  KEY `source_xref_id` (`source_xref_id`)
) ENGINE=MyISAM DEFAULT CHARSET=latin1;
22988 IC \N
22988 IEA \N
22988 IC 152200
22989 IEA \N
22990 IC \N
22991 IEA \N
22992 IC \N
22993 IEA \N
22994 IC \N
41176 IEA \N
41177 IDA \N
41178 IEA \N
41179 IC \N
41180 IEA \N
41181 IEA \N
41182 IDA \N
41183 IC \N
83268 IEA \N
83269 IEA \N
83270 IDA \N
149843 IDA \N
151219 IDA \N
151220 IC \N
151221 IDA \N
151222 IC \N
151223 IEA \N
151224 IC \N
179294 IDA \N
179295 IEA \N
179296 IC \N
191222 IEA \N
191223 IC \N
191224 IC \N
191225 IC \N
191226 IDA \N
191227 IDA \N
191244 IEA \N
191245 IDA \N
191246 IC \N
229064 IDA \N
229065 IC \N
229077 IDA \N
229078 IC \N
238791 IC \N
238792 IDA \N
238793 IDA \N
238794 IC \N
238795 IDA \N
238796 IEA \N
238797 IDA \N
# patch_59_60_a.sql
#
# Title: Update schema version.
#
# Description:
#   Update schema_version in meta table to 60.

UPDATE meta SET meta_value='60' WHERE meta_key='schema_version';

# Patch identifier
INSERT INTO meta (species_id, meta_key, meta_value)
  VALUES (NULL, 'patch', 'patch_59_60_a.sql|schema_version');

# patch_59_60_b.sql
#
# Title:
#   Rename 'go_xref' table to 'ontology_xref'.
#
# Description:
#   Rename the 'go_xref' table to make its use more generic.

# Rename the table, and swap the source_xref_id and linkage_type fields.
# (Moving source_xref_id to directly AFTER object_xref_id is what
# effects the swap with linkage_type.)
ALTER TABLE go_xref
  RENAME TO ontology_xref,
  MODIFY COLUMN source_xref_id INT(10) UNSIGNED DEFAULT NULL
    AFTER object_xref_id;

# Optimize the table, because indexes may be out of whack
OPTIMIZE TABLE ontology_xref;

# Insert patch identifier.
INSERT INTO meta (species_id, meta_key, meta_value)
  VALUES (NULL, 'patch', 'patch_59_60_b.sql|rename_go_xref_table');

# patch_59_60_c.sql
#
# Title:
#   A patch to fix a couple of inconsistencies in the schema.
#
# Description:
#   QC turned up issues with the signedness of a number of fields in
#   the Ensembl Core schema.  This patch fixes these.  The fields
#   are: karyotype.seq_region_start, karyotype.seq_region_end and
#   seq_region.length (should all be UNSIGNED).

# Make the 'seq_region_start' and 'seq_region_end' fields of the
# 'karyotype' table UNSIGNED (like they are everywhere else).
ALTER TABLE karyotype
  MODIFY COLUMN seq_region_start INT(10) UNSIGNED NOT NULL,
  MODIFY COLUMN seq_region_end INT(10) UNSIGNED NOT NULL;

# Make 'seq_region.length' UNSIGNED (we do not like negative lengths).
ALTER TABLE seq_region
  MODIFY COLUMN length INT(10) UNSIGNED NOT NULL;

# Insert patch identifier.
INSERT INTO meta (species_id, meta_key, meta_value)
  VALUES (NULL, 'patch', 'patch_59_60_c.sql|QC_fixes');
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment