Commit 93ad6e79 authored by Wojtek Bazant
Browse files

XrefParser::Database uses DBConnection

+ XrefParser::Database stores a DBConnection, setters delegate to it
+ New test database, for xrefs
+ Setup to test XrefParsers
+ Tests for WormbaseDirectParser
parent c0d38d06
......@@ -23,9 +23,10 @@ use strict;
use warnings;
use Carp;
use DBI;
use Bio::EnsEMBL::DBSQL::DBConnection;
use File::Spec::Functions;
use IO::File;
use English;
sub new
{
......@@ -33,12 +34,15 @@ sub new
my $class = ref $proto || $proto;
my $self = bless {}, $class;
$self->host($arg_ref->{host});
$self->dbname($arg_ref->{dbname});
$self->user($arg_ref->{user});
$self->pass($arg_ref->{pass} || '');
$self->port($arg_ref->{port} || '3306');
$self->dbc(ref $arg_ref eq 'Bio::EnsEMBL::DBSQL::DBConnection'
? $arg_ref : Bio::EnsEMBL::DBSQL::DBConnection->new(
-HOST => $arg_ref->{host},
-DBNAME => $arg_ref->{dbname},
-USER => $arg_ref->{user},
-PASS => $arg_ref->{pass} || '',
-PORT => $arg_ref->{port} || '3306'
));
$self->verbose($arg_ref->{verbose});
return $self;
......@@ -52,44 +56,40 @@ sub verbose {
return $self->{_verbose};
}
# Get or set the database connection object this instance wraps.
#
#   $arg - (optional) connection object to store; ignored when undefined.
#          new() passes either the Bio::EnsEMBL::DBSQL::DBConnection it was
#          given or one it builds from host/dbname/user/pass/port settings.
#
# Returns the stored connection (undef if none has been set). The
# host/dbname/user/pass/port accessors all read and write through this object.
sub dbc {
  my ($self, $arg) = @_;
  $self->{_dbc} = $arg if defined $arg;
  return $self->{_dbc};
}
# Host name accessor, backed by the wrapped connection.
#
#   $arg - (optional) new host; an undefined value leaves the host untouched
#
# Returns the host as reported by $self->dbc after any update.
sub host {
  my ($self, $arg) = @_;
  if (defined $arg) {
    $self->dbc->host($arg);
  }
  return $self->dbc->host;
}
# Get or set the database name on the wrapped connection.
#
#   $arg - (optional) new database name; ignored when undefined
#
# Returns whatever $self->dbc->dbname reports after any update. Nothing is
# cached on this object, so the value cannot drift from the connection's.
sub dbname {
  my ($self, $arg) = @_;
  $self->dbc->dbname($arg) if defined $arg;
  return $self->dbc->dbname;
}
# Get or set the user name on the wrapped connection.
#
#   $arg - (optional) new user name; ignored when undefined
#
# Returns whatever $self->dbc->user reports after any update. Nothing is
# cached on this object, so the value cannot drift from the connection's.
sub user {
  my ($self, $arg) = @_;
  $self->dbc->user($arg) if defined $arg;
  return $self->dbc->user;
}
# Get or set the password on the wrapped connection.
#
#   $arg - (optional) new password; ignored when undefined, so an empty
#          string is accepted but undef is not
#
# Returns whatever $self->dbc->pass reports after any update. Nothing is
# cached on this object, so the value cannot drift from the connection's.
sub pass {
  my ($self, $arg) = @_;
  $self->dbc->pass($arg) if defined $arg;
  return $self->dbc->pass;
}
# Get or set the port on the wrapped connection.
#
#   $arg - (optional) new port; ignored when undefined
#
# Returns whatever $self->dbc->port reports after any update. Nothing is
# cached on this object, so the value cannot drift from the connection's.
sub port {
  my ($self, $arg) = @_;
  $self->dbc->port($arg) if defined $arg;
  return $self->dbc->port;
}
sub dbi
......@@ -117,16 +117,18 @@ sub dbi
# (Re)create the database and load its schema and metadata.
#
#   $sql_dir - directory handed to populate() to locate the SQL files
#   $force   - passed to both recreate_database() and populate()
#   $drop_db - passed to recreate_database()
#
# Returns whatever populate() returns.
#
# No connection is opened here: recreate_database() makes its own server
# connection, so an extra handle in this sub would only sit unused.
sub create {
  my ($self, $sql_dir, $force, $drop_db) = @_;
  $self->recreate_database($force, $drop_db);
  return $self->populate($sql_dir, $force);
}
# Load the schema and then the metadata into the database.
#
#   $sql_dir - base directory; the schema is read from sql/table.sql below it
#   $force   - forwarded unchanged to prepare_metadata_file()
#
# The metadata file is prepared before anything is loaded, so a failure there
# leaves the database untouched. Returns the result of the final
# populate_with_file() call.
sub populate {
  my ($self, $sql_dir, $force) = @_;

  my $metadata_sql = $self->prepare_metadata_file($sql_dir, $force);
  my $schema_sql   = catfile($sql_dir, 'sql', 'table.sql');

  $self->populate_with_file($schema_sql);
  return $self->populate_with_file($metadata_sql);
}
sub prepare_metadata_file {
my ($self, $sql_dir, $force) = @_;
my $metadata_file =
catfile( $sql_dir, 'sql', 'populate_metadata.sql' );
my $ini_file = catfile( $sql_dir, 'xref_config.ini' );
......@@ -174,7 +176,17 @@ sub create {
}
} ## end if ( !defined($meta_tm...
return $metadata_file;
}
sub recreate_database {
my ($self,$force, $drop_db) = @_;
my $user = $self->user;
my $dbname = $self->dbname;
my $host = $self->host;
my $pass = $self->pass;
my $port = $self->port;
my $dbh = DBI->connect( "DBI:mysql:host=$host:port=$port", $user, $pass,
{'RaiseError' => 1});
# check to see if the database already exists
my %dbs = map {$_->[0] => 1} @{$dbh->selectall_arrayref('SHOW DATABASES')};
......@@ -206,30 +218,18 @@ sub create {
}
$dbh->do( 'CREATE DATABASE ' . $dbname );
}
# Execute every SQL statement found in a file through $self->dbc.
#
#   $sql_file - path of a file holding ';'-terminated SQL statements
#
# The file is read one ';'-delimited chunk at a time. '#' comments running to
# the end of a line are removed from each chunk, chunks with nothing but
# whitespace left are skipped, and everything else is passed to
# $self->dbc->do. Croaks if the file cannot be opened. Returns nothing.
#
# This replaces loading through the mysql command-line client; populate()
# calls it once for the schema file and once for the metadata file.
#
# NOTE(review): splitting and comment stripping are purely textual, so a ';'
# or '#' inside a quoted SQL string would be treated as a separator/comment.
sub populate_with_file {
  my ($self, $sql_file) = @_;

  open(my $sql_fh, '<', $sql_file)
    or croak("Cannot open $sql_file: $!");

  # Localised rather than saved and restored by hand, so the caller's record
  # separator comes back even when a statement dies part-way through.
  local $INPUT_RECORD_SEPARATOR = ';';

  while (my $statement = <$sql_fh>) {
    $statement =~ s/#(.*?)\n//g;
    # Also catches a chunk that became empty once its comments were removed.
    next unless $statement =~ /\S/;
    $self->dbc->do($statement);
  }

  close($sql_fh);
  return;
}
1;
......@@ -11,6 +11,7 @@
'empty' => 'Bio::EnsEMBL::DBSQL::DBAdaptor',
'patch' => 'Bio::EnsEMBL::DBSQL::DBAdaptor',
'variation' => 'Bio::EnsEMBL::Variation::DBSQL::DBAdaptor',
'xref' => 'Bio::EnsEMBL::DBSQL::DBAdaptor',
},
'mus_musculus' => {
'core' => 'Bio::EnsEMBL::DBSQL::DBAdaptor',
......
=head1 LICENSE
Copyright [1999-2015] Wellcome Trust Sanger Institute and the EMBL-European Bioinformatics Institute
Copyright [2016-2018] EMBL-European Bioinformatics Institute
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
=cut
use strict;
use warnings;
use File::Spec;
use File::Basename qw/dirname/;
use File::Temp qw/tempdir/;
use Test::More;
use Test::Warnings;
use Bio::EnsEMBL::Test::MultiTestDB;
use Bio::EnsEMBL::Test::TestUtils;
use XrefParser::Database;
# Fetch the "xref" test database through the MultiTestDB harness.
my $multi = Bio::EnsEMBL::Test::MultiTestDB->new;
my $dba = $multi->get_DBAdaptor("xref");
# Nothing below can run on another driver, so skip the whole file early.
plan skip_all => "xref database schema is mysql specific - won't work with a different driver"
unless $dba->dbc->driver eq 'mysql';
# Wrap the test database's existing connection instead of opening a new one.
my $database = XrefParser::Database->new( $dba->dbc);
# Load the xref schema and metadata. The second argument is populate()'s
# $force flag, so any true value will do.
# NOTE(review): the exact effect of $force lives in prepare_metadata_file - confirm there.
$database->populate(
File::Spec->catdir($multi->curr_dir, "../../misc-scripts/xref_mapping"),
"with force please",
);
# Baseline expectation for every table checked by test_parser(): zero rows.
# Individual tests override only the tables they expect to be filled.
my %xref_tables_expected_empty_by_default = (
checksum_xref=>0,
coordinate_xref=>0,
dependent_xref=>0,
gene_direct_xref=>0,
go_xref=>0,
identity_xref=>0,
object_xref=>0,
primary_xref=>0,
transcript_direct_xref=>0,
translation_direct_xref=>0,
xref=>0,
);
# Scratch directory for parser input files; CLEANUP asks File::Temp to remove
# it when the program exits.
my $tmp_dir = tempdir(CLEANUP=>1);
# Write the given strings to a scratch file and return its path.
#
#   @_ - content to write, printed in the order given
#
# The same path ("tmp" inside $tmp_dir) is reused on every call, so each call
# overwrites what the previous one stored. Dies with the system error if the
# file cannot be opened, written or closed; buffered write errors only show up
# at close, so that result is checked too.
sub store_in_temporary_file {
  my $path = "$tmp_dir/tmp";
  open(my $fh, ">", $path) or die "Cannot open $path for writing: $!";
  print {$fh} @_ or die "Cannot write to $path: $!";
  close($fh) or die "Cannot close $path: $!";
  return $path;
}
# Run one parser over the given file content, then check the row count of
# every xref table.
#
#   $parser    - parser class name; loaded with require_ok, which itself
#                counts as one test
#   $content   - text written to the temporary input file
#   $source_id - handed to the parser's run() as source_id
#   $expected  - hashref of table name => expected row count; tables left out
#                are expected to hold no rows
#   $test_name - label appended to the parser name in the subtest title
#
# A table found to contain rows is emptied right after it is counted, so the
# next call starts from empty tables.
sub test_parser {
  my ($parser, $content, $source_id, $expected, $test_name) = @_;

  require_ok($parser);

  my $parser_object = $parser->new($database);
  $parser_object->run({
    files      => [ store_in_temporary_file($content) ],
    source_id  => $source_id,
    species_id => 1,  # happens to be correct, but irrelevant: the mapping is not under test
  });

  my %count_wanted_for = (%xref_tables_expected_empty_by_default, %{$expected});

  subtest "$parser $test_name" => sub {
    plan tests => scalar(keys %count_wanted_for);
    foreach my $table (keys %count_wanted_for) {
      my $actual_count = count_rows($dba, $table);
      if ($actual_count) {
        my $delete_sth = $dba->dbc->prepare("delete from $table;");
        $delete_sth->execute();
      }
      my $expected_count = $count_wanted_for{$table};
      unless (is($actual_count, $expected_count, "$table has $expected_count rows")) {
        diag "$table has $actual_count rows";
      }
    }
  };
}
# Null case: an empty input file with no expectations given, so every table
# checked by test_parser() must still hold zero rows.
test_parser("XrefParser::WormbaseDirectParser", "", "source_id (unused)", {}, "null case");
my $wormbase_celegans_xrefs_head= <<EOF;
//
// WormBase Caenorhabditis elegans XREFs for WS265
//
// Columns (tab separated) are:
// 1. WormBase Gene sequence name
// 2. WormBase Gene accession
// 3. WormBase Gene CGC name
// 4. WormBase Transcript sequence name
// 5. WormPep protein accession
// 6. INSDC parent sequence accession
// 7. INSDC locus_tag id
// 8. INSDC protein_id
// 9. UniProt accession
//
// Missing or not applicable data (e.g. protein identifiers for non-coding RNAs) is denoted by a "."
//
2L52.1 WBGene00007063 . 2L52.1b CE50569 BX284602 CELE_2L52.1 CTQ86426 A0A0K3AWR5
2L52.1 WBGene00007063 . 2L52.1a CE32090 BX284602 CELE_2L52.1 CCD61130 A4F336
2L52.2 WBGene00200402 . 2L52.2 . BX284602 CELE_2L52.2 . .
EOF
# Parse the three-row sample above. Per the description string, transcript
# direct xrefs follow column 4 (one per row) and translation direct xrefs
# follow column 5 (rows where it is not "."). The xref and gene counts pin the
# parser's current output, which the description itself flags as inflated by
# duplicates. Tables not listed here must stay empty.
test_parser("XrefParser::WormbaseDirectParser", $wormbase_celegans_xrefs_head, "source_id (unused)", {
xref=>9,
gene_direct_xref => 6,
transcript_direct_xref => 3,
translation_direct_xref => 2,
}, "Direct xrefs: genes: count currently off due to some questionable duplicates, transcripts: as in column 4, translations: as in column 5. At least one direct xref per xref (but should be one to one)");
# No fixed plan: the number of tests depends on how many tables each call checks.
done_testing();
......@@ -2,7 +2,7 @@
ENSDIR="${ENSDIR:-$PWD}"
export PERL5LIB=$ENSDIR/bioperl-live:$ENSDIR/ensembl-test/modules:$PWD/modules:$ENSDIR/ensembl-variation/modules:$ENSDIR/ensembl-compara/modules
export PERL5LIB=$ENSDIR/bioperl-live:$ENSDIR/ensembl-test/modules:$PWD/modules:$ENSDIR/ensembl-variation/modules:$ENSDIR/ensembl-compara/modules:$PWD/misc-scripts/xref_mapping
export TEST_AUTHOR=$USER
if [ "$DB" = 'mysql' ]; then
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment