From d88a3586d34e15ac891489cf108bff1290a1060a Mon Sep 17 00:00:00 2001 From: Graham McVicker <mcvicker@sanger.ac.uk> Date: Tue, 5 Nov 2002 09:37:08 +0000 Subject: [PATCH] First pass implementation of MultiTestDB framework --- modules/t/MultiDB.conf | 13 ++ modules/t/MultiTestDB.pm | 380 +++++++++++++++++++++++++++++++++++++++ modules/t/analysis.t | 2 - modules/t/runtests.pl | 103 +++++++++++ 4 files changed, 496 insertions(+), 2 deletions(-) create mode 100644 modules/t/MultiDB.conf create mode 100644 modules/t/MultiTestDB.pm create mode 100755 modules/t/runtests.pl diff --git a/modules/t/MultiDB.conf b/modules/t/MultiDB.conf new file mode 100644 index 0000000000..14bc2b5043 --- /dev/null +++ b/modules/t/MultiDB.conf @@ -0,0 +1,13 @@ +{ + 'port' => 3306, + 'driver' => 'mysql', + 'user' => 'ensadmin', + 'pass' => 'ensembl', + 'host' => 'ecs1c', + 'zip' => 'multidb.zip', + + #add a line with the dbname and module + 'databases' => { 'core' => 'Bio::EnsEMBL::DBSQL::DBAdaptor', + 'lite' => 'Bio::EnsEMBL::Lite::DBAdaptor' } + +} diff --git a/modules/t/MultiTestDB.pm b/modules/t/MultiTestDB.pm new file mode 100644 index 0000000000..9a7584bb5f --- /dev/null +++ b/modules/t/MultiTestDB.pm @@ -0,0 +1,380 @@ + +=pod + +=head1 NAME - EnsTestDB + +=head1 SYNOPSIS + +=head1 DESCRIPTION + + +=head1 METHODS + +=cut + +package MultiTestDB; + +use vars qw(@ISA); +use strict; + +use DBI; +use Digest::MD5; +use Data::Dumper; + + +#homo sapiens is used if no species is specified +my $DEFAULT_SPECIES = 'Homo_sapiens'; + +#configuration file extension appended onto species name +my $FROZEN_CONF_EXT = '.MultiTestDB.frozen.conf'; + +my $CONF_FILE = 'MultiTestDB.conf'; + +my $DUMP_DIR = 'multi_test_dbs'; + + + + +sub new { + my( $pkg, $species ) = @_; + + my $self = bless {}, $pkg; + + unless($species) { + $species = $DEFAULT_SPECIES; + } + + $self->species($species); + + if($ENV{'HARNESS_ACTIVE'}) { + #databases are loaded already, read conf hash from file + $self->load_config($species); + } else { + #load the databases and generate the conf hash + $self->load_databases($species); + } + + #generate the db_adaptors from the $self->{'conf'} hash + $self->create_adaptors; + + return $self; +} + + + + +# +# load config into $self->{'conf'} hash +# +sub load_config { + my $self = shift; + + my $conf = $self->species . $FROZEN_CONF_EXT; + + eval { + $self->{'conf'} = do $conf; #reads file into $self->{'conf'} + }; + + if($@) { + die("Could not read frozen configuration file '$conf'\n"); + } +} + + + +# +# Store $self->{'config'} hash into a file +# +sub store_config { + my $self = shift; + + my $conf = $self->species . $FROZEN_CONF_EXT; + + local *FILE = open ">$conf" or die "Could not open config file '$conf'\n"; + + my $string = Dumper($self->{'conf'}); + + #strip of leading '$VAR1 = ' + $string =~ s/$[\$]VAR1\s*=//; + + #store config in file + print FILE $string; + + close FILE; +} + + + +#create a set of adaptors based on the $self->{'conf'} hash +sub create_adaptors { + my $self = shift; + + #establish a connection to each of the databases in the configuration + foreach my $dbtype (keys %{$self->{'conf'}}) { + print "Connecting to $dbtype\n"; + my $db = $self->{'conf'}->{$dbtype}; + + my $adaptor; + + #try to instantiate an adaptor for this database + eval { + require $db->{'module'}; + $adaptor = new $db->{'module'}('dbname' => $db->{'name'}, + 'user' => $db->{'user'}, + 'pass' => $db->{'pass'}, + 'port' => $db->{'port'}, + 'host' => $db->{'host'}, + 'driver' => $db->{'driver'}); + }; + + if ($@) { + warn("WARNING: Could not instantiate $dbtype DBAdaptor:\n$@"); + } else { + $self->{'db_adaptors'}->{$dbtype} = $adaptor; + } + } +} + + + + +sub load_databases { + my ($self, $species) = @_; + + #create database from conf and from zip files + my $db_conf = do $CONF_FILE; + + my $port = $db_conf->{'port'}; + my $driver = $db_conf->{'driver'}; + my $host = $db_conf->{'host'}; + my $pass = $db_conf->{'pass'}; + my $user = $db_conf->{'user'}; + my $zip = $db_conf->{'zip'}; + + #create a config hash which will be frozen to a file + $self->{'conf'} = {}; + + #unzip database files + unzip_test_dbs($zip); + + #connect to the database + my $locator = 'DBI:$driver:host=$host;port=$port'; + my $db = DBI->connect($locator, $user, $pass, {RaiseError => 1}); + + unless($db) { + die "Can't connect to database $locator"; + } + + #create a database for each database specified + foreach my $dbtype (keys %{$db_conf->{'databases'}}) { + #create a unique random dbname + my $dbname = $self->_create_db_name($species, $dbtype); + + + unless($db->do("CREATE DATABASE $dbname")) { + die("Could not create database [$dbname]"); + } + + #copy the general config into a dbtype specific config + $self->{'conf'}->{$dbtype} = {}; + %{$self->{'conf'}->{$dbtype}} = %$db_conf; + + #store the temporary database name in the dbtype specific config + $self->{'conf'}->{$dbtype}->{'dbname'} = $dbname; + + $db->do("use $dbname"); + + #load the database with data + my $dir = "$DUMP_DIR/species"; + local *DIR; + + opendir(DIR, $dir) or die "could not open dump directory '$dir'"; + + my @files = readdir DIR; + + local *FILE; + + #read in table creat statements from *.sql files and process them with DBI + foreach my $sql_file (grep /\.sql$/, @files) { + unless(-f $sql_file && -r $sql_file) { + warn("could not read SQL file '$sql_file'\n"); + next; + } + + FILE = open $sql_file; + my @file = <FILE>; + $db->do(join ' ', @file); + close FILE; + + #import data from the txt files of the same name + $sql_file =~ /.*\/(.*)\.sql/; + my $tablename = $1; + my $txt_file = s/\.sql$/\.txt/; + + unless(-f $txt_file && -r $txt_file) { + warn("could not read data file '$txt_file'\n"); + next; + } + + $db->do( "load data local infile '$txt_file' into table $tablename" ); + } + } + + closedir DIR; + + $db->disconnect; + + #freeze configuration in a file + $self->store_config; + + +} + + +sub unzip_test_dbs { + my ($self, $zipfile) = @_; + + if (-e $DUMP_DIR) { + warn "Test genome dbs already unpacked\n"; + return; + } + + unless($zipfile) { + $self->throw("zipfile argument is required\n"); + } + + unless(-f $zipfile) { + $self->throw("zipfile could not be found\n"); + } + + system ( "unzip $zipfile" ); +} + + + + +sub get_DBAdaptor { + my ($self, $type) = @_; + + unless($type) { + die('type arg must be specified\n'); + } + + return $self->{'db_adaptors'}->{$type}; +} + + +# convenience method: by calling it, you get the name of the database, +# which you can cut-n-paste into another window for doing some mysql +# stuff interactively +sub pause { + my ($self) = @_; + + print STDERR "pausing to inspect databases\n"; + foreach my $dbtype (keys %{$self->{'db_adaptors'}}) { + my $db_adaptor = $self->{'db_adaptors'}->{$dbtype}; + print STDERR " [$dbtype]\n"; + print STDERR " name=[".$db_adaptor->dbname."]\n"; + print STDERR " port=[".$db_adaptor->port."]\n"; + print STDERR " host=[".$db_adaptor->host."]\n"; + print STDERR " user=[".$db_adaptor->user."]\n"; + } + print STDERR "press ^D to continue\n"; + `cat `; +} + + + +sub species { + my ($self, $species) = @_; + + if($species) { + $self->{'species'} = $species; + } + + return $self->{'species'}; +} + + + +sub _create_db_name { + my( $self, $species, $dbtype ) = @_; + + my $rand = &Digest::MD5::md5_hex(rand()); + my $db_name = "_test_db_${species}_${dbtype}_${rand}"; + + return $db_name; +} + + + + +sub do_sql_file { + my( $self, @files ) = @_; + local *SQL; + my $i = 0; + my $dbh = $self->db_handle; + + my $comment_strip_warned=0; + + foreach my $file (@files) { + my $sql = ''; + open SQL, $file or die "Can't read SQL file '$file' : $!"; + while (<SQL>) { + # careful with stripping out comments; quoted text + # (e.g. aligments) may contain them. Just warn (once) and ignore + if ( /'[^']*#[^']*'/ + || /'[^']*--[^']*'/ ) { + if ( $comment_strip_warned++ ) { + # already warned + } else { + warn "#################################\n"; + warn "# found comment strings inside quoted string;" . + "not stripping, too complicated: $_\n"; + warn "# (continuing, assuming all these they are simply " . + "valid quoted strings)\n"; + warn "#################################\n"; + } + } else { + s/(#|--).*//; # Remove comments + } + next unless /\S/; # Skip lines which are all space + $sql .= $_; + $sql .= ' '; + } + close SQL; + + #Modified split statement, only semicolumns before end of line, + #so we can have them inside a string in the statement + #\s*\n, takes in account the case when there is space before the new line + foreach my $s (grep /\S/, split /;[ \t]*\n/, $sql) { + $s =~ s/\;\s*$//g; + $self->validate_sql($s); + $dbh->do($s); + $i++ + } + } + return $i; +} # do_sql_file + +sub validate_sql { + my ($self, $statement) = @_; + if ($statement =~ /insert/i) { + $statement =~ s/\n/ /g; #remove newlines + die ("INSERT should use explicit column names " . + "(-c switch in mysqldump)\n$statement\n") + unless ($statement =~ /insert.+into.*\(.+\).+values.*\(.+\)/i); + } +} + + + +sub DESTROY { + my( $self ) = @_; + +} + + + +1; + diff --git a/modules/t/analysis.t b/modules/t/analysis.t index d88b438103..21fbadef78 100644 --- a/modules/t/analysis.t +++ b/modules/t/analysis.t @@ -11,8 +11,6 @@ END {print "not ok 1\n" unless $loaded;} use EnsTestDB; use Bio::EnsEMBL::DBLoader; - - $loaded = 1; ok(1); diff --git a/modules/t/runtests.pl b/modules/t/runtests.pl new file mode 100755 index 0000000000..25da55a619 --- /dev/null +++ b/modules/t/runtests.pl @@ -0,0 +1,103 @@ +#!/usr/local/bin/perl -w + +use strict; + +use Getopt::Std; +use Test::Harness; + +use vars qw($opt_l $opt_h); + +#read command line options +&usage unless getopts('lh'); + + +#print usage on '-h' command line option +if($opt_h) { + &usage; + exit; +} + +#list test files on '-l' command line option +if($opt_l) { + foreach my $file (@{&get_all_tests('.', \@ARGV )}) { + print "$file\n"; + } + exit; +} + +#run all of the specified tests +runtests(@{&get_all_tests('.', \@ARGV)}); + + + +=head2 get_all_tests + + Arg [1] : string $dir + the name of the directory retrieve a list of tests from + Arg [2] : (optional) listref $input_files + testfiles or directories to retrieve. If not specified all + ".t" files in $dir are taken. + Example : @test_files = read_test_dir('t'); + Description: Returns a list of testfiles in the directories specified by + the @tests argument. The relative path is given as well as + with the testnames returned. Only files ending with .t are + returned. Subdirectories are recursively entered and the test + files returned within them are returned as well. + Returntype : listref of strings. + Exceptions : none + Caller : general + +=cut + +sub get_all_tests { + my ($dir, $input_files) = @_; + + my @files; + my @out = (); + local *DIR; + + unless(opendir(DIR, $dir)) { + warn("WARNING: cannot open directory $dir\n"); + return []; + } + + if($input_files && @$input_files) { + #input files were specified so use them + @files = @$input_files; + } else { + #otherwise use every file in the directory + @files = readdir DIR; + } + + #filter out CVS files, files beginning with '.' and files ending in ~ + @files = grep !/(^\.)|(^CVS$)|(~$)/, @files; + + foreach my $file (@files) { + $file = "$dir/$file"; + + if(-d $file) { + #do a recursive call on directories + push @out, @{get_all_tests("$file")}; + } elsif ($file =~ /\.t$/) { + #files ending with a '.t' are considered test files + unless(-r $file && -f $file) { + warn("WARNING: cannot read test file $file\n"); + } + push @out, $file; + } + } + + closedir DIR; + + return \@out; +} + + + +sub usage { + print "usage:\n"; + print "\tlist tests: run_tests.pl -l [<testfiles or dirs> ...]\n"; + print "\trun tests: run_tests.pl [<testfiles or dirs> ...]\n"; +} + + -- GitLab