First pass implementation of MultiTestDB framework

d88a3586 · Graham McVicker · c3b81139 · d88a3586 · d88a3586 · d88a3586
Commit d88a3586 authored 22 years ago by Graham McVicker
--- a/modules/t/MultiDB.conf
+++ b/modules/t/MultiDB.conf
+{ 
+ 'port'   => 3306,
+ 'driver' => 'mysql',
+ 'user'   => 'ensadmin',
+ 'pass'   => 'ensembl',
+ 'host'   => 'ecs1c',
+ 'zip'    => 'multidb.zip',
+  #add a line with the dbname and module
+  'databases'   => { 'core' => 'Bio::EnsEMBL::DBSQL::DBAdaptor',
+		     'lite' => 'Bio::EnsEMBL::Lite::DBAdaptor' }
+}
--- a/modules/t/MultiTestDB.pm
+++ b/modules/t/MultiTestDB.pm
+=pod
+=head1 NAME - EnsTestDB
+=head1 SYNOPSIS
+=head1 DESCRIPTION
+=head1 METHODS
+=cut
+package MultiTestDB;
+use vars qw(@ISA);
+use strict;
+use DBI;
+use Digest::MD5;
+use Data::Dumper;
+#homo sapiens is used if no species is specified
+my $DEFAULT_SPECIES  = 'Homo_sapiens';
+#configuration file extension appended onto species name
+my $FROZEN_CONF_EXT  = '.MultiTestDB.frozen.conf';
+my $CONF_FILE    = 'MultiTestDB.conf';
+my $DUMP_DIR = 'multi_test_dbs';
+sub new {
+  my( $pkg, $species ) = @_;
+  my $self = bless {}, $pkg;
+  unless($species) {
+    $species = $DEFAULT_SPECIES;
+  }
+  $self->species($species);
+  if($ENV{'HARNESS_ACTIVE'}) {
+    #databases are loaded already, read conf hash from file
+    $self->load_config($species);
+  } else {
+    #load the databases and generate the conf hash
+    $self->load_databases($species);
+  }
+  #generate the db_adaptors from the $self->{'conf'} hash
+  $self->create_adaptors;
+  return $self;
+}
+#
+# load config into $self->{'conf'} hash
+#
+sub load_config {
+  my $self = shift;
+  my $conf = $self->species . $FROZEN_CONF_EXT;
+  eval {
+    $self->{'conf'} = do $conf; #reads file into $self->{'conf'}
+  };
+  if($@) {
+    die("Could not read frozen configuration file '$conf'\n");
+  }
+}
+#
+# Store $self->{'config'} hash into a file
+#
+sub store_config {
+  my $self = shift;
+  my $conf = $self->species . $FROZEN_CONF_EXT;
+  local *FILE = open ">$conf" or die "Could not open config file '$conf'\n";
+  my $string = Dumper($self->{'conf'});
+  #strip of leading '$VAR1 = '
+  $string =~ s/$[\$]VAR1\s*=//;
+  #store config in file
+  print FILE $string;
+  close FILE;
+}
+#create a set of adaptors based on the $self->{'conf'} hash
+sub create_adaptors {
+  my $self = shift;
+  #establish a connection to each of the databases in the configuration
+  foreach my $dbtype (keys %{$self->{'conf'}}) {
+    print "Connecting to $dbtype\n";
+    my $db = $self->{'conf'}->{$dbtype};
+    my $adaptor;
+    #try to instantiate an adaptor for this database 
+    eval {
+      require $db->{'module'};
+      $adaptor = new $db->{'module'}('dbname' => $db->{'name'},
+				     'user'   => $db->{'user'},
+				     'pass'   => $db->{'pass'},
+				     'port'   => $db->{'port'},
+				     'host'   => $db->{'host'},
+				     'driver' => $db->{'driver'});
+    };
+    if ($@) {
+      warn("WARNING: Could not instantiate $dbtype DBAdaptor:\n$@");
+    } else {
+      $self->{'db_adaptors'}->{$dbtype} = $adaptor;
+    }
+  }
+}
+sub load_databases {
+  my ($self, $species) = @_;
+  #create database from conf and from zip files 
+  my $db_conf = do $CONF_FILE;
+  my $port   = $db_conf->{'port'};
+  my $driver = $db_conf->{'driver'};
+  my $host   = $db_conf->{'host'};
+  my $pass   = $db_conf->{'pass'};
+  my $user   = $db_conf->{'user'};
+  my $zip    = $db_conf->{'zip'};
+  #create a config hash which will be frozen to a file
+  $self->{'conf'} = {};
+  #unzip database files
+  unzip_test_dbs($zip);
+  #connect to the database
+  my $locator = 'DBI:$driver:host=$host;port=$port';
+  my $db = DBI->connect($locator, $user, $pass, {RaiseError => 1});
+  unless($db) {
+    die "Can't connect to database $locator";
+  }
+  #create a database for each database specified
+  foreach my $dbtype (keys %{$db_conf->{'databases'}}) {
+    #create a unique random dbname
+    my $dbname = $self->_create_db_name($species, $dbtype);
+    unless($db->do("CREATE DATABASE $dbname")) {
+      die("Could not create database [$dbname]");
+    }
+    #copy the general config into a dbtype specific config 
+    $self->{'conf'}->{$dbtype} = {};
+    %{$self->{'conf'}->{$dbtype}} = %$db_conf;
+    #store the temporary database name in the dbtype specific config
+    $self->{'conf'}->{$dbtype}->{'dbname'} = $dbname;
+    $db->do("use $dbname");
+    #load the database with data
+    my $dir = "$DUMP_DIR/species";
+    local *DIR;
+    opendir(DIR, $dir) or die "could not open dump directory '$dir'";
+    my @files = readdir DIR;
+    local *FILE;
+    #read in table creat statements from *.sql files and process them with DBI
+    foreach my $sql_file (grep /\.sql$/, @files) {
+      unless(-f $sql_file && -r $sql_file) {
+	warn("could not read SQL file '$sql_file'\n");
+	next;
+      }
+      FILE = open $sql_file;
+      my @file = <FILE>;
+      $db->do(join ' ', @file);
+      close FILE;
+      #import data from the txt files of the same name
+      $sql_file  =~ /.*\/(.*)\.sql/;
+      my $tablename = $1;
+      my $txt_file = s/\.sql$/\.txt/;
+      unless(-f $txt_file && -r $txt_file) {
+	warn("could not read data file '$txt_file'\n");
+	next;
+      }
+      $db->do( "load data local infile '$txt_file' into table $tablename" );
+    }	     
+  }
+  closedir DIR;
+  $db->disconnect;
+  #freeze configuration in a file
+  $self->store_config;
+}
+sub unzip_test_dbs {
+  my ($self, $zipfile) = @_;
+  if (-e $DUMP_DIR) {
+    warn "Test genome dbs already unpacked\n";
+    return;
+  }
+  unless($zipfile) {
+    $self->throw("zipfile argument is required\n");
+  }
+  unless(-f $zipfile) {
+    $self->throw("zipfile could not be found\n");
+  }
+  system ( "unzip $zipfile" );
+}
+sub get_DBAdaptor {
+  my ($self, $type) = @_;
+  unless($type) {
+    die('type arg must be specified\n');
+  }
+  return $self->{'db_adaptors'}->{$type};
+}
+# convenience method: by calling it, you get the name of the database,
+# which  you can cut-n-paste into another window for doing some mysql
+# stuff interactively
+sub pause {
+  my ($self) = @_;
+  print STDERR "pausing to inspect databases\n";
+  foreach my $dbtype (keys %{$self->{'db_adaptors'}}) {
+    my $db_adaptor = $self->{'db_adaptors'}->{$dbtype};
+    print STDERR " [$dbtype]\n";
+    print STDERR "    name=[".$db_adaptor->dbname."]\n";
+    print STDERR "    port=[".$db_adaptor->port."]\n";
+    print STDERR "    host=[".$db_adaptor->host."]\n";
+    print STDERR "    user=[".$db_adaptor->user."]\n";
+  }
+  print STDERR "press ^D to continue\n";
+  `cat `;
+}
+sub species {
+  my ($self, $species) = @_;
+  if($species) {
+    $self->{'species'} = $species;
+  }
+  return $self->{'species'};
+}
+sub _create_db_name {
+    my( $self, $species, $dbtype ) = @_;
+    my $rand = &Digest::MD5::md5_hex(rand());
+    my $db_name = "_test_db_${species}_${dbtype}_${rand}";
+    return $db_name;
+}
+sub do_sql_file {
+  my( $self, @files ) = @_;
+  local *SQL;
+  my $i = 0;
+  my $dbh = $self->db_handle;
+  my $comment_strip_warned=0;
+  foreach my $file (@files) {
+    my $sql = '';
+    open SQL, $file or die "Can't read SQL file '$file' : $!";
+    while (<SQL>) {
+      # careful with stripping out comments; quoted text
+      # (e.g. aligments) may contain them. Just warn (once) and ignore
+      if (    /'[^']*#[^']*'/ 
+	|| /'[^']*--[^']*'/ ) {
+	  if ( $comment_strip_warned++ ) { 
+	    # already warned
+	  } else {
+	    warn "#################################\n";
+	    warn "# found comment strings inside quoted string;" .
+	         "not stripping, too complicated: $_\n";
+	    warn "# (continuing, assuming all these they are simply " .
+	      "valid quoted strings)\n";
+	    warn "#################################\n";
+	  }
+	} else {
+	  s/(#|--).*//;       # Remove comments
+	}
+        next unless /\S/;   # Skip lines which are all space
+        $sql .= $_;
+        $sql .= ' ';
+      }
+  close SQL;
+    #Modified split statement, only semicolumns before end of line,
+    #so we can have them inside a string in the statement
+    #\s*\n, takes in account the case when there is space before the new line
+    foreach my $s (grep /\S/, split /;[ \t]*\n/, $sql) {
+      $s =~ s/\;\s*$//g;
+      $self->validate_sql($s);
+      $dbh->do($s);
+      $i++
+    }
+  }
+  return $i;
+}                                       # do_sql_file
+sub validate_sql {
+  my ($self, $statement) = @_;
+  if ($statement =~ /insert/i) {
+    $statement =~ s/\n/ /g; #remove newlines
+    die ("INSERT should use explicit column names " .
+	 "(-c switch in mysqldump)\n$statement\n")
+      unless ($statement =~ /insert.+into.*\(.+\).+values.*\(.+\)/i);
+  }
+}
+sub DESTROY {
+    my( $self ) = @_;
+}
+1;
--- a/modules/t/analysis.t
+++ b/modules/t/analysis.t
@@ -11,8 +11,6 @@ END {print "not ok 1\n" unless $loaded;}
 use EnsTestDB;
 use Bio::EnsEMBL::DBLoader;
 $loaded = 1;
 ok(1);

--- a/modules/t/runtests.pl
+++ b/modules/t/runtests.pl
+#!/usr/local/bin/perl -w
+use strict;
+use Getopt::Std;
+use Test::Harness;
+use vars qw($opt_l $opt_h);
+#read command line options
+&usage unless getopts('lh');
+#print usage on '-h' command line option
+if($opt_h) {
+  &usage;
+  exit;
+}
+#list test files on '-l' command line option
+if($opt_l) {
+  foreach my $file (@{&get_all_tests('.', \@ARGV )}) {
+    print "$file\n";
+  }
+  exit;
+}
+#run all of the specified tests  
+runtests(@{&get_all_tests('.', \@ARGV)});
+=head2 get_all_tests
+  Arg [1]    : string $dir
+               the name of the directory retrieve a list of tests from
+  Arg [2]    : (optional) listref $input_files 
+               testfiles or directories to retrieve. If not specified all 
+               ".t" files in $dir are taken.
+  Example    : @test_files = read_test_dir('t');
+  Description: Returns a list of testfiles in the directories specified by
+               the @tests argument.  The relative path is given as well as
+               with the testnames returned.  Only files ending with .t are
+               returned.  Subdirectories are recursively entered and the test
+               files returned within them are returned as well.
+  Returntype : listref of strings.
+  Exceptions : none
+  Caller     : general
+=cut
+sub get_all_tests {
+  my ($dir, $input_files) = @_;
+  my @files;
+  my @out = ();
+  local *DIR;
+  unless(opendir(DIR, $dir)) {
+    warn("WARNING: cannot open directory $dir\n");
+    return [];
+  }
+  if($input_files && @$input_files) {
+    #input files were specified so use them
+    @files = @$input_files; 
+  } else {
+    #otherwise use every file in the directory
+    @files = readdir DIR;
+  }     
+  #filter out CVS files, files beginning with '.' and files ending in ~
+  @files = grep !/(^\.)|(^CVS$)|(~$)/, @files;
+  foreach my $file (@files) {
+    $file = "$dir/$file";
+    if(-d $file) {
+      #do a recursive call on directories
+      push @out, @{get_all_tests("$file")};
+    } elsif ($file =~ /\.t$/) {
+      #files ending with a '.t' are considered test files
+      unless(-r $file && -f $file) {
+	warn("WARNING: cannot read test file $file\n");
+      }
+      push @out, $file;
+    } 
+  }
+  closedir DIR;
+  return \@out;
+}
+sub usage {
+  print "usage:\n";
+  print "\tlist tests:        run_tests.pl -l [<testfiles or dirs> ...]\n";
+  print "\trun tests:         run_tests.pl [<testfiles or dirs> ...]\n";
+}