Skip to content
Snippets Groups Projects
ConversionSupport.pm 57.3 KiB
Newer Older
=head1 LICENSE

Andy Yates's avatar
Andy Yates committed
  Copyright (c) 1999-2012 The European Bioinformatics Institute and
  Genome Research Limited.  All rights reserved.

  This software is distributed under a modified Apache license.
  For license details, please see

    http://www.ensembl.org/info/about/code_licence.html

=head1 CONTACT

  Please email comments or questions to the public Ensembl
  developers list at <dev@ensembl.org>.

  Questions may also be sent to the Ensembl help desk at
  <helpdesk@ensembl.org>.

=cut

=head1 NAME

Bio::EnsEMBL::Utils::ConversionSupport - Utility module for Vega release and
schema conversion scripts

=head1 SYNOPSIS

  my $serverroot = '/path/to/ensembl';
  my $support = new Bio::EnsEMBL::Utils::ConversionSupport($serverroot);

  # parse common options
  $support->parse_common_options;

  # parse extra options for your script
  $support->parse_extra_options( 'string_opt=s', 'numeric_opt=n' );

  # ask user if he wants to run script with these parameters
  $support->confirm_params;

  # see individual method documentation for more stuff

=head1 DESCRIPTION

This module is a collection of common methods and provides helper
functions for the Vega release and schema conversion scripts. Amongst
others, it reads options from a config file, parses commandline options
and does logging.

=head1 METHODS

=cut

package Bio::EnsEMBL::Utils::ConversionSupport;

use strict;
use warnings;
no warnings 'uninitialized';

use Getopt::Long;
use Text::Wrap;
use Bio::EnsEMBL::Utils::Exception qw(throw warning);
use FindBin qw($Bin $Script);
use POSIX qw(strftime);
use Cwd qw(abs_path);
use DBI;
use Data::Dumper;

my $species_c = 1; #counter to be used for each database connection made

=head2 new

  Arg[1]      : String $serverroot - root directory of your ensembl sandbox
  Example     : my $support = new Bio::EnsEMBL::Utils::ConversionSupport(
                                        '/path/to/ensembl');
  Description : constructor
  Return type : Bio::EnsEMBL::Utils::ConversionSupport object
  Exceptions  : thrown if no serverroot is provided
  Caller      : general

=cut

sub new {
  my $class = shift;
  (my $serverroot = shift) or throw("You must supply a serverroot.");
  my $self = {
    '_serverroot'   => $serverroot,
    '_param'        => { interactive => 1 },
    '_warnings'     => 0,
  };
  bless ($self, $class);
  return $self;
}

=head2 parse_common_options

  Example     : $support->parse_common_options;
  Description : This method reads options from a configuration file and parses
                some commandline options that are common to all scripts (like
                db connection settings, help, dry-run). Commandline options
                will override config file settings. 

                All options will be accessible via $self->param('name').
  Return type : true on success 
  Exceptions  : thrown if configuration file can't be opened
  Caller      : general

=cut

sub parse_common_options {
  my $self = shift;

  # read commandline options
  my %h;
  Getopt::Long::Configure("pass_through");
  &GetOptions( \%h,
	       'dbname|db_name=s',
	       'host|dbhost|db_host=s',
	       'port|dbport|db_port=n',
	       'user|dbuser|db_user=s',
	       'pass|dbpass|db_pass=s',
	       'conffile|conf=s',
	       'logfile|log=s',
	       'logpath=s',
               'log_base_path=s',
	       'help|h|?',
	     );

  # reads config file
  my $conffile = $h{'conffile'} || $self->serverroot . "/sanger-plugins/vega/conf/ini-files/Conversion.ini";
  $conffile = abs_path($conffile);
  if (-e $conffile) {
    open(CONF, $conffile) or throw( 
      "Unable to open configuration file $conffile for reading: $!");
    my $serverroot = $self->serverroot;
    while (<CONF>) {
      chomp;

      # remove comments
      s/^[#;].*//;
      s/\s+[;].*$//;

      # read options into internal parameter datastructure, removing whitespace
      next unless (/(\w\S*)\s*=\s*(\S*)\s*/);
      my $name = $1;
      my $val = $2;
      if ($val =~ /\$SERVERROOT/) {
	$val =~ s/\$SERVERROOT/$serverroot/g;
	$val = abs_path($val);
      }
      $self->param($name, $val);
    }
    $self->param('conffile', $conffile);
  }
  elsif ($conffile) {
    warning("Unable to open configuration file $conffile for reading: $!");
  }

# override configured parameter with commandline options
  map { $self->param($_, $h{$_}) } keys %h;


  return (1) if $self->param('nolog');

  # if logpath & logfile are not set, set them here to /ensemblweb/vega_dev/shared/logs/conversion/DBNAME/SCRIPNAME_NN.log
  if (! defined($self->param('log_base_path')))  {
Steve Trevanion's avatar
Steve Trevanion committed
    $self->param('log_base_path','/ensemblweb/shared/logs/conversion/');
  }
  my $dbname = $self->param('dbname');
  $dbname =~ s/^vega_//;
  if (not (defined($self->param('logpath')))){
    $self->param('logpath', $self->param('log_base_path')."/".$dbname."/" );
  }
  if ( not defined $self->param('logfile') ){
    my $log = $Script;
    $log =~ s/.pl$//g;
    my $counter;
    for ($counter=1 ; (-e $self->param('logpath')."/".$log."_".sprintf("%03d", $counter).".log"); $counter++){
#        warn  $self->param('logpath')."/".$log."_".$counter.".log";
    }
    $self->param('logfile', $log."_".sprintf("%03d", $counter).".log");
  }
  return(1);
}

=head2 parse_extra_options

  Arg[1-N]    : option descriptors that will be passed on to Getopt::Long
  Example     : $support->parse_extra_options('string_opt=s', 'numeric_opt=n');
  Description : Parse extra commandline options by passing them on to
                Getopt::Long and storing parameters in $self->param('name).
  Return type : true on success
  Exceptions  : none (caugth by $self->error)
  Caller      : general

=cut

sub parse_extra_options {
  my ($self, @params) = @_;
  Getopt::Long::Configure("no_pass_through");
  eval {
    # catch warnings to pass to $self->error
    local $SIG{__WARN__} = sub { die @_; };
    &GetOptions(\%{ $self->{'_param'} }, @params);
  };
  $self->error($@) if $@;
  return(1);
}

=head2 allowed_params

  Arg[1-N]    : (optional) List of allowed parameters to set
  Example     : my @allowed = $self->allowed_params(qw(param1 param2));
  Description : Getter/setter for allowed parameters. This is used by
                $self->confirm_params() to avoid cluttering of output with
                conffile entries not relevant for a given script. You can use
                $self->get_common_params() as a shortcut to set them.
  Return type : Array - list of allowed parameters
  Exceptions  : none
  Caller      : general

=cut

sub allowed_params {
  my $self = shift;

  # setter
  if (@_) {
    @{ $self->{'_allowed_params'} } = @_;
  }

  # getter
  if (ref($self->{'_allowed_params'}) eq 'ARRAY') {
    return @{ $self->{'_allowed_params'} };
  } else {
    return ();
  }
}

=head2 get_common_params

  Example     : my @allowed_params = $self->get_common_params, 'extra_param';
  Description : Returns a list of commonly used parameters in the conversion
                scripts. Shortcut for setting allowed parameters with
                $self->allowed_params().
  Return type : Array - list of common parameters
  Exceptions  : none
  Caller      : general

=cut

sub get_common_params {
  return qw(
	    conffile
	    dbname
	    host
	    port
	    user
	    pass
            nolog
	    logpath
            log_base_path
	    logfile
	    logappend
	    verbose
	    interactive
	    dry_run
	  );
}

=head2 get_loutre_params

  Arg         : (optional) return a list to parse or not
  Example     : $support->parse_extra_options($support->get_loutre_params('parse'))
  Description : Returns a list of commonly used loutre db parameters - parse option is
                simply used to distinguish between reporting and parsing parameters
  Return type : Array - list of common parameters
  Exceptions  : none
  Caller      : general

=cut

sub get_loutre_params {
  my ($self,$p) = @_;
  if ($p) {
    return qw(
	      loutrehost=s
	      loutreport=s
	      loutreuser=s
	      loutrepass=s
	      loutredbname=s
	    );
  }
  else {
    return qw(
	      loutrehost
	      loutreport
	      loutreuser
	      loutrepass
	      loutredbname
	    );
  }
}

=head2 remove_vega_params

  Example     : $support->remove_vega_params;
  Description : Removes Vega db conection parameters. Usefull to avoid clutter in log files when
                working exclusively with loutre
  Return type : none
  Exceptions  : none
  Caller      : general

=cut

sub remove_vega_params {
  my $self = shift;
  foreach my $param (qw(dbname host port user pass)) {
    $self->{'_param'}{$param} = undef;
  }
}

=head2 confirm_params

  Example     : $support->confirm_params;
  Description : Prints a table of parameters that were collected from config
                file and commandline and asks user to confirm if he wants
                to proceed.
  Return type : true on success
  Exceptions  : none
  Caller      : general

=cut

sub confirm_params {
  my $self = shift;

  # print parameter table
  print "Running script with these parameters:\n\n";
  print $self->list_all_params;

  if ($self->param('host') eq 'ensdb-1-10') {
    # ask user if he wants to proceed
    exit unless $self->user_proceed("**************\n\n You're working on ensdb-1-10! Is that correct and you want to continue ?\n\n**************");
  }
  else {
    # ask user if he wants to proceed
    exit unless $self->user_proceed("Continue?");
  }
  return(1);
}

=head2 list_all_params

  Example     : print LOG $support->list_all_params;
  Description : prints a table of the parameters used in the script
  Return type : String - the table to print
  Exceptions  : none
  Caller      : general

=cut

sub list_all_params {
  my $self = shift;
  my $txt = sprintf "    %-21s%-40s\n", qw(PARAMETER VALUE);
  $txt .= "    " . "-"x71 . "\n";
  $Text::Wrap::colums = 72;
  my @params = $self->allowed_params;
  foreach my $key (@params) {
    my @vals = $self->param($key);
    if (@vals) {
      $txt .= Text::Wrap::wrap( sprintf('   %-21s', $key),
				' 'x24,
				join(", ", @vals)
			      ) . "\n";
    }
  }
  $txt .= "\n";
  return $txt;
}

=head2 create_commandline_options

  Arg[1]      : Hashref $settings - hashref describing what to do
                Allowed keys:
                    allowed_params => 0|1   # use all allowed parameters
                    exclude => []           # listref of parameters to exclude
                    replace => {param => newval} # replace value of param with
                                                 # newval
  Example     : $support->create_commandline_options({
                    allowed_params => 1,
                    exclude => ['verbose'],
                    replace => { 'dbname' => 'homo_sapiens_vega_33_35e' }
                });
  Description : Creates a commandline options string that can be passed to any
                other script using ConversionSupport.
  Return type : String - commandline options string
  Exceptions  : none
  Caller      : general

=cut

sub create_commandline_options {
  my ($self, $settings, $param_hash) = @_;
  my %param_hash = $param_hash ? %$param_hash : ();

  # get all allowed parameters
  if ($settings->{'allowed_params'}) {
    # exclude params explicitly stated
    my %exclude = map { $_ => 1 } @{ $settings->{'exclude'} || [] };
    foreach my $param ($self->allowed_params) {
      unless ($exclude{$param}) {
	my ($first, @rest) = $self->param($param);
	next unless (defined($first));
	
	if (@rest) {
	  $first = join(",", $first, @rest);
	}
	$param_hash{$param} = $first;
      }
    }
  }

  # replace values
  foreach my $key (keys %{ $settings->{'replace'} || {} }) {
    $param_hash{$key} = $settings->{'replace'}->{$key};
  }

  # create the commandline options string
  my $options_string;
  foreach my $param (keys %param_hash) {
    $options_string .= sprintf("--%s %s ", $param, $param_hash{$param});
  }
  return $options_string;
}

=head2 check_required_params

  Arg[1-N]    : List @params - parameters to check
  Example     : $self->check_required_params(qw(dbname host port));
  Description : Checks $self->param to make sure the requested parameters
                have been set. Dies if parameters are missing.
  Return type : true on success
  Exceptions  : none
  Caller      : general

=cut

sub check_required_params {
  my ($self, @params) = @_;
  my @missing = ();
  foreach my $param (@params) {
    push @missing, $param unless $self->param($param);
  }
  if (@missing) {
    throw("Missing parameters: @missing.\nYou must specify them on the commandline or in your conffile.\n");
  }
  return(1);
}

=head2 user_proceed

  Arg[1]      : (optional) String $text - notification text to present to user
  Example     : # run a code snipped conditionally
                if ($support->user_proceed("Run the next code snipped?")) {
                    # run some code
                }

                # exit if requested by user
                exit unless ($support->user_proceed("Want to continue?"));
  Description : If running interactively, the user is asked if he wants to
                perform a script action. If he doesn't, this section is skipped
                and the script proceeds with the code. When running
                non-interactively, the section is run by default.
  Return type : TRUE to proceed, FALSE to skip.
  Exceptions  : none
  Caller      : general

=cut

sub user_proceed {
  my ($self, $text) = @_;

  if ($self->param('interactive')) {
    print "$text\n" if $text;
    print "[y/N] ";
    my $input = lc(<>);
    chomp $input;
    unless ($input eq 'y') {
      print "Skipping.\n";
      return(0);
    }
  }

  return(1);
}

=head2 user_confirm

  Description : DEPRECATED - please use user_proceed() instead

=cut

sub user_confirm {
  my $self = shift;
  exit unless $self->user_proceed("Continue?");
}

=head2 read_user_input

  Arg[1]      : (optional) String $text - notification text to present to user
  Example     : my $ret = $support->read_user_input("Choose a number [1/2/3]");
                if ($ret == 1) {
                    # do something
                } elsif ($ret == 2) {
                    # do something else
                }
  Description : If running interactively, the user is asked for input.
  Return type : String - user's input
  Exceptions  : none
  Caller      : general

=cut

sub read_user_input {
  my ($self, $text) = @_;

  if ($self->param('interactive')) {
    print "$text\n" if $text;
    my $input = <>;
    chomp $input;
    return $input;
  }
}

=head2 comma_to_list

  Arg[1-N]    : list of parameter names to parse
  Example     : $support->comma_to_list('chromosomes');
  Description : Transparently converts comma-separated lists into arrays (to
                allow different styles of commandline options, see perldoc
                Getopt::Long for details). Parameters are converted in place
                (accessible through $self->param('name')).
  Return type : true on success
  Exceptions  : none
  Caller      : general

=cut

sub comma_to_list {
  my $self = shift;
  foreach my $param (@_) {
    $self->param($param,
		 split (/,/, join (',', $self->param($param))));
  }
  return(1);
}

=head2 list_or_file

  Arg[1]      : Name of parameter to parse
  Example     : $support->list_or_file('gene');
  Description : Determines whether a parameter holds a list or it is a filename
                to read the list entries from.
  Return type : true on success
  Exceptions  : thrown if list file can't be opened
  Caller      : general

=cut

sub list_or_file {
  my ($self, $param) = @_;
  my @vals = $self->param($param);
  return unless (@vals);

  my $firstval = $vals[0];
  if (scalar(@vals) == 1 && -e $firstval) {
    # we didn't get a list of values, but a file to read values from
    @vals = ();
    open(IN, $firstval) or throw("Cannot open $firstval for reading: $!");
    while(<IN>){
      chomp;
      push(@vals, $_);
    }
    close(IN);
    $self->param($param, @vals);
  }
  $self->comma_to_list($param);
  return(1);
}

=head2 param

  Arg[1]      : Parameter name
  Arg[2-N]    : (optional) List of values to set
  Example     : my $dbname = $support->param('dbname');
                $support->param('port', 3306);
                $support->param('chromosomes', 1, 6, 'X');
  Description : Getter/setter for parameters. Accepts single-value params and
                list params.
  Return type : Scalar value for single-value parameters, array of values for
                list parameters
  Exceptions  : thrown if no parameter name is supplied
  Caller      : general

=cut

sub param {
  my $self = shift;
  my $name = shift or throw("You must supply a parameter name");

  # setter
  if (@_) {
    if (scalar(@_) == 1) {
      # single value
      $self->{'_param'}->{$name} = shift;
    } else {
      # list of values
      undef $self->{'_param'}->{$name};
      @{ $self->{'_param'}->{$name} } = @_;
    }
  }

  # getter
  if (ref($self->{'_param'}->{$name}) eq 'ARRAY') {
    # list parameter
    return @{ $self->{'_param'}->{$name} };
  } elsif (defined($self->{'_param'}->{$name})) {
    # single-value parameter
    return $self->{'_param'}->{$name};
  } else {
    return ();
  }
}

=head2 error

  Arg[1]      : (optional) String - error message
  Example     : $support->error("An error occurred: $@");
                exit(0) if $support->error;
  Description : Getter/setter for error messages
  Return type : String - error message
  Exceptions  : none
  Caller      : general

=cut

sub error {
  my $self = shift;
  $self->{'_error'} = shift if (@_);
  return $self->{'_error'};
}

=head2 warnings

  Example     : print LOG "There were ".$support->warnings." warnings.\n";
  Description : Returns the number of warnings encountered while running the
                script (the warning counter is increased by $self->log_warning).
  Return type : Int - number of warnings
  Exceptions  : none
  Caller      : general

=cut

sub warnings {
  my $self = shift;
  return $self->{'_warnings'};
}

=head2 serverroot

  Arg[1]      : (optional) String - root directory of your ensembl sandbox
  Example     : my $serverroot = $support->serverroot;
  Description : Getter/setter for the root directory of your ensembl sandbox.
                This is set when ConversionSupport object is created, so
                usually only used as a getter.
  Return type : String - the server root directory
  Exceptions  : none
  Caller      : general

=cut

sub serverroot {
  my $self = shift;
  $self->{'_serverroot'} = shift if (@_);
  return $self->{'_serverroot'};
}

=head2 get_database

  Arg[1]      : String $database - the type of database to connect to
                (eg core, otter)
  Arg[2]      : (optional) String $prefix - the prefix used for retrieving the
                connection settings from the configuration
  Example     : my $db = $support->get_database('core');
  Description : Connects to the database specified.
  Return type : DBAdaptor of the appropriate type
  Exceptions  : thrown if asking for unknown database
  Caller      : general

=cut

sub get_database {
  my $self = shift;
  my $database = shift or throw("You must provide a database");
  my $prefix = shift || '';
  $self->check_required_params(
    "${prefix}host",
    "${prefix}port",
    "${prefix}user",
    "${prefix}dbname",
  );
  my %adaptors = (
    core    => 'Bio::EnsEMBL::DBSQL::DBAdaptor',
    ensembl => 'Bio::EnsEMBL::DBSQL::DBAdaptor',
    evega   => 'Bio::EnsEMBL::DBSQL::DBAdaptor',
    otter   => 'Bio::Otter::DBSQL::DBAdaptor',
    vega    => 'Bio::Otter::DBSQL::DBAdaptor',
    compara => 'Bio::EnsEMBL::Compara::DBSQL::DBAdaptor',
    loutre  => 'Bio::Vega::DBSQL::DBAdaptor',
  );
  throw("Unknown database: $database") unless $adaptors{$database};

  $self->dynamic_use($adaptors{$database});
  my $species = 'species' . $species_c;
  my $dba = $adaptors{$database}->new(
    -host    => $self->param("${prefix}host"),
    -port    => $self->param("${prefix}port"),
    -user    => $self->param("${prefix}user"),
    -pass    => $self->param("${prefix}pass") || '',
    -dbname  => $self->param("${prefix}dbname"),
    -group   => $database,
    -species => $species,
  );
  #can use this approach to get dna from another db
#  my $dna_db = $adaptors{$database}->new(
#    -host => 'otterlive',
#    -port => '3301',
#    -user => $self->param("${prefix}user"),
#    -pass => $self->param("${prefix}pass"),
#    -dbname => 'loutre_human',
#  );
#  $dba->dnadb($dna_db);

  # otherwise explicitely set the dnadb to itself - by default the Registry assumes
  # a group 'core' for this now
  $dba->dnadb($dba);

  $self->{'_dba'}->{$database} = $dba;
  $self->{'_dba'}->{'default'} = $dba unless $self->{'_dba'}->{'default'};
  return $self->{'_dba'}->{$database};
}


=head2 get_dbconnection

  Arg[1]      : (optional) String $prefix - the prefix used for retrieving the
                connection settings from the configuration
  Example     : my $dbh = $self->get_dbconnection;
  Description : Connects to the database server specified. You don't have to
                specify a database name (this is useful for running commands
                like $dbh->do('show databases')).
  Return type : DBI database handle
  Exceptions  : thrown if connection fails
  Caller      : general
  Status      : At Risk

=cut

sub get_dbconnection {
  my $self = shift;
  my $prefix = shift;
 
  $self->check_required_params(
      "${prefix}host",
      "${prefix}port",
      "${prefix}user",
  );

  my $dsn = "DBI:" . ($self->param('driver')||'mysql') .
            ":host=" . $self->param("${prefix}host") .
            ";port=" . $self->param("${prefix}port");

  if ($self->param("${prefix}dbname")) {
    $dsn .= ";dbname=".$self->param("${prefix}dbname");
  }

#  warn $dsn;

  my $dbh;
  eval{
    $dbh = DBI->connect($dsn, $self->param("${prefix}user"),
      $self->param("${prefix}pass"), {'RaiseError' => 1, 'PrintError' => 0});
  };

  if (!$dbh || $@ || !$dbh->ping) {
    $self->log_error("Could not connect to db server as user ".
      $self->param("${prefix}user") .
      " using [$dsn] as a locator:\n" . $DBI::errstr . $@);
  }

  $self->{'_dbh'} = $dbh;
  return $self->{'_dbh'};

}


=head2 dba

  Arg[1]      : (optional) String $database - type of db apaptor to retrieve
  Example     : my $dba = $support->dba;
  Description : Getter for database adaptor. Returns default (i.e. created
                first) db adaptor if no argument is provided.
  Return type : Bio::EnsEMBL::DBSQL::DBAdaptor or Bio::Otter::DBSQL::DBAdaptor
  Exceptions  : none
  Caller      : general

=cut

sub dba {
  my ($self, $database) = shift;
  return $self->{'_dba'}->{$database} || $self->{'_dba'}->{'default'};
}

=head2 dynamic_use

  Arg [1]    : String $classname - The name of the class to require/import
  Example    : $self->dynamic_use('Bio::EnsEMBL::DBSQL::DBAdaptor');
  Description: Requires and imports the methods for the classname provided,
               checks the symbol table so that it doesnot re-require modules
               that have already been required.
  Returntype : true on success
  Exceptions : Warns to standard error if module fails to compile
  Caller     : internal

=cut

sub dynamic_use {
  my ($self, $classname) = @_;
  my ($parent_namespace, $module) = $classname =~/^(.*::)(.*)$/ ? ($1,$2) : ('::', $classname);

  no strict 'refs';
  # return if module has already been imported
  return 1 if $parent_namespace->{$module.'::'} && %{ $parent_namespace->{$module.'::'}||{} };

  eval "require $classname";
  throw("Failed to require $classname: $@") if ($@);
  $classname->import();

  return 1;
}

=head2 get_chrlength

  Arg[1]      : (optional) Bio::EnsEMBL::DBSQL::DBAdaptor $dba
  Arg[2]      : (optional) String $version - coord_system version
  Arg[3]      : (optional) String $type - type of region eg chromsome (defaults to 'toplevel')
  Arg[4]      : (optional) Boolean - return non reference slies as well (required for haplotypes eq 6-COX)
  Example     : my $chr_length = $support->get_chrlength($dba);
  Description : Get all chromosomes and their length from the database. Return
                chr_name/length for the chromosomes the user requested (or all
                chromosomes by default)
  Return type : Hashref - chromosome_name => length
  Exceptions  : thrown if not passing a Bio::EnsEMBL::DBSQL::DBAdaptor
  Caller      : general

=cut

sub get_chrlength {
  my ($self, $dba, $version,$type,$include_non_reference) = @_;
  $type ||= 'toplevel';
  throw("get_chrlength should be passed a Bio::EnsEMBL::DBSQL::DBAdaptor\n")
    unless ($dba->isa('Bio::EnsEMBL::DBSQL::DBAdaptor'));

  my $sa = $dba->get_SliceAdaptor;

  my @chromosomes = map { $_->seq_region_name } 
    @{ $sa->fetch_all($type,$version,$include_non_reference) };
  my %chr = map { $_ => $sa->fetch_by_region($type, $_, undef, undef, undef, $version)->length } @chromosomes;

  my @wanted = $self->param('chromosomes');
  if (@wanted) {
    # check if user supplied invalid chromosome names
    foreach my $chr (@wanted) {
      my $found = 0;
      foreach my $chr_from_db (keys %chr) {
	if ($chr_from_db eq $chr) {
	  $found = 1;
	  last;
	}
      }
      unless ($found) {
	warning("Didn't find chromosome $chr in database " .
		  $self->param('dbname'));
      }
    }

    # filter to requested chromosomes only
  HASH:
    foreach my $chr_from_db (keys %chr) {
      foreach my $chr (@wanted) {
	if ($chr_from_db eq $chr) {
	  next HASH;
	}
      }
      delete($chr{$chr_from_db});
    }
  }

  return \%chr;
}

=head2 get_ensembl_chr_mapping

  Arg[1]      : (optional) Bio::EnsEMBL::DBSQL::DBAdaptor $dba
  Arg[2]      : (optional) String $version - coord_system version
  Example     : my $ensembl_mapping = $support->get_ensembl_chr_mapping($dba);
  Description : Gets a mapping between Vega chromosome names and their
                equivalent Ensembl chromosomes. Works with non-reference chromosomes
  Return type : Hashref - Vega name => Ensembl name
  Exceptions  : thrown if not passing a Bio::EnsEMBL::DBSQL::DBAdaptor
  Caller      : general

=cut

sub get_ensembl_chr_mapping {
  my ($self, $dba, $version) = @_;
  $dba ||= $self->dba;
  throw("get_ensembl_chr_mapping should be passed a Bio::EnsEMBL::DBSQL::DBAdaptor\n") unless ($dba->isa('Bio::EnsEMBL::DBSQL::DBAdaptor'));

  my $sa = $dba->get_SliceAdaptor;
  my @chromosomes = map { $_->seq_region_name } 
    @{ $sa->fetch_all('chromosome', $version, 1) };

  my %chrs;
  foreach my $chr (@chromosomes) {
    my $sr = $sa->fetch_by_region('chromosome', $chr, undef, undef, undef, $version);
    my ($ensembl_name_attr) = @{ $sr->get_all_Attributes('ensembl_name') };
    if ($ensembl_name_attr) {
      $chrs{$chr} = $ensembl_name_attr->value;
    } else {
      $chrs{$chr} = $chr;
    }
  }
  return \%chrs;
}

=head2 get_taxonomy_id

  Arg[1]      : Bio::EnsEMBL::DBSQL::DBAdaptor $dba
  Example     : my $sid = $support->get_taxonony_id($dba);
  Description : Retrieves the taxononmy ID from the meta table
  Return type : Int - the taxonomy ID
  Exceptions  : thrown if no taxonomy ID is found in the database
  Caller      : general

=cut

sub get_taxonomy_id {
  my ($self, $dba) = @_;
  $dba ||= $self->dba;
  my $sql = 'SELECT meta_value FROM meta WHERE meta_key = "species.taxonomy_id"';
  my $sth = $dba->dbc->db_handle->prepare($sql);
  $sth->execute;
  my ($tid) = $sth->fetchrow_array;
  $sth->finish;
  $self->throw("Could not determine taxonomy_id from database.") unless $tid;
  return $tid;
}

=head2 get_species_scientific_name

  Arg[1]      : Bio::EnsEMBL::DBSQL::DBAdaptor $dba
  Example     : my $species = $support->get_species_scientific_name($dba);
  Description : Retrieves the species scientific name (Genus species) from the
                meta table
  Return type : String - species scientific name
  Exceptions  : thrown if species name can not be determined from db
  Caller      : general

=cut

sub get_species_scientific_name {
  my ($self, $dba) = @_;
  $dba ||= $self->dba;
  my $sql_tmp = "SELECT meta_value FROM meta WHERE meta_key = \'species.classification\' ORDER BY meta_id";
  my $sql = $dba->dbc->add_limit_clause($sql_tmp,2);
  my $sth = $dba->dbc->db_handle->prepare($sql);
  $sth->execute;
  my @sp;
  while (my @row = $sth->fetchrow_array) {
    push @sp, $row[0];
  }