Skip to content
Snippets Groups Projects
ConversionSupport.pm 52.8 KiB
Newer Older
  Copyright (c) 1999-2009 The European Bioinformatics Institute and
  Genome Research Limited.  All rights reserved.
  This software is distributed under a modified Apache license.
  For license details, please see
    http://www.ensembl.org/info/about/code_licence.html
  Please email comments or questions to the public Ensembl
  developers list at <ensembl-dev@ebi.ac.uk>.
  Questions may also be sent to the Ensembl help desk at
  <helpdesk@ensembl.org>.
Bio::EnsEMBL::Utils::ConversionSupport - Utility module for Vega release and
schema conversion scripts
  my $serverroot = '/path/to/ensembl';
  my $support = new Bio::EnsEMBL::Utils::ConversionSupport($serverroot);
  # parse common options
  $support->parse_common_options;
  # parse extra options for your script
  $support->parse_extra_options( 'string_opt=s', 'numeric_opt=n' );
  # ask user if he wants to run script with these parameters
  $support->confirm_params;
  # see individual method documentation for more stuff
=head1 DESCRIPTION

This module is a collection of common methods and provides helper
functions for the Vega release and schema conversion scripts. Amongst
others, it reads options from a config file, parses commandline options
and does logging.

=head1 METHODS
package Bio::EnsEMBL::Utils::ConversionSupport;

use strict;
use warnings;
no warnings 'uninitialized';

use Getopt::Long;
use Text::Wrap;
use Bio::EnsEMBL::Utils::Exception qw(throw warning);
use FindBin qw($Bin $Script);
use POSIX qw(strftime);
use Cwd qw(abs_path);
Patrick Meidl's avatar
Patrick Meidl committed
use DBI;
Web Admin's avatar
Web Admin committed
use Data::Dumper;

=head2 new

  Arg[1]      : String $serverroot - root directory of your ensembl sandbox
  Example     : my $support = new Bio::EnsEMBL::Utils::ConversionSupport(
                                        '/path/to/ensembl');
  Description : constructor
  Return type : Bio::EnsEMBL::Utils::ConversionSupport object
  Exceptions  : thrown if no serverroot is provided
  Caller      : general

=cut

sub new {
    my $class = shift;
    (my $serverroot = shift) or throw("You must supply a serverroot.");
    my $self = {
        '_serverroot'   => $serverroot,
        '_param'        => { interactive => 1 },
        '_warnings'     => 0,
    };
    bless ($self, $class);
    return $self;
}

=head2 parse_common_options

  Example     : $support->parse_common_options;
  Description : This method reads options from a configuration file and parses
                some commandline options that are common to all scripts (like
                db connection settings, help, dry-run). Commandline options
                will override config file settings. 

                All options will be accessible via $self->param('name').
  Return type : true on success 
  Exceptions  : thrown if configuration file can't be opened
  Caller      : general

=cut

sub parse_common_options {
    my $self = shift;

    # read commandline options
    my %h;
    Getopt::Long::Configure("pass_through");
    &GetOptions( \%h,
        'dbname|db_name=s',
        'host|dbhost|db_host=s',
        'port|dbport|db_port=n',
        'user|dbuser|db_user=s',
        'pass|dbpass|db_pass=s',
        'conffile|conf=s',
        'logfile|log=s',
        'logappend|log_append=s',
        'verbose|v=s',
        'interactive|i=s',
        'dry_run|dry|n=s',
        'help|h|?',
    );

Web Admin's avatar
Web Admin committed

    my $conffile = $h{'conffile'} || $self->serverroot . "/sanger-plugins/vega/conf/ini-files/Conversion.ini";
    $conffile = abs_path($conffile);
    if (-e $conffile) {
        open(CONF, $conffile) or throw( 
            "Unable to open configuration file $conffile for reading: $!");
        my $serverroot = $self->serverroot;
Web Admin's avatar
Web Admin committed
            # read options into internal parameter datastructure, removing whitespace
            next unless (/(\w\S*)\s*=\s*(\S*)\s*/);
            my $name = $1;
            my $val = $2;
            if ($val =~ /\$SERVERROOT/) {
                $val =~ s/\$SERVERROOT/$serverroot/g;
                $val = abs_path($val);
            }
            $self->param($name, $val);
        $self->param('conffile', $conffile);
Patrick Meidl's avatar
Patrick Meidl committed
    } elsif ($conffile) {
        warning("Unable to open configuration file $conffile for reading: $!");
    }
Web Admin's avatar
Web Admin committed

    
    # override configured parameter with commandline options
    map { $self->param($_, $h{$_}) } keys %h;
    return(1);
}

=head2 parse_extra_options

  Arg[1-N]    : option descriptors that will be passed on to Getopt::Long
  Example     : $support->parse_extra_options('string_opt=s', 'numeric_opt=n');
  Description : Parse extra commandline options by passing them on to
                Getopt::Long and storing parameters in $self->param('name).
  Return type : true on success
  Exceptions  : none (caugth by $self->error)
  Caller      : general

=cut

sub parse_extra_options {
    my ($self, @params) = @_;
    Getopt::Long::Configure("no_pass_through");
    eval {
        # catch warnings to pass to $self->error
        local $SIG{__WARN__} = sub { die @_; };
        &GetOptions(\%{ $self->{'_param'} }, @params);
    };
    $self->error($@) if $@;
    return(1);
}

=head2 allowed_params

  Arg[1-N]    : (optional) List of allowed parameters to set
  Example     : my @allowed = $self->allowed_params(qw(param1 param2));
  Description : Getter/setter for allowed parameters. This is used by
                $self->confirm_params() to avoid cluttering of output with
                conffile entries not relevant for a given script. You can use
                $self->get_common_params() as a shortcut to set them.
  Return type : Array - list of allowed parameters
  Exceptions  : none
  Caller      : general

=cut

sub allowed_params {
    my $self = shift;

    # setter
    if (@_) {
        @{ $self->{'_allowed_params'} } = @_;
    }

    # getter
    if (ref($self->{'_allowed_params'}) eq 'ARRAY') {
        return @{ $self->{'_allowed_params'} };
    } else {
        return ();
    }
}

=head2 get_common_params

  Example     : my @allowed_params = $self->get_common_params, 'extra_param';
  Description : Returns a list of commonly used parameters in the conversion
                scripts. Shortcut for setting allowed parameters with
                $self->allowed_params().
  Return type : Array - list of common parameters
  Exceptions  : none
  Caller      : general

=cut

sub get_common_params {
    return qw(
        conffile
        dbname
        host
        port
        user
        pass
        logpath
        logfile
        logappend
        verbose
        interactive
        dry_run
    );
}

Steve Trevanion's avatar
Steve Trevanion committed
=head2 get_loutre_params
Web Admin's avatar
Web Admin committed

Steve Trevanion's avatar
Steve Trevanion committed
  Example     : my @allowed_params = $self->get_loutre_params, 'extra_param';
Web Admin's avatar
Web Admin committed
  Description : Returns a list of commonly used parameters in for working with a loutre db
  Return type : Array - list of common parameters
  Exceptions  : none
  Caller      : general

=cut

sub get_loutre_params {
	return qw(
			  loutrehost
			  loutreport
			  loutreuser
			  loutrepass
			  loutredbname
		  );
}

Steve Trevanion's avatar
Steve Trevanion committed
=head2 remove_vega_params

  Example     : $support->remove_vega_params;
  Description : Removes Vega db conection parameters. Usefull to avoid clutter in log files when
                working exclusively with loutre
  Return type : none
  Exceptions  : none
  Caller      : general

=cut

sub remove_vega_params {
	my $self = shift;
	foreach my $param (qw(dbname host port user pass)) {
		$self->{'_param'}{$param} = undef;
	}
}
Web Admin's avatar
Web Admin committed

=head2 confirm_params

  Example     : $support->confirm_params;
  Description : Prints a table of parameters that were collected from config
                file and commandline and asks user to confirm if he wants
                to proceed.
  Return type : true on success
  Exceptions  : none
  Caller      : general

=cut

sub confirm_params {
    my $self = shift;

    # print parameter table
    print "Running script with these parameters:\n\n";
    print $self->list_all_params;

	if ($self->param('host') eq 'web-4-11') {
		# ask user if he wants to proceed
		exit unless $self->user_proceed("**************\n\n You're working on web-4-11! Is that correct and you want to continue ?\n\n**************");
	}
	else {
		# ask user if he wants to proceed
		exit unless $self->user_proceed("Continue?");
    }
    return(1);
}

=head2 list_all_params

  Example     : print LOG $support->list_all_params;
  Description : prints a table of the parameters used in the script
  Return type : String - the table to print
  Exceptions  : none
  Caller      : general

=cut

sub list_all_params {
    my $self = shift;
Steve Trevanion's avatar
Steve Trevanion committed
    my $txt = sprintf "    %-21s%-40s\n", qw(PARAMETER VALUE);
    $txt .= "    " . "-"x71 . "\n";
    my @params = $self->allowed_params;
    foreach my $key (@params) {
        if (@vals) {
Steve Trevanion's avatar
Steve Trevanion committed
            $txt .= Text::Wrap::wrap( sprintf('   %-21s', $key),
                                      ' 'x24,
                                      join(", ", @vals)
                                    ) . "\n";
        }
=head2 create_commandline_options

  Arg[1]      : Hashref $settings - hashref describing what to do
                Allowed keys:
                    allowed_params => 0|1   # use all allowed parameters
                    exclude => []           # listref of parameters to exclude
                    replace => {param => newval} # replace value of param with
                                                 # newval
  Example     : $support->create_commandline_options({
                    allowed_params => 1,
                    exclude => ['verbose'],
                    replace => { 'dbname' => 'homo_sapiens_vega_33_35e' }
                });
  Description : Creates a commandline options string that can be passed to any
                other script using ConversionSupport.
  Return type : String - commandline options string
  Exceptions  : none
  Caller      : general

=cut

sub create_commandline_options {
    my ($self, $settings) = @_;
    my %param_hash;

    # get all allowed parameters
    if ($settings->{'allowed_params'}) {
        # exclude params explicitly stated
        my %exclude = map { $_ => 1 } @{ $settings->{'exclude'} || [] };
        
        foreach my $param ($self->allowed_params) {
            unless ($exclude{$param}) {
                my ($first, @rest) = $self->param($param);
                next unless (defined($first));

                if (@rest) {
                    $first = join(",", $first, @rest);
                }
                $param_hash{$param} = $first;
            }
        }

    }

    # replace values
    foreach my $key (keys %{ $settings->{'replace'} || {} }) {
        $param_hash{$key} = $settings->{'replace'}->{$key};
    }

    # create the commandline options string
    my $options_string;
    foreach my $param (keys %param_hash) {
        $options_string .= sprintf("--%s %s ", $param, $param_hash{$param});
    }
    
    return $options_string;
}

=head2 check_required_params

  Arg[1-N]    : List @params - parameters to check
  Example     : $self->check_required_params(qw(dbname host port));
  Description : Checks $self->param to make sure the requested parameters
                have been set. Dies if parameters are missing.
  Return type : true on success
  Exceptions  : none
  Caller      : general

=cut

sub check_required_params {
    my ($self, @params) = @_;
    my @missing = ();
    foreach my $param (@params) {
        push @missing, $param unless $self->param($param);
    }
    if (@missing) {
        throw("Missing parameters: @missing.\nYou must specify them on the commandline or in your conffile.\n");
    }
    return(1);
}

=head2 user_proceed

  Arg[1]      : (optional) String $text - notification text to present to user
  Example     : # run a code snipped conditionally
                if ($support->user_proceed("Run the next code snipped?")) {
                    # run some code
                }
                # exit if requested by user
                exit unless ($support->user_proceed("Want to continue?"));
  Description : If running interactively, the user is asked if he wants to
                perform a script action. If he doesn't, this section is skipped
                and the script proceeds with the code. When running
                non-interactively, the section is run by default.
  Return type : TRUE to proceed, FALSE to skip.
sub user_proceed {
    my ($self, $text) = @_;
        print "$text\n" if $text;
        print "[y/N] ";
        my $input = lc(<>);
        chomp $input;
        unless ($input eq 'y') {
            print "Skipping.\n";
            return(0);
=head2 user_confirm

  Description : DEPRECATED - please use user_proceed() instead

=cut

sub user_confirm {
    my $self = shift;
    exit unless $self->user_proceed("Continue?");
}

=head2 read_user_input

  Arg[1]      : (optional) String $text - notification text to present to user
  Example     : my $ret = $support->read_user_input("Choose a number [1/2/3]");
                if ($ret == 1) {
                    # do something
                } elsif ($ret == 2) {
                    # do something else
                }
  Description : If running interactively, the user is asked for input.
  Return type : String - user's input
  Exceptions  : none
  Caller      : general

=cut

sub read_user_input {
    my ($self, $text) = @_;

    if ($self->param('interactive')) {
        print "$text\n" if $text;
        my $input = <>;
        chomp $input;
        return $input;
    }
}

=head2 comma_to_list

  Arg[1-N]    : list of parameter names to parse
  Example     : $support->comma_to_list('chromosomes');
  Description : Transparently converts comma-separated lists into arrays (to
                allow different styles of commandline options, see perldoc
                Getopt::Long for details). Parameters are converted in place
                (accessible through $self->param('name')).
  Return type : true on success
  Exceptions  : none
  Caller      : general

=cut

sub comma_to_list {
    my $self = shift;
    foreach my $param (@_) {
        $self->param($param,
            split (/,/, join (',', $self->param($param))));
    }
    return(1);
}

=head2 list_or_file

  Arg[1]      : Name of parameter to parse
  Example     : $support->list_or_file('gene_stable_id');
  Description : Determines whether a parameter holds a list or it is a filename
                to read the list entries from.
  Return type : true on success
  Exceptions  : thrown if list file can't be opened
  Caller      : general

=cut

sub list_or_file {
    my ($self, $param) = @_;
    my @vals = $self->param($param);
    return unless (@vals);

    my $firstval = $vals[0];
    if (scalar(@vals) == 1 && -e $firstval) {
        # we didn't get a list of values, but a file to read values from
        @vals = ();
        open(IN, $firstval) or throw("Cannot open $firstval for reading: $!");
        while(<IN>){
            chomp;
            push(@vals, $_);
        }
        close(IN);
        $self->param($param, @vals);
    }
    $self->comma_to_list($param);
    return(1);
}

=head2 param

  Arg[1]      : Parameter name
  Arg[2-N]    : (optional) List of values to set
  Example     : my $dbname = $support->param('dbname');
                $support->param('port', 3306);
                $support->param('chromosomes', 1, 6, 'X');
  Description : Getter/setter for parameters. Accepts single-value params and
                list params.
  Return type : Scalar value for single-value parameters, array of values for
                list parameters
  Exceptions  : thrown if no parameter name is supplied
  Caller      : general

=cut

sub param {
    my $self = shift;
    my $name = shift or throw("You must supply a parameter name");

    # setter
    if (@_) {
        if (scalar(@_) == 1) {
            # single value
            $self->{'_param'}->{$name} = shift;
        } else {
            # list of values
            undef $self->{'_param'}->{$name};
            @{ $self->{'_param'}->{$name} } = @_;
        }
    }

    # getter
    if (ref($self->{'_param'}->{$name}) eq 'ARRAY') {
        # list parameter
        return @{ $self->{'_param'}->{$name} };
    } elsif (defined($self->{'_param'}->{$name})) {
        # single-value parameter
        return $self->{'_param'}->{$name};
    } else {
        return ();
    }
}

=head2 error

  Arg[1]      : (optional) String - error message
  Example     : $support->error("An error occurred: $@");
                exit(0) if $support->error;
  Description : Getter/setter for error messages
  Return type : String - error message
  Exceptions  : none
  Caller      : general

=cut

sub error {
    my $self = shift;
    $self->{'_error'} = shift if (@_);
    return $self->{'_error'};
}

=head2 warnings

  Example     : print LOG "There were ".$support->warnings." warnings.\n";
  Description : Returns the number of warnings encountered while running the
                script (the warning counter is increased by $self->log_warning).
  Return type : Int - number of warnings
  Exceptions  : none
  Caller      : general

=cut

sub warnings {
    my $self = shift;
    return $self->{'_warnings'};
}

=head2 serverroot

  Arg[1]      : (optional) String - root directory of your ensembl sandbox
  Example     : my $serverroot = $support->serverroot;
  Description : Getter/setter for the root directory of your ensembl sandbox.
                This is set when ConversionSupport object is created, so
                usually only used as a getter.
  Return type : String - the server root directory
  Exceptions  : none
  Caller      : general

=cut

sub serverroot {
    my $self = shift;
    $self->{'_serverroot'} = shift if (@_);
    return $self->{'_serverroot'};
}

=head2 get_database

  Arg[1]      : String $database - the type of database to connect to
                (eg core, otter)
  Arg[2]      : (optional) String $prefix - the prefix used for retrieving the
                connection settings from the configuration
  Example     : my $db = $support->get_database('core');
  Description : Connects to the database specified.
  Return type : DBAdaptor of the appropriate type
  Exceptions  : thrown if asking for unknown database
  Caller      : general

=cut

sub get_database {
    my $self = shift;
    my $database = shift or throw("You must provide a database");
Leo Gordon's avatar
Leo Gordon committed
    my $prefix = shift || '';
    $self->check_required_params(
        "${prefix}host",
        "${prefix}port",
        "${prefix}user",
        # "${prefix}pass", not required since might be empty
        "${prefix}dbname",
    );

    my %adaptors = (
        core    => 'Bio::EnsEMBL::DBSQL::DBAdaptor',
        ensembl => 'Bio::EnsEMBL::DBSQL::DBAdaptor',
        evega   => 'Bio::EnsEMBL::DBSQL::DBAdaptor',
        otter   => 'Bio::Otter::DBSQL::DBAdaptor',
        vega    => 'Bio::Otter::DBSQL::DBAdaptor',
		compara => 'Bio::EnsEMBL::DBSQL::DBAdaptor',
Web Admin's avatar
Web Admin committed
		loutre  => 'Bio::Vega::DBSQL::DBAdaptor',
Leo Gordon's avatar
Leo Gordon committed
    throw("Unknown database: $database") unless $adaptors{$database};

    $self->dynamic_use($adaptors{$database});
    my $dba = $adaptors{$database}->new(
            -host   => $self->param("${prefix}host"),
            -port   => $self->param("${prefix}port"),
            -user   => $self->param("${prefix}user"),
            -pass   => $self->param("${prefix}pass") || '',
            -dbname => $self->param("${prefix}dbname"),
            -group  => $database,
    # explicitely set the dnadb to itself - by default the Registry assumes
    # a group 'core' for this now
    $dba->dnadb($dba);

    $self->{'_dba'}->{$database} = $dba;
    $self->{'_dba'}->{'default'} = $dba unless $self->{'_dba'}->{'default'};
    return $self->{'_dba'}->{$database};
}

Patrick Meidl's avatar
Patrick Meidl committed

=head2 get_dbconnection

  Arg[1]      : (optional) String $prefix - the prefix used for retrieving the
                connection settings from the configuration
  Example     : my $dbh = $self->get_dbconnection;
  Description : Connects to the database server specified. You don't have to
                specify a database name (this is useful for running commands
                like $dbh->do('show databases')).
  Return type : DBI database handle
  Exceptions  : thrown if connection fails
  Caller      : general
  Status      : At Risk

=cut

sub get_dbconnection {
  my $self = shift;
  my $prefix = shift;
Web Admin's avatar
Web Admin committed
 
Patrick Meidl's avatar
Patrick Meidl committed
  $self->check_required_params(
      "${prefix}host",
      "${prefix}port",
      "${prefix}user",
  );

  my $dsn = "DBI:" . ($self->param('driver')||'mysql') .
            ":host=" . $self->param("${prefix}host") .
            ";port=" . $self->param("${prefix}port");

  if ($self->param("${prefix}dbname")) {
    $dsn .= ";dbname=".$self->param("${prefix}dbname");
  }

#  warn $dsn;

Patrick Meidl's avatar
Patrick Meidl committed
  my $dbh;
  eval{
    $dbh = DBI->connect($dsn, $self->param("${prefix}user"),
      $self->param("${prefix}pass"), {'RaiseError' => 1, 'PrintError' => 0});
  };

  if (!$dbh || $@ || !$dbh->ping) {
    $self->log_error("Could not connect to db server as user ".
      $self->param("${prefix}user") .
      " using [$dsn] as a locator:\n" . $DBI::errstr . $@);
Patrick Meidl's avatar
Patrick Meidl committed
  }

  $self->{'_dbh'} = $dbh;
  return $self->{'_dbh'};

}

=head2 get_glovar_database

  Example     : my $dba = $support->get_glovar_database;
  Description : Connects to the Glovar database.
  Return type : Bio::EnsEMBL::ExternalData::Glovar::DBAdaptor
  Exceptions  : thrown if no connection to a core db exists
  Caller      : general

=cut

sub get_glovar_database {
    my $self = shift;
    $self->check_required_params(qw(
        glovarhost
        glovarport
        glovaruser
        glovarpass
        glovardbname
        oracle_home
        ld_library_path
        glovar_snp_consequence_exp
    ));

    # check for core dbadaptor
    my $core_db = $self->dba;
    unless ($core_db && (ref($core_db) =~ /Bio::.*::DBSQL::DBAdaptor/)) {
        $self->log_error("You have to connect to a core db before you can get a glovar dbadaptor.\n");
        exit;
    }
    
    # setup Oracle environment
    $ENV{'ORACLE_HOME'} = $self->param('oracle_home');
    $ENV{'LD_LIBRARY_PATH'} = $self->param('ld_library_path');

    # connect to Glovar db
    $self->dynamic_use('Bio::EnsEMBL::ExternalData::Glovar::DBAdaptor');
    my $dba = Bio::EnsEMBL::ExternalData::Glovar::DBAdaptor->new(
            -host   => $self->param("glovarhost"),
            -port   => $self->param("glovarport"),
            -user   => $self->param("glovaruser"),
            -pass   => $self->param("glovarpass"),
            -dbname => $self->param("glovardbname"),
            -group  => 'glovar',
    );

    # setup adaptor inter-relationships
    $dba->dnadb($core_db);
    $self->dynamic_use('Bio::EnsEMBL::ExternalData::Glovar::GlovarSNPAdaptor');
    my $glovar_snp_adaptor = $dba->get_GlovarSNPAdaptor;
    $glovar_snp_adaptor->consequence_exp($self->param('glovar_snp_consequence_exp'));
    $core_db->add_ExternalFeatureAdaptor($glovar_snp_adaptor);

=head2 dba
  Arg[1]      : (optional) String $database - type of db apaptor to retrieve
  Example     : my $dba = $support->dba;
  Description : Getter for database adaptor. Returns default (i.e. created
                first) db adaptor if no argument is provided.
  Return type : Bio::EnsEMBL::DBSQL::DBAdaptor or Bio::Otter::DBSQL::DBAdaptor
sub dba {
    my ($self, $database) = shift;
    return $self->{'_dba'}->{$database} || $self->{'_dba'}->{'default'};
}

=head2 dynamic_use

  Arg [1]    : String $classname - The name of the class to require/import
  Example    : $self->dynamic_use('Bio::EnsEMBL::DBSQL::DBAdaptor');
  Description: Requires and imports the methods for the classname provided,
               checks the symbol table so that it doesnot re-require modules
               that have already been required.
  Returntype : true on success
  Exceptions : Warns to standard error if module fails to compile
  Caller     : internal

=cut

sub dynamic_use {
    my ($self, $classname) = @_;
    my ($parent_namespace, $module) = $classname =~/^(.*::)(.*)$/ ?
                                        ($1,$2) : ('::', $classname);
    no strict 'refs';
    # return if module has already been imported
Steve Trevanion's avatar
Steve Trevanion committed
    return 1 if $parent_namespace->{$module.'::'} && %{ $parent_namespace->{$module.'::'}||{} };
    
    eval "require $classname";
    throw("Failed to require $classname: $@") if ($@);
    $classname->import();
  Arg[1]      : (optional) Bio::EnsEMBL::DBSQL::DBAdaptor $dba
  Arg[2]      : (optional) String $version - coord_system version
Web Admin's avatar
Web Admin committed
  Arg[3]      : (optional) String $type - type of region eg chromsome (defaults to 'toplevel')
  Arg[4]      : (optional) Boolean - return non reference slies as well (required for haplotypes eq 6-COX)
  Example     : my $chr_length = $support->get_chrlength($dba);
  Description : Get all chromosomes and their length from the database. Return
                chr_name/length for the chromosomes the user requested (or all
                chromosomes by default)
  Return type : Hashref - chromosome_name => length
  Exceptions  : thrown if not passing a Bio::EnsEMBL::DBSQL::DBAdaptor
  Caller      : general

=cut

sub get_chrlength {
Web Admin's avatar
Web Admin committed
    my ($self, $dba, $version,$type,$include_non_reference) = @_;
    $dba ||= $self->dba;
Web Admin's avatar
Web Admin committed
	$type ||= 'toplevel';
    throw("get_chrlength should be passed a Bio::EnsEMBL::DBSQL::DBAdaptor\n")
        unless ($dba->isa('Bio::EnsEMBL::DBSQL::DBAdaptor'));

    my $sa = $dba->get_SliceAdaptor;
Web Admin's avatar
Web Admin committed

    my @chromosomes = map { $_->seq_region_name } 
Web Admin's avatar
Web Admin committed
                            @{ $sa->fetch_all($type, $version,$include_non_reference) };
    my %chr = map { $_ => $sa->fetch_by_region($type, $_, undef, undef, undef, $version)->length } @chromosomes;

    my @wanted = $self->param('chromosomes');
    if (@wanted) {
        # check if user supplied invalid chromosome names
        foreach my $chr (@wanted) {
            my $found = 0;
            foreach my $chr_from_db (keys %chr) {
                if ($chr_from_db eq $chr) {
                    $found = 1;
                    last;
                }
            }
            unless ($found) {
                warning("Didn't find chromosome $chr in database " .
                         $self->param('dbname'));
            }
        }
        
        # filter to requested chromosomes only
        HASH: 
        foreach my $chr_from_db (keys %chr) {
            foreach my $chr (@wanted) {
                if ($chr_from_db eq $chr) {
                    next HASH;
                }
            }
            delete($chr{$chr_from_db});
        }
    }

    return \%chr;
}

=head2 get_ensembl_chr_mapping

  Arg[1]      : (optional) Bio::EnsEMBL::DBSQL::DBAdaptor $dba
  Arg[2]      : (optional) String $version - coord_system version
  Example     : my $ensembl_mapping = $support->get_ensembl_chr_mapping($dba);
  Description : Gets a mapping between Vega chromosome names and their
                equivalent Ensembl chromosomes.
  Return type : Hashref - Vega name => Ensembl name
  Exceptions  : thrown if not passing a Bio::EnsEMBL::DBSQL::DBAdaptor
  Caller      : general

=cut

sub get_ensembl_chr_mapping {
    my ($self, $dba, $version) = @_;
    $dba ||= $self->dba;
    throw("get_ensembl_chr_mapping should be passed a Bio::EnsEMBL::DBSQL::DBAdaptor\n") unless ($dba->isa('Bio::EnsEMBL::DBSQL::DBAdaptor'));

    my $sa = $dba->get_SliceAdaptor;
    my @chromosomes = map { $_->seq_region_name } 
                            @{ $sa->fetch_all('chromosome', $version) };

    my %chrs;
    foreach my $chr (@chromosomes) {
        my $sr = $sa->fetch_by_region('chromosome', $chr, undef, undef, undef, $version);
        my ($ensembl_name_attr) = @{ $sr->get_all_Attributes('ensembl_name') };
        if ($ensembl_name_attr) {
            $chrs{$chr} = $ensembl_name_attr->value;
        } else {
            $chrs{$chr} = $chr;
        }
    }

    return \%chrs;
}

=head2 get_taxonomy_id

  Arg[1]      : Bio::EnsEMBL::DBSQL::DBAdaptor $dba
  Example     : my $sid = $support->get_taxonony_id($dba);
  Description : Retrieves the taxononmy ID from the meta table
  Return type : Int - the taxonomy ID
  Exceptions  : thrown if no taxonomy ID is found in the database
  Caller      : general

=cut

sub get_taxonomy_id {
    my ($self, $dba) = @_;
    $dba ||= $self->dba;
    my $sql = 'SELECT meta_value FROM meta WHERE meta_key = "species.taxonomy_id"';
    my $sth = $dba->dbc->db_handle->prepare($sql);
    $sth->execute;
    my ($tid) = $sth->fetchrow_array;
    $sth->finish;
    $self->throw("Could not determine taxonomy_id from database.") unless $tid;
    return $tid;
}

=head2 get_species_scientific_name

  Arg[1]      : Bio::EnsEMBL::DBSQL::DBAdaptor $dba
  Example     : my $species = $support->get_species_scientific_name($dba);
  Description : Retrieves the species scientific name (Genus species) from the
                meta table
  Return type : String - species scientific name
  Exceptions  : thrown if species name can not be determined from db
  Caller      : general

=cut

sub get_species_scientific_name {
    my ($self, $dba) = @_;
    $dba ||= $self->dba;
    my $sql_tmp = "SELECT meta_value FROM meta WHERE meta_key = \'species.classification\' ORDER BY meta_id";