From b2ede769ab25cfe0590a07b093462fbba91aa18e Mon Sep 17 00:00:00 2001 From: Andrew Yates <ayates@ebi.ac.uk> Date: Fri, 6 Jan 2012 17:14:55 +0000 Subject: [PATCH] Adding trinomial support for database names --- modules/Bio/EnsEMBL/Registry.pm | 78 +++++++++++++++++++++++++++++---- 1 file changed, 69 insertions(+), 9 deletions(-) diff --git a/modules/Bio/EnsEMBL/Registry.pm b/modules/Bio/EnsEMBL/Registry.pm index d2c749dab8..40064dc0b5 100644 --- a/modules/Bio/EnsEMBL/Registry.pm +++ b/modules/Bio/EnsEMBL/Registry.pm @@ -127,6 +127,7 @@ use Bio::EnsEMBL::Utils::Exception qw( deprecate throw warning ); use Bio::EnsEMBL::Utils::Argument qw(rearrange); use Bio::EnsEMBL::Utils::ConfigRegistry; use Bio::EnsEMBL::ApiVersion; +use Bio::EnsEMBL::Utils::URI qw/parse_uri/; use DBI qw(:sql_types); @@ -1352,6 +1353,15 @@ sub change_access{ Example : load_registry_from_url( 'mysql://anonymous@ensembldb.ensembl.org:3306'); + + load_registry_from_url( + 'mysql://anonymous@ensembldb.ensembl.org:3306/homo_sapiens_core_65_37?group=core&species=homo_sapiens' + ); + + load_registry_from_url( + 'mysql://anonymous@ensembldb.ensembl.org:3306/homo_sapiens_core_65_37?group=core' + ); + Description: Will load the correct versions of the ensembl databases for the software release it can find on @@ -1363,6 +1373,11 @@ sub change_access{ script may crash as the API version won't match the DB version. + You can also specify a database name which will cause the + loading of a single DBAdaptor instance. Parameters are + mapped from a normal URL parameter set to their DBAdaptor + equivalent. Group must be defined. + Returntype : Int count of the DBAdaptor instances which can be found in the registry @@ -1376,7 +1391,7 @@ sub change_access{ sub load_registry_from_url { my ( $self, $url, $verbose, $no_cache ) = @_; - if ( $url =~ /mysql\:\/\/([^\@]+\@)?([^\:\/]+)(\:\d+)?(\/\d+)?/x ) { + if ( $url =~ /^mysql\:\/\/([^\@]+\@)?([^\:\/]+)(\:\d+)?(\/\d+)?$/x ) { my $user_pass = $1; my $host = $2; my $port = $3; @@ -1398,6 +1413,23 @@ sub load_registry_from_url { -no_cache => $no_cache ); } + my $uri = parse_uri($url); + if($uri) { + if($uri->scheme() eq 'mysql') { + my %params = $uri->generate_dbsql_params(); + if($params{-DBNAME}) { + $params{-SPECIES} = $params{-DBNAME} unless $params{-SPECIES}; + $params{-NO_CACHE} = 1 if $no_cache; + my $group = $params{-GROUP}; + my $class = $self->_group_to_adaptor_class($group); + if($verbose) { + printf("Loading database '%s' from group '%s' with DBAdaptor class '%s' from url %s\n", $params{-DBNAME}, $group, $class, $url); + } + $class->new(%params); + return 1; + } + } + } throw("Only MySQL URLs are accepted. Given URL was '${url}'"); } ## end sub load_registry_from_url @@ -1578,14 +1610,14 @@ sub load_registry_from_db { $ontology_version = $1; } } elsif ( - $db =~ /^([a-z]+_[a-z0-9]+ # species name e.g. homo_sapiens + $db =~ /^([a-z]+_[a-z0-9]+(?:_[a-z0-9]+)? # species name e.g. homo_sapiens or canis_lupus_familiaris _ [a-z]+ # db type (?:_\d+)?) # optional end bit for ensembl genomes databases _ (\d+) # database release _ - (\w+) # assembly number can have letters too e.g 37c + (\w+)$ # assembly number can have letters too e.g 37c /x ) { @@ -1614,7 +1646,7 @@ sub load_registry_from_db { # Register Core like databases foreach my $type (qw(core cdna vega otherfeatures rnaseq)) { - my @dbs = grep { /^[a-z]+_[a-z0-9]+ # species name + my @dbs = grep { /^[a-z]+_[a-z0-9]+(?:_[a-z0-9]+)? # species name _ $type # the database type _ @@ -1631,7 +1663,7 @@ sub load_registry_from_db { my ( $species, $num ) = - ( $database =~ /(^[a-z]+_[a-z0-9]+) # species name + ( $database =~ /(^[a-z]+_[a-z0-9]+(?:_[a-z0-9]+)?) # species name _ $type # type _ @@ -1779,7 +1811,7 @@ sub load_registry_from_db { } else { my @variation_dbs = - grep { /^[a-z]+_[a-z0-9]+_variation_(?:\d+_)?\d+_/ } @dbnames; + grep { /^[a-z]+_[a-z0-9]+(?:_[a-z0-9]+)?_variation_(?:\d+_)?\d+_/ } @dbnames; for my $variation_db (@variation_dbs) { @@ -1789,7 +1821,7 @@ sub load_registry_from_db { } my ( $species, $num ) = - ( $variation_db =~ /(^[a-z]+_[a-z0-9]+)_variation_(?:\d+_)?(\d+)_/ ); + ( $variation_db =~ /(^[a-z]+_[a-z0-9]+(?:_[a-z0-9]+)?)_variation_(?:\d+_)?(\d+)_/ ); my $dba = Bio::EnsEMBL::Variation::DBSQL::DBAdaptor->new( -group => "variation", @@ -1856,7 +1888,7 @@ sub load_registry_from_db { } } else { my @funcgen_dbs = - grep { /^[a-z]+_[a-z0-9]+_funcgen_(?:\d+_)?\d+_/ } @dbnames; + grep { /^[a-z]+_[a-z0-9]+(?:_[a-z0-9]+)?_funcgen_(?:\d+_)?\d+_/ } @dbnames; for my $funcgen_db (@funcgen_dbs) { if ( index( $funcgen_db, 'collection' ) != -1 ) { @@ -1865,7 +1897,7 @@ sub load_registry_from_db { } my ( $species, $num ) = - ( $funcgen_db =~ /(^[a-z]+_[a-z0-9]+)_funcgen_(?:\d+_)?(\d+)_/ ); + ( $funcgen_db =~ /(^[a-z]+_[a-z0-9]+(?:_[a-z0-9]+)?)_funcgen_(?:\d+_)?(\d+)_/ ); my $dba = Bio::EnsEMBL::Funcgen::DBSQL::DBAdaptor->new( -group => "funcgen", -species => $species.$species_suffix, @@ -2053,6 +2085,34 @@ sub load_registry_from_db { } ## end sub load_registry_from_db +=head2 _group_to_adaptor_class + + Arg [1] : The group you wish to decode to an adaptor class + Example : Bio::EnsEMBL::Registry->_group_to_adaptor_class('core'); + Description : Has an internal lookup of groups to their adaptor classes + Returntype : String + Exceptions : Thrown if the group is unknown + Status : Stable + +=cut + +sub _group_to_adaptor_class { + my ($self, $group) = @_; + my $class = { + core => 'Bio::EnsEMBL::DBSQL::DBAdaptor', + cdna => 'Bio::EnsEMBL::DBSQL::DBAdaptor', + otherfeatures => 'Bio::EnsEMBL::DBSQL::DBAdaptor', + rnaseq => 'Bio::EnsEMBL::DBSQL::DBAdaptor', + vega => 'Bio::EnsEMBL::DBSQL::DBAdaptor', + variation => 'Bio::EnsEMBL::Variation::DBSQL::DBAdaptor', + funcgen => 'Bio::EnsEMBL::Funcgen::DBSQL::DBAdaptor', + compara => 'Bio::EnsEMBL::Compara::DBSQL::DBAdaptor', + }->{$group}; + throw "Group '${group}' is unknown"; + return $class; +} + + =head2 find_and_add_aliases Arg [ADAPTOR] : (optional) Bio::EnsEMBL::DBSQL::DBAdaptor -- GitLab