diff --git a/modules/Bio/EnsEMBL/DBSQL/AffyArrayAdaptor.pm b/modules/Bio/EnsEMBL/DBSQL/AffyArrayAdaptor.pm index d8c4dfea02dbc57761918edeab2449773fff4de2..f26b324207a768db339c5e33558fdbdd8130321d 100644 --- a/modules/Bio/EnsEMBL/DBSQL/AffyArrayAdaptor.pm +++ b/modules/Bio/EnsEMBL/DBSQL/AffyArrayAdaptor.pm @@ -110,8 +110,11 @@ sub _objs_from_sth { Args : None Example : my @array_ids = @{$aaa->list_dbIDs()}; - Description: Gets an array of internal IDs for all AffyArray objects in the - current database. + Description: Gets an array of internal IDs for all AffyArray objects + in the current database. NOTE: In a multi-species + database, this method will return the dbIDs of all + AffyArray objects, not just the ones associated with the + current species. Returntype : List of ints Exceptions : None Caller : ? @@ -126,7 +129,12 @@ sub list_dbIDs { # Can't use _list_dbIDs because only want OligoArray objects of type AFFY my @out; + + # FIXME: This SQL will not work as expected on multi-species + # databases. It needs to be anchored in a coord_system entry + # coord_system.species_id = $self->species_id(). /ak4@2008-07-15 my $sql = "SELECT oligo_array_id FROM oligo_array WHERE type='AFFY'"; + my $sth = $self->prepare($sql); $sth->execute; diff --git a/modules/Bio/EnsEMBL/DBSQL/AffyFeatureAdaptor.pm b/modules/Bio/EnsEMBL/DBSQL/AffyFeatureAdaptor.pm index a1a16ffa20b258fc7bf91d3c235b01dbbf889051..d361d6dfd0516f4f440e5838d3dc22bfcf651112 100644 --- a/modules/Bio/EnsEMBL/DBSQL/AffyFeatureAdaptor.pm +++ b/modules/Bio/EnsEMBL/DBSQL/AffyFeatureAdaptor.pm @@ -112,8 +112,11 @@ sub _new_fast { Args : None Example : my @feature_ids = @{$afa->list_dbIDs()}; - Description: Gets an array of internal IDs for all AffyFeature objects in - the current database. + Description: Gets an array of internal IDs for all AffyFeature objects + in the current database. 
NOTE: In a multi-species + database, this method will return the dbIDs of all + AffyFeature objects, not just the ones associated with + the current species. Returntype : List of ints Exceptions : None Caller : ? @@ -128,6 +131,10 @@ sub list_dbIDs { # Can't use _list_dbIDs because only want OligoProbe objects on arrays of type AFFY my @out; + + # FIXME: This SQL will not work as expected on multi-species + # databases. It needs to be anchored in a coord_system entry + # coord_system.species_id = $self->species_id(). /ak4@2008-07-15 my $sql = " SELECT DISTINCT of.oligo_feature_id FROM oligo_feature of, oligo_probe op, oligo_array oa diff --git a/modules/Bio/EnsEMBL/DBSQL/AffyProbeAdaptor.pm b/modules/Bio/EnsEMBL/DBSQL/AffyProbeAdaptor.pm index 7880bbf4ba5330c947eb2e078f15ab1758a1dd9c..54a0dc7fc85eb20946b0131707c75c5e1b2d37b9 100644 --- a/modules/Bio/EnsEMBL/DBSQL/AffyProbeAdaptor.pm +++ b/modules/Bio/EnsEMBL/DBSQL/AffyProbeAdaptor.pm @@ -158,8 +158,11 @@ sub _objs_from_sth { Arg [1] : none Example : my @probe_ids = @{$apa->list_dbIDs()}; - Description: Gets an array of internal IDs for all AffyProbe objects in the - current database. + Description: Gets an array of internal IDs for all AffyProbe objects + in the current database. NOTE: In a multi-species + database, this method will return the dbIDs of all + AffyProbe objects, not just the ones associated with + the current species. Returntype : List of ints Exceptions : None Caller : ? @@ -174,6 +177,10 @@ sub list_dbIDs { # Can't use _list_dbIDs because only want OligoProbe objects on arrays of type AFFY my @out; + + # FIXME: This SQL will not work as expected on multi-species + # databases. It needs to be anchored in a coord_system entry + # coord_system.species_id = $self->species_id(). 
/ak4@2008-07-15 my $sql = " SELECT DISTINCT op.oligo_probe_id FROM oligo_probe op, oligo_array oa diff --git a/modules/Bio/EnsEMBL/DBSQL/AssemblyExceptionFeatureAdaptor.pm b/modules/Bio/EnsEMBL/DBSQL/AssemblyExceptionFeatureAdaptor.pm index dfaae4157126916db1479de8987dfe3a00178968..42b6e9a636f65055f05976758245da1dd2fd1b41 100644 --- a/modules/Bio/EnsEMBL/DBSQL/AssemblyExceptionFeatureAdaptor.pm +++ b/modules/Bio/EnsEMBL/DBSQL/AssemblyExceptionFeatureAdaptor.pm @@ -97,11 +97,26 @@ sub fetch_all { return $self->{'_aexc_cache'}; } - my $sth = $self->prepare - ("SELECT assembly_exception_id, seq_region_id, seq_region_start, - seq_region_end, exc_type, exc_seq_region_id, exc_seq_region_start, - exc_seq_region_end, ori - FROM assembly_exception"); + my $statement = qq( + SELECT ae.assembly_exception_id, + ae.seq_region_id, + ae.seq_region_start, + ae.seq_region_end, + ae.exc_type, + ae.exc_seq_region_id, + ae.exc_seq_region_start, + ae.exc_seq_region_end, + ae.ori + FROM assembly_exception ae, + coord_system cs, + seq_region sr + WHERE cs.species_id = ? 
+ AND sr.coord_system_id = cs.coord_system_id + AND sr.seq_region_id = ae.seq_region_id); + + my $sth = $self->prepare($statement); + + $sth->bind_param( 1, $self->species_id(), SQL_INTEGER ); $sth->execute(); diff --git a/modules/Bio/EnsEMBL/DBSQL/AssemblyMapperAdaptor.pm b/modules/Bio/EnsEMBL/DBSQL/AssemblyMapperAdaptor.pm index 09f03dcbcb6bb891cbfb0939d4b06d75283de6d5..07bc9f2adf1ecfd9b21151b3175734649a3b8582 100644 --- a/modules/Bio/EnsEMBL/DBSQL/AssemblyMapperAdaptor.pm +++ b/modules/Bio/EnsEMBL/DBSQL/AssemblyMapperAdaptor.pm @@ -134,14 +134,22 @@ sub cache_seq_ids_with_mult_assemblys{ $self->{'multi_seq_ids'} = {}; - my $sql=(<<SQL); - SELECT seq_region_id - FROM seq_region_attrib sra, attrib_type at - WHERE sra.attrib_type_id = at.attrib_type_id and code = "MultAssem" -SQL + my $sql = qq( + SELECT sra.seq_region_id + FROM seq_region_attrib sra, + attrib_type at, + seq_region sr, + coord_system cs + WHERE sra.attrib_type_id = at.attrib_type_id + AND code = "MultAssem" + AND sra.seq_region_id = sr.seq_region_id + AND sr.coord_system_id = cs.coord_system_id + AND cs.species_id = ?); my $sth = $self->prepare($sql); + $sth->bind_param( 1, $self->species_id(), SQL_INTEGER ); + $sth->execute(); my ($seq_region_id); diff --git a/modules/Bio/EnsEMBL/DBSQL/BaseAdaptor.pm b/modules/Bio/EnsEMBL/DBSQL/BaseAdaptor.pm index 06cbc45c144ef7944ed5ce30626134d1a5f62d50..970eb744bb0d7fe561b24728be4b68bc6b1fa923 100755 --- a/modules/Bio/EnsEMBL/DBSQL/BaseAdaptor.pm +++ b/modules/Bio/EnsEMBL/DBSQL/BaseAdaptor.pm @@ -114,29 +114,27 @@ use DBI qw(:sql_types); =cut sub new { - my ($class,$dbobj) = @_; - - my $self = {}; - bless $self,$class; - - if( !defined $dbobj || !ref $dbobj ) { + my ( $class, $dbobj ) = @_; + + my $self = bless {}, $class; + + if ( !defined $dbobj || !ref $dbobj ) { throw("Don't have a db [$dbobj] for new adaptor"); } - if($dbobj->isa('Bio::EnsEMBL::DBSQL::DBAdaptor')){ + + if ( $dbobj->isa('Bio::EnsEMBL::DBSQL::DBAdaptor') ) { $self->db($dbobj); - 
$self->dbc($dbobj->dbc); - } - elsif( ref($dbobj) =~ /DBAdaptor$/){ + $self->dbc( $dbobj->dbc ); + $self->species_id( $dbobj->species_id() ); + } elsif ( ref($dbobj) =~ /DBAdaptor$/ ) { $self->db($dbobj); - $self->dbc($dbobj->dbc); - } - elsif( ref($dbobj) =~ /DBConnection$/){ - $self->dbc($dbobj); - } - else{ + $self->dbc( $dbobj->dbc ); + } elsif ( ref($dbobj) =~ /DBConnection$/ ) { + $self->dbc($dbobj); + } else { throw("Don't have a DBAdaptor [$dbobj] for new adaptor"); } - + return $self; } @@ -175,17 +173,19 @@ sub prepare{ using. Returntype : Bio::EnsEMBL::DBSQL::DBAdaptor Exceptions : none - Caller : Adaptors inherited fro BaseAdaptor + Caller : Adaptors inherited from BaseAdaptor Status : Stable =cut -sub db{ - my $self = shift; - $self->{'db'} = shift if(@_); +sub db { + my ( $self, $value ) = @_; - return $self->{'db'}; + if ( defined($value) ) { + $self->{'db'} = $value; + } + return $self->{'db'}; } =head2 dbc @@ -197,18 +197,45 @@ sub db{ using. Returntype : Bio::EnsEMBL::DBSQL::DBConnection Exceptions : none - Caller : Adaptors inherited fro BaseAdaptor + Caller : Adaptors inherited from BaseAdaptor Status : Stable =cut -sub dbc{ - my $self = shift; - $self->{'dbc'} = shift if(@_); +sub dbc { + my ( $self, $value ) = @_; + + if ( defined($value) ) { + $self->{'dbc'} = $value; + } return $self->{'dbc'}; } +=head2 species_id + + Arg [1] : (optional) int $species_id + The internal ID of the species in a multi-species database. + Example : $species_id = $adaptor->species_id(); + Description: Getter/Setter for the internal ID of the species in a + multi-species database. The default species ID is 1. 
+ Returntype : Integer + Exceptions : none + Caller : Adaptors inherited from BaseAdaptor + Status : Stable + +=cut + +sub species_id { + my ( $self, $value ) = @_; + + if ( defined($value) ) { + $self->{'species_id'} = $value; + } + + return $self->{'species_id'} || 1; +} + # list primary keys for a particular table # args are table name and primary key field diff --git a/modules/Bio/EnsEMBL/DBSQL/BaseFeatureAdaptor.pm b/modules/Bio/EnsEMBL/DBSQL/BaseFeatureAdaptor.pm index 828a2e772ac0ab5ed692e75b765c41bca9cbb277..4cc56046e6c404117c4d2b88df9078743544a1c2 100644 --- a/modules/Bio/EnsEMBL/DBSQL/BaseFeatureAdaptor.pm +++ b/modules/Bio/EnsEMBL/DBSQL/BaseFeatureAdaptor.pm @@ -919,12 +919,20 @@ sub _list_seq_region_ids { my @out; my $sql = qq( - SELECT distinct(sr.seq_region_id) - FROM seq_region sr, $table a - WHERE sr.seq_region_id = a.seq_region_id - ); + SELECT DISTINCT + sr.seq_region_id + FROM seq_region sr, + $table a, + coord_system cs + WHERE sr.seq_region_id = a.seq_region_id + AND sr.coord_system_id = cs.coord_system_id + AND cs.species_id = ?); + my $sth = $self->prepare($sql); - $sth->execute; + + $sth->bind_param( 1, $self->species_id(), SQL_INTEGER ); + + $sth->execute(); while (my ($id) = $sth->fetchrow) { push(@out, $id); diff --git a/modules/Bio/EnsEMBL/DBSQL/BaseMetaContainer.pm b/modules/Bio/EnsEMBL/DBSQL/BaseMetaContainer.pm index b0180d11d758a8ae85d71f8160ee84630d8bc27a..80e3ceb020e3b1c11173cddc338c741d14d07f70 100644 --- a/modules/Bio/EnsEMBL/DBSQL/BaseMetaContainer.pm +++ b/modules/Bio/EnsEMBL/DBSQL/BaseMetaContainer.pm @@ -88,27 +88,48 @@ sub get_schema_version { =cut sub list_value_by_key { - my ($self,$key) = @_; - my @result; + my ( $self, $key ) = @_; $self->{'cache'} ||= {}; - if( exists $self->{'cache'}->{$key} ) { + + if ( exists $self->{'cache'}->{$key} ) { return $self->{'cache'}->{$key}; } - my $sth = $self->prepare( "SELECT meta_value - FROM meta - WHERE meta_key = ? 
ORDER BY meta_id" ); - $sth->execute( $key ); - while( my $arrRef = $sth->fetchrow_arrayref() ) { + my $sth; + if ( !$self->_species_specific_key($key) ) { + $sth = + $self->prepare( "SELECT meta_value " + . "FROM meta " + . "WHERE meta_key = ? " + . "AND species_id IS NULL " + . "ORDER BY meta_id" ); + + $sth->bind_param( 1, $key, SQL_VARCHAR ); + $sth->execute(); + } else { + $sth = + $self->prepare( "SELECT meta_value " + . "FROM meta " + . "WHERE meta_key = ? " + . "AND species_id = ? " + . "ORDER BY meta_id" ); + + $sth->bind_param( 1, $key, SQL_VARCHAR ); + $sth->bind_param( 2, $self->species_id(), SQL_INTEGER ); + $sth->execute(); + } + + my @result; + while ( my $arrRef = $sth->fetchrow_arrayref() ) { push( @result, $arrRef->[0] ); } + $sth->finish(); $self->{'cache'}->{$key} = \@result; return \@result; -} - +} ## end sub list_value_by_key =head2 store_key_value @@ -128,22 +149,36 @@ sub list_value_by_key { sub store_key_value { my ( $self, $key, $value ) = @_; - if ($self->key_value_exists($key, $value)) { - warn("Key/value pair $key/$value already exists in the meta table; not storing duplicate"); + if ( $self->key_value_exists( $key, $value ) ) { + warn( "Key-value pair '$key'-'$value' " + . "already exists in the meta table; " + . "not storing duplicate" ); return; } - my $sth = $self->prepare( "INSERT INTO meta( meta_key, meta_value) - VALUES( ?, ? )" ); + if ( !$self->_species_specific_key($key) ) { + my $sth = $self->prepare( + 'INSERT INTO meta (species_id, meta_key, meta_value) ' + . 'VALUES(\N, ?, ?)' ); - my $res = $sth->execute( $key, $value ); + $sth->bind_param( 1, $key, SQL_VARCHAR ); + $sth->bind_param( 2, $value, SQL_VARCHAR ); + $sth->execute(); + } else { + my $sth = $self->prepare( + 'INSERT INTO meta (species_id, meta_key, meta_value) ' + . 
'VALUES (?, ?, ?)' ); + + $sth->bind_param( 1, $self->species_id(), SQL_INTEGER ); + $sth->bind_param( 2, $key, SQL_VARCHAR ); + $sth->bind_param( 3, $value, SQL_VARCHAR ); + $sth->execute(); + } $self->{'cache'} ||= {}; delete $self->{'cache'}->{$key}; - - return; -} +} ## end sub store_key_value =head2 update_key_value @@ -163,11 +198,28 @@ sub store_key_value { sub update_key_value { my ( $self, $key, $value ) = @_; - my $sth = $self->prepare( "UPDATE meta SET meta_value = ? WHERE meta_key = ?" ); + if ( !$self->_species_specific_key($key) ) { + my $sth = + $self->prepare( 'UPDATE meta SET meta_value = ? ' + . 'WHERE meta_key = ? ' + . 'AND species_id IS NULL' ); - my $res = $sth->execute( $value, $key ); - return; -} + $sth->bind_param( 1, $value, SQL_VARCHAR ); + $sth->bind_param( 2, $key, SQL_VARCHAR ); + $sth->execute(); + } else { + my $sth = + $self->prepare( 'UPDATE meta ' + . 'SET meta_value = ? ' + . 'WHERE meta_key = ? ' + . 'AND species_id = ?' ); + + $sth->bind_param( 1, $value, SQL_VARCHAR ); + $sth->bind_param( 2, $key, SQL_VARCHAR ); + $sth->bind_param( 3, $self->species_id(), SQL_INTEGER ); + $sth->execute(); + } +} ## end sub update_key_value =head2 delete_key @@ -185,15 +237,28 @@ sub update_key_value { =cut sub delete_key { - my ($self, $key) = @_; + my ( $self, $key ) = @_; - my $sth = $self->prepare("DELETE FROM meta WHERE meta_key = ?"); - $sth->execute($key); - $sth->finish(); + if ( !$self->_species_specific_key($key) ) { + my $sth = + $self->prepare( 'DELETE FROM meta ' + . 'WHERE meta_key = ? ' + . 'AND species_id IS NULL' ); - delete $self->{'cache'}->{$key}; + $sth->bind_param( 1, $key, SQL_VARCHAR ); + $sth->execute(); + } else { + my $sth = + $self->prepare( 'DELETE FROM meta ' + . 'WHERE meta_key = ? ' + . 'AND species_id = ?' 
); + + $sth->bind_param( 1, $key, SQL_VARCHAR ); + $sth->bind_param( 2, $self->species_id(), SQL_INTEGER ); + $sth->execute(); + } - return; + delete $self->{'cache'}->{$key}; } =head2 delete_key_value @@ -213,16 +278,33 @@ sub delete_key { =cut sub delete_key_value { - my ($self, $key, $value) = @_; + my ( $self, $key, $value ) = @_; - my $sth = $self->prepare("DELETE FROM meta WHERE meta_key = ? AND meta_value = ?"); - $sth->execute($key, $value); - $sth->finish(); + if ( !$self->_species_specific_key($key) ) { + my $sth = + $self->prepare( 'DELETE FROM meta ' + . 'WHERE meta_key = ? ' + . 'AND meta_value = ? ' + . 'AND species_id IS NULL' ); - delete $self->{'cache'}->{$key}; + $sth->bind_param( 1, $key, SQL_VARCHAR ); + $sth->bind_param( 2, $value, SQL_VARCHAR ); + $sth->execute(); + } else { + my $sth = + $self->prepare( 'DELETE FROM meta ' + . 'WHERE meta_key = ? ' + . 'AND meta_value = ? ' + . 'AND species_id = ?' ); + + $sth->bind_param( 1, $key, SQL_VARCHAR ); + $sth->bind_param( 2, $value, SQL_VARCHAR ); + $sth->bind_param( 3, $self->species_id(), SQL_INTEGER ); + $sth->execute(); + } - return; -} + delete $self->{'cache'}->{$key}; +} ## end sub delete_key_value =head2 key_value_exists @@ -231,7 +313,8 @@ sub delete_key_value { Arg [2] : string $value the value to check Example : if ($meta_container->key_value_exists($key, $value)) ... - Description: Return true if a particular key/value pair exists, undef otherwise + Description: Return true (1) if a particular key/value pair exists, + false (0) otherwise. Returntype : boolean Exceptions : none Caller : ? @@ -240,20 +323,51 @@ sub delete_key_value { =cut sub key_value_exists { + my ( $self, $key, $value ) = @_; - my ($self, $key, $value) = @_; - - my $sth = $self->prepare( "SELECT meta_value FROM meta WHERE meta_key = ? AND meta_value = ?" ); - $sth->execute($key, $value); + my $sth; + if ( !$self->_species_specific_key($key) ) { + $sth = + $self->prepare( 'SELECT meta_value ' + . 'FROM meta ' + . 
'WHERE meta_key = ? ' + . 'AND meta_value = ? ' + . 'AND species_id IS NULL' ); + + $sth->bind_param( 1, $key, SQL_VARCHAR ); + $sth->bind_param( 2, $value, SQL_VARCHAR ); + $sth->execute(); + } else { + $sth = + $self->prepare( 'SELECT meta_value ' + . 'FROM meta ' + . 'WHERE meta_key = ? ' + . 'AND meta_value = ? ' + . 'AND species_id = ?' ); + + $sth->bind_param( 1, $key, SQL_VARCHAR ); + $sth->bind_param( 2, $value, SQL_VARCHAR ); + $sth->bind_param( 3, $self->species_id(), SQL_INTEGER ); + $sth->execute(); + } - while( my $arrRef = $sth->fetchrow_arrayref() ) { - if ($arrRef->[0] eq $value) { + while ( my $arrRef = $sth->fetchrow_arrayref() ) { + if ( $arrRef->[0] eq $value ) { $sth->finish(); return 1; } } - return undef; + return 0; +} ## end sub key_value_exists + +# This utility method determines whether the key is a species-specific +# meta key or not. If the key is either 'patch' or 'schema_version', +# then it is not species-specific. + +sub _species_specific_key { + my ( $self, $key ) = @_; + return ( $key ne 'patch' && $key ne 'schema_version' ); } 1; diff --git a/modules/Bio/EnsEMBL/DBSQL/CoordSystemAdaptor.pm b/modules/Bio/EnsEMBL/DBSQL/CoordSystemAdaptor.pm index eb6791e34218594f0a2e56e6fbe85ef92ee90158..925bc13a63e6d8ae43e09c3ab1f843fad0361b36 100644 --- a/modules/Bio/EnsEMBL/DBSQL/CoordSystemAdaptor.pm +++ b/modules/Bio/EnsEMBL/DBSQL/CoordSystemAdaptor.pm @@ -126,28 +126,27 @@ use vars qw(@ISA); =cut sub new { - my $caller = shift; + my ( $proto, @args ) = @_; - my $class = ref($caller) || $caller; - - my $self = $class->SUPER::new(@_); + my $class = ref($proto) || $proto; + my $self = $class->SUPER::new(@args); # # Cache the entire contents of the coord_system table cross-referenced - # by dbID and name + # by dbID and name. 
# - #keyed on name, list of coord_system value + # keyed on name, list of coord_system value $self->{'_name_cache'} = {}; - #keyed on id, coord_system value + # keyed on id, coord_system value $self->{'_dbID_cache'} = {}; - #keyed on rank + # keyed on rank $self->{'_rank_cache'} = {}; - #keyed on id, 1/undef values - $self->{'_is_sequence_level'} = {}; + # keyed on id, 1/undef values + $self->{'_is_sequence_level'} = {}; $self->{'_is_default_version'} = {}; #cache to store the seq_region_mapping information @@ -157,143 +156,184 @@ sub new { $self->{'_external_seq_region_mapping'} = {}; my $sth = $self->prepare( - 'SELECT coord_system_id, name, rank, version, attrib ' . - 'FROM coord_system'); + 'SELECT coord_system_id, name, rank, version, attrib ' + . 'FROM coord_system ' + . 'WHERE species_id = ?' ); + + $sth->bind_param( 1, $self->species_id(), SQL_INTEGER ); $sth->execute(); - my ($dbID, $name, $rank, $version, $attrib); - $sth->bind_columns(\$dbID, \$name, \$rank, \$version, \$attrib); + my ( $dbID, $name, $rank, $version, $attrib ); + $sth->bind_columns( \( $dbID, $name, $rank, $version, $attrib ) ); - while($sth->fetch()) { + while ( $sth->fetch() ) { my $seq_lvl = 0; my $default = 0; - if($attrib) { - foreach my $attrib (split(',', $attrib)) { + + if ( defined($attrib) ) { + foreach my $attrib ( split( ',', $attrib ) ) { $self->{"_is_$attrib"}->{$dbID} = 1; - if($attrib eq 'sequence_level') { + if ( $attrib eq 'sequence_level' ) { $seq_lvl = 1; - } elsif($attrib eq 'default_version') { + } elsif ( $attrib eq 'default_version' ) { $default = 1; } } } - my $cs = Bio::EnsEMBL::CoordSystem->new - (-DBID => $dbID, - -ADAPTOR => $self, - -NAME => $name, - -VERSION => $version, - -RANK => $rank, - -SEQUENCE_LEVEL => $seq_lvl, - -DEFAULT => $default); + my $cs = + Bio::EnsEMBL::CoordSystem->new( -DBID => $dbID, + -ADAPTOR => $self, + -NAME => $name, + -VERSION => $version, + -RANK => $rank, + -SEQUENCE_LEVEL => $seq_lvl, + -DEFAULT => $default ); 
$self->{'_dbID_cache'}->{$dbID} = $cs; - $self->{'_name_cache'}->{lc($name)} ||= []; + $self->{'_name_cache'}->{ lc($name) } ||= []; $self->{'_rank_cache'}->{$rank} = $cs; - push @{$self->{'_name_cache'}->{lc($name)}}, $cs; - } + + push @{ $self->{'_name_cache'}->{ lc($name) } }, $cs; + + } ## end while ( $sth->fetch() ) $sth->finish(); - $self->_cache_mapping_paths; + $self->_cache_mapping_paths(); - $self->_cache_seq_region_mapping; + $self->_cache_seq_region_mapping(); return $self; -} +} ## end sub new -#this cache will load the information from the seq_region_table, if any, to allow mapping -#between internal and external seq_region_id -sub _cache_seq_region_mapping{ - my $self = shift; - - #for a given core database, will return the schema_build information - my $schema_build = $self->db->_get_schema_build(); - #prepare the query to get relation for the current database being used - my $sql = 'SELECT s.internal_seq_region_id, s.external_seq_region_id from seq_region_mapping s, mapping_set ms where ms.mapping_set_id = s.mapping_set_id and ms.schema_build="'.$schema_build.'"'; - #load the cache: - foreach my $row (@{$self->db->dbc->db_handle->selectall_arrayref($sql)}){ - #the internal->external - $self->{'_internal_seq_region_mapping'}->{$row->[0]} = $row->[1]; - #the external->internal - $self->{'_external_seq_region_mapping'}->{$row->[1]} = $row->[0]; - } - #and return - return; -} +sub _cache_seq_region_mapping { + # + # This cache will load the information from the seq_region_table, if + # any, to allow mapping between internal and external seq_region_id. + # + + my ($self) = @_; + + # For a given core database, will return the schema_build information. + my $schema_build = $self->db->_get_schema_build(); + + # Prepare the query to get relation for the current database being + # used. 
+ my $sql = qq( + SELECT s.internal_seq_region_id, + s.external_seq_region_id + FROM seq_region_mapping s, + mapping_set ms, + seq_region sr, + coord_system cs + WHERE ms.mapping_set_id = s.mapping_set_id + AND ms.schema_build = ? + AND s.internal_seq_region_id = sr.seq_region_id + AND sr.coord_system_id = cs.coord_system_id + AND cs.species_id = ?); + + my $sth = $self->prepare($sql); + + $sth->bind_param( 1, $schema_build, SQL_VARCHAR ); + $sth->bind_param( 2, $self->species_id(), SQL_INTEGER ); + + $sth->execute(); -sub _cache_mapping_paths{ + # Load the cache: + foreach my $row ( @{ $sth->fetchall_arrayref() } ) { + # internal->external + $self->{'_internal_seq_region_mapping'}->{ $row->[0] } = $row->[1]; + # external->internal + $self->{'_external_seq_region_mapping'}->{ $row->[1] } = $row->[0]; + } + + $sth->finish(); + +} ## end sub _cache_seq_region_mapping + + +sub _cache_mapping_paths { # - # Retrieve a list of available mappings from the meta table. - # this may eventually be moved a table of its own if this proves too - # cumbersome + # Retrieve a list of available mappings from the meta table. This + # may eventually be moved a table of its own if this proves too + # cumbersome. # - my $self = shift; + + my ($self) = @_; my %mapping_paths; my $mc = $self->db()->get_MetaContainer(); - MAP_PATH: - foreach my $map_path (@{$mc->list_value_by_key('assembly.mapping')}) { - my @cs_strings = split(/[|#]/, $map_path); +MAP_PATH: + foreach + my $map_path ( @{ $mc->list_value_by_key('assembly.mapping') } ) + { + my @cs_strings = split( /[|#]/, $map_path ); - if(@cs_strings < 2) { - warning("Incorrectly formatted assembly.mapping value in meta " . - "table: $map_path"); + if ( @cs_strings < 2 ) { + warning( "Incorrectly formatted assembly.mapping value in meta " + . 
"table: $map_path" ); next MAP_PATH; } my @coord_systems; foreach my $cs_string (@cs_strings) { - my($name, $version) = split(/:/, $cs_string); - my $cs = $self->fetch_by_name($name, $version); - if(!$cs) { - warning("Unknown coordinate system specified in meta table " . - " assembly.mapping:\n $name:$version"); + my ( $name, $version ) = split( /:/, $cs_string ); + + my $cs = $self->fetch_by_name( $name, $version ); + + if ( !$cs ) { + warning( "Unknown coordinate system specified in meta table " + . " assembly.mapping:\n $name:$version" ); next MAP_PATH; } + push @coord_systems, $cs; } - # if the delimiter is a # we want a special case, multiple parts of the same - # componente map to same assembly part. As this looks like the "long" mapping - # we just make the path a bit longer :-) + # If the delimiter is a # we want a special case, multiple parts of + # the same componente map to same assembly part. As this looks like + # the "long" mapping we just make the path a bit longer :-) - if( $map_path =~ /\#/ && scalar( @coord_systems ) == 2 ) { - splice( @coord_systems, 1, 0, ( undef )); + if ( $map_path =~ /#/ && scalar(@coord_systems) == 2 ) { + splice( @coord_systems, 1, 0, (undef) ); } my $cs1 = $coord_systems[0]; - my $cs2 = $coord_systems[$#coord_systems]; + my $cs2 = $coord_systems[$#coord_systems]; - my $key1 = $cs1->name().':'.$cs1->version(); - my $key2 = $cs2->name().':'.$cs2->version(); + my $key1 = $cs1->name() . ':' . $cs1->version(); + my $key2 = $cs2->name() . ':' . $cs2->version(); - if(exists($mapping_paths{"$key1|$key2"})) { - warning("Meta table specifies multiple mapping paths between " . - "coord systems $key1 and $key2.\n" . - "Choosing shorter path arbitrarily."); + if ( exists( $mapping_paths{"$key1|$key2"} ) ) { + warning( "Meta table specifies multiple mapping paths between " + . "coord systems $key1 and $key2.\n" + . "Choosing shorter path arbitrarily." 
); - next MAP_PATH if(@{$mapping_paths{"$key1|$key2"}} < @coord_systems); + if ( @{ $mapping_paths{"$key1|$key2"} } < @coord_systems ) { + next MAP_PATH; + } } $mapping_paths{"$key1|$key2"} = \@coord_systems; - } + } ## end foreach my $map_path ( @{ $mc... # - # Create the pseudo coord system 'toplevel' and cache it so that - # only one of these is created for each db... + # Create the pseudo coord system 'toplevel' and cache it so that only + # one of these is created for each db... # - my $toplevel = Bio::EnsEMBL::CoordSystem->new(-TOP_LEVEL => 1, - -NAME => 'toplevel', - -ADAPTOR => $self); + + my $toplevel = + Bio::EnsEMBL::CoordSystem->new( -TOP_LEVEL => 1, + -NAME => 'toplevel', + -ADAPTOR => $self ); $self->{'_top_level'} = $toplevel; $self->{'_mapping_paths'} = \%mapping_paths; - return 1; -} - + return 1; +} ## end sub _cache_mapping_paths =head2 fetch_all @@ -934,16 +974,20 @@ sub store { # store the coordinate system in the database # - my $sth = $db->dbc->prepare('INSERT INTO coord_system ' . - 'SET name = ?, ' . - 'version = ?, ' . - 'attrib = ?,' . - 'rank = ?'); + my $sth = + $db->dbc->prepare( 'INSERT INTO coord_system ' + . 'SET name = ?, ' + . 'version = ?, ' + . 'attrib = ?,' + . 'rank = ?,' + . 'species_id = ?' 
); + + $sth->bind_param( 1, $name, SQL_VARCHAR ); + $sth->bind_param( 2, $version, SQL_VARCHAR ); + $sth->bind_param( 3, $attrib_str, SQL_VARCHAR ); + $sth->bind_param( 4, $rank, SQL_INTEGER ); + $sth->bind_param( 5, $self->species_id(), SQL_INTEGER ); - $sth->bind_param(1,$name,SQL_VARCHAR); - $sth->bind_param(2,$version,SQL_VARCHAR); - $sth->bind_param(3,$attrib_str,SQL_VARCHAR); - $sth->bind_param(4,$rank,SQL_INTEGER); $sth->execute(); my $dbID = $sth->{'mysql_insertid'}; $sth->finish(); diff --git a/modules/Bio/EnsEMBL/DBSQL/DBAdaptor.pm b/modules/Bio/EnsEMBL/DBSQL/DBAdaptor.pm index 2852e19aa46c3d045ab68504027245273e2f8a5a..dc0bcf63fa5ef2e299d5e5679ae793a876b3b7a9 100755 --- a/modules/Bio/EnsEMBL/DBSQL/DBAdaptor.pm +++ b/modules/Bio/EnsEMBL/DBSQL/DBAdaptor.pm @@ -81,36 +81,28 @@ my $reg = "Bio::EnsEMBL::Registry"; =cut sub new { - my($class, @args) = @_; + my ( $class, @args ) = @_; - my $self ={}; - bless $self,$class; + my $self = bless {}, $class; - - my ($species, $group, $con, $dnadb) = - rearrange([qw(SPECIES GROUP DBCONN DNADB)], @args); + my ( $species, $species_id, $group, $con, $dnadb ) = + rearrange( [qw(SPECIES SPECIES_ID GROUP DBCONN DNADB)], @args ); - if(defined($con)){ - $self->dbc($con); - } - else{ - $self->dbc(new Bio::EnsEMBL::DBSQL::DBConnection(@args)); - } - - if(defined($species)){ - $self->species($species); + if ( defined($con) ) { $self->dbc($con) } + else { + $self->dbc( Bio::EnsEMBL::DBSQL::DBConnection->new(@args) ); } - if(defined($group)){ - $self->group($group); - } + if ( defined($species) ) { $self->species($species) } + if ( defined($group) ) { $self->group($group) } + + $species_id ||= 1; + $self->species_id($species_id); $self = Bio::EnsEMBL::Utils::ConfigRegistry::gen_load($self); - if(defined $dnadb) { - $self->dnadb($dnadb); - } - + if ( defined $dnadb ) { $self->dnadb($dnadb) } + return $self; } @@ -587,8 +579,7 @@ sub add_GenericFeatureAdaptor { $self->{'generic_feature_adaptors'}->{$name} = $adaptor_obj; } -=head2 
species - +=head2 species Arg [1] : (optional) string $arg The new value of the species used by this DBAdaptor. Example : $species = $dba->species() @@ -604,12 +595,42 @@ sub add_GenericFeatureAdaptor { =cut sub species { - my ($self, $arg ) = @_; - ( defined $arg ) && - ( $self->{_species} = $arg ); + my ( $self, $arg ) = @_; + + if ( defined($arg) ) { + $self->{_species} = $arg; + } + $self->{_species}; } +=head2 species_id + + Arg [1] : (optional) int $arg + The new value of the species_id used by this DBAdaptor + when dealing with multi-species databases. + Example : $species_id = $dba->species_id() + Description: Getter/Setter for the species_id to use for this + connection. There is currently no point in setting + this value after the connection has already been + established by the constructor. + Returntype : int + Exceptions : none + Caller : new + Status : Stable + +=cut + +sub species_id { + my ( $self, $arg ) = @_; + + if ( defined($arg) ) { + $self->{_species_id} = $arg; + } + + $self->{_species_id}; +} + =head2 group diff --git a/modules/Bio/EnsEMBL/DBSQL/DBEntryAdaptor.pm b/modules/Bio/EnsEMBL/DBSQL/DBEntryAdaptor.pm index fe5167b784938106bf9a959a28f7e29b86b0e481..f061bd396078fb2f42d93bacd9c1e12a91ce4928 100644 --- a/modules/Bio/EnsEMBL/DBSQL/DBEntryAdaptor.pm +++ b/modules/Bio/EnsEMBL/DBSQL/DBEntryAdaptor.pm @@ -54,7 +54,8 @@ use strict; Arg [1] : int $dbID the unique database identifier for the DBEntry to retrieve Example : my $db_entry = $db_entry_adaptor->fetch_by_dbID($dbID); - Description: retrieves a dbEntry from the database via its unique identifier + Description: Retrieves a dbEntry from the database via its unique + identifier. Returntype : Bio::EnsEMBL::DBEntry Exceptions : none Caller : general @@ -153,9 +154,13 @@ sub fetch_by_dbID { retrieve. 
Example : my $xref = $dbea->fetch_by_db_accession('Interpro','IPR003439'); print $xref->description(), "\n" if($xref); - Description: Retrieves a DBEntry (xref) via the name of the database it is - from and its primary accession in that database. Undef is - returned if the xref cannot be found in the database. + Description: Retrieves a DBEntry (xref) via the name of the database + it is from and its primary accession in that database. + Undef is returned if the xref cannot be found in the + database. + NOTE: In a multi-species database, this method will + return all the entries matching the search criteria, not + just the ones associated with the current species. Returntype : Bio::EnsEMBL::DBSQL::DBEntry Exceptions : thrown if arguments are incorrect Caller : general, domainview @@ -788,6 +793,9 @@ sub remove_from_object { Arf [4] : optional $exdb_type (external database type) Example : $self->_fetch_by_object_type( $translation_id, 'Translation' ) Description: Fetches DBEntry by Object type + NOTE: In a multi-species database, this method will + return all the entries matching the search criteria, not + just the ones associated with the current species. Returntype : arrayref of DBEntry objects; may be of type IdentityXref if there is mapping data, or GoXref if there is linkage data. Exceptions : none @@ -968,10 +976,15 @@ SSQL Arg [1] : string $external_id Example : @gene_ids = $dbea->list_gene_ids_by_external_db_id(1020); - Description: Retrieve a list of geneid by an external identifier that is - linked to any of the genes transcripts, translations or the - gene itself. NOTE: if more than one external identifier has the - same primary accession then genes for each of these is returned. + Description: Retrieve a list of geneid by an external identifier that + is linked to any of the genes transcripts, translations + or the gene itself. + NOTE: If more than one external identifier has the + same primary accession then genes for each of these is + returned. 
+ NOTE: In a multi-species database, this method will + return all the entries matching the search criteria, not + just the ones associated with the current species. Returntype : list of ints Exceptions : none Caller : unknown @@ -1077,6 +1090,9 @@ sub list_translation_ids_by_extids { Arg [4] : (optional) string $external_db_name other object type to be returned Example : $self->_type_by_external_id($name, 'Translation'); Description: Gets + NOTE: In a multi-species database, this method will + return all the entries matching the search criteria, not + just the ones associated with the current species. Returntype : list of dbIDs (gene_id, transcript_id, etc.) Exceptions : none @@ -1219,6 +1235,9 @@ sub _type_by_external_id { other object type to be returned Example : $self->_type_by_external_db_id(1030, 'Translation'); Description: Gets + NOTE: In a multi-species database, this method will + return all the entries matching the search criteria, not + just the ones associated with the current species. Returntype : list of dbIDs (gene_id, transcript_id, etc.) Exceptions : none Caller : list_translation_ids_by_extids @@ -1298,8 +1317,11 @@ sub _type_by_external_db_id{ Example : @canc_refs = @{$db_entry_adaptor->fetch_all_by_description("%cancer%")}; @db_entries = @{$db_entry_adaptor->fetch_all_by_description("%cancer%","MIM_MORBID")}; - Description: Retrieves DBEntrys that match the description. Optionally you can search on - external databases tpye + Description: Retrieves DBEntries that match the description. + Optionally you can search on external databases type. + NOTE: In a multi-species database, this method will + return all the entries matching the search criteria, not + just the ones associated with the current species. Returntype : ref to array of Bio::EnsEMBL::DBSQL::DBEntry Exceptions : None.
Caller : General diff --git a/modules/Bio/EnsEMBL/DBSQL/DensityTypeAdaptor.pm b/modules/Bio/EnsEMBL/DBSQL/DensityTypeAdaptor.pm index 736c27e55c51cae085168ff861735badb0837e15..de23d96fca1ec88f5052e4379f4a621a56a31afb 100644 --- a/modules/Bio/EnsEMBL/DBSQL/DensityTypeAdaptor.pm +++ b/modules/Bio/EnsEMBL/DBSQL/DensityTypeAdaptor.pm @@ -70,7 +70,10 @@ sub new { Arg [1] : none Example : my @density_types = @{$density_type_adaptor->fetch_all}; - Description: Retrieves every density type in the database + Description: Retrieves every density type in the database. + NOTE: In a multi-species database, this method will + return all the entries, not just the ones associated with + the current species. Returntype : reference to list of Bio::EnsEMBL::DensityType objects Exceptions : none Caller : general, new @@ -150,7 +153,10 @@ sub fetch_by_dbID { Arg [1] : string $logic_name Example : my @dts = @{$dtype_adaptor->fetch_all('repeat_coverage')}; - Description: Retrieves all density types with a given logic name + Description: Retrieves all density types with a given logic name. + NOTE: In a multi-species database, this method will + return all the entries matching the search criteria, not + just the ones associated with the current species. Returntype : reference to list of Bio::EnsEMBL::DensityTypes Exceptions : thrown if logic_name argument is not provided Caller : general diff --git a/modules/Bio/EnsEMBL/DBSQL/GeneAdaptor.pm b/modules/Bio/EnsEMBL/DBSQL/GeneAdaptor.pm index f9031a88f82f270e7921dbf454311d8b36ec5d0e..54da59ceab42b77ff0b0c7c894e6cb4b5b19b508 100644 --- a/modules/Bio/EnsEMBL/DBSQL/GeneAdaptor.pm +++ b/modules/Bio/EnsEMBL/DBSQL/GeneAdaptor.pm @@ -337,20 +337,26 @@ sub fetch_all_by_domain { throw("domain argument is required") unless ($domain); my $sth = $self->prepare(qq( - SELECT tr.gene_id - FROM interpro i, - protein_feature pf, - transcript tr, - translation tl - WHERE i.interpro_ac = ? 
- AND i.id = pf.hit_name - AND pf.translation_id = tl.translation_id - AND tr.transcript_id = tl.transcript_id - AND tr.is_current = 1 - GROUP BY tr.gene_id - )); + SELECT tr.gene_id + FROM interpro i, + protein_feature pf, + transcript tr, + translation tl, + seq_region sr, + coord_system cs + WHERE cs.species_id = ? + AND cs.coord_system_id = sr.coord_system_id + AND sr.seq_region_id = tr.seq_region_id + AND tr.is_current = 1 + AND tr.transcript_id = tl.transcript_id + AND tl.translation_id = pf.translation_id + AND pf.hit_name = i.id + AND i.interpro_ac = ? + GROUP BY tr.gene_id)); + + $sth->bind_param( 1, $self->species_id(), SQL_VARCHAR ); + $sth->bind_param( 2, $domain, SQL_VARCHAR ); - $sth->bind_param(1, $domain, SQL_VARCHAR); $sth->execute(); my @array = @{$sth->fetchall_arrayref()}; @@ -965,17 +971,17 @@ sub store { # column status is used from schema version 34 onwards (before it was # confidence) - my $sth = $self->prepare( $store_gene_sql ); - $sth->bind_param(1, $type, SQL_VARCHAR); - $sth->bind_param(2, $analysis_id, SQL_INTEGER); - $sth->bind_param(3, $seq_region_id, SQL_INTEGER); - $sth->bind_param(4, $gene->start, SQL_INTEGER); - $sth->bind_param(5, $gene->end, SQL_INTEGER); - $sth->bind_param(6, $gene->strand, SQL_TINYINT); - $sth->bind_param(7, $gene->description, SQL_LONGVARCHAR); - $sth->bind_param(8, $gene->source, SQL_VARCHAR); - $sth->bind_param(9, $gene->status, SQL_VARCHAR); - $sth->bind_param(10, $is_current, SQL_TINYINT); + my $sth = $self->prepare($store_gene_sql); + $sth->bind_param( 1, $type, SQL_VARCHAR ); + $sth->bind_param( 2, $analysis_id, SQL_INTEGER ); + $sth->bind_param( 3, $seq_region_id, SQL_INTEGER ); + $sth->bind_param( 4, $gene->start, SQL_INTEGER ); + $sth->bind_param( 5, $gene->end, SQL_INTEGER ); + $sth->bind_param( 6, $gene->strand, SQL_TINYINT ); + $sth->bind_param( 7, $gene->description, SQL_LONGVARCHAR ); + $sth->bind_param( 8, $gene->source, SQL_VARCHAR ); + $sth->bind_param( 9, $gene->status, SQL_VARCHAR ); + 
$sth->bind_param( 10, $is_current, SQL_TINYINT ); $sth->execute(); $sth->finish(); @@ -1132,8 +1138,8 @@ sub remove { } # remove all alternative allele entries associated with this gene - my $sth = $self->prepare("delete from alt_allele where gene_id = ?"); - $sth->bind_param(1, $gene->dbID, SQL_INTEGER); + my $sth = $self->prepare("DELETE FROM alt_allele WHERE gene_id = ?"); + $sth->bind_param( 1, $gene->dbID, SQL_INTEGER ); $sth->execute(); $sth->finish(); @@ -1149,22 +1155,25 @@ sub remove { # remove the gene stable identifier - $sth = $self->prepare( "delete from gene_stable_id where gene_id = ? " ); - $sth->bind_param(1, $gene->dbID, SQL_INTEGER); + $sth = + $self->prepare("DELETE FROM gene_stable_id WHERE gene_id = ? "); + $sth->bind_param( 1, $gene->dbID, SQL_INTEGER ); $sth->execute(); $sth->finish(); # remove any unconventional transcript associations involving this gene - $sth = $self->prepare( "delete from unconventional_transcript_association where gene_id = ? " ); - $sth->bind_param(1, $gene->dbID, SQL_INTEGER); + $sth = + $self->prepare( "DELETE FROM unconventional_transcript_association " + . "WHERE gene_id = ? " ); + $sth->bind_param( 1, $gene->dbID, SQL_INTEGER ); $sth->execute(); $sth->finish(); # remove this gene from the database - $sth = $self->prepare( "delete from gene where gene_id = ? " ); - $sth->bind_param(1, $gene->dbID, SQL_INTEGER); + $sth = $self->prepare("DELETE FROM gene WHERE gene_id = ? 
"); + $sth->bind_param( 1, $gene->dbID, SQL_INTEGER ); $sth->execute(); $sth->finish(); @@ -1193,26 +1202,28 @@ sub remove { sub get_Interpro_by_geneid { my ($self, $gene_stable_id) = @_; - + my $sql = qq( - SELECT i.interpro_ac, - x.description - FROM transcript t, - translation tl, - protein_feature pf, - interpro i, - xref x, - gene_stable_id gsi - WHERE gsi.stable_id = '$gene_stable_id' - AND t.gene_id = gsi.gene_id - AND t.is_current = 1 - AND tl.transcript_id = t.transcript_id - AND tl.translation_id = pf.translation_id - AND i.id = pf.hit_name - AND i.interpro_ac = x.dbprimary_acc - ); - + SELECT i.interpro_ac, + x.description + FROM transcript t, + translation tl, + protein_feature pf, + interpro i, + xref x, + gene_stable_id gsi + WHERE gsi.stable_id = ? + AND t.gene_id = gsi.gene_id + AND t.is_current = 1 + AND tl.transcript_id = t.transcript_id + AND tl.translation_id = pf.translation_id + AND i.id = pf.hit_name + AND i.interpro_ac = x.dbprimary_acc); + my $sth = $self->prepare($sql); + + $sth->bind_param( 1, $gene_stable_id, SQL_VARCHAR ); + $sth->execute; my @out; @@ -1511,22 +1522,23 @@ sub _objs_from_sth { =cut -sub cache_gene_seq_mappings{ +sub cache_gene_seq_mappings { my ($self) = @_; # get the sequence level to map too - my $sql = qq( - SELECT name - FROM coord_system - WHERE attrib like "%sequence_level%" - ); + my $sql = + 'SELECT name ' + . 'FROM coord_system ' + . 'WHERE attrib like "%%sequence_level%%"' + . 
' AND species_id = ?'; my $sth = $self->prepare($sql); + $sth->bind_param( 1, $self->species_id(), SQL_INTEGER ); $sth->execute(); - + my $sequence_level = $sth->fetchrow_array(); - + $sth->finish(); my $csa = $self->db->get_CoordSystemAdaptor(); @@ -1536,14 +1548,15 @@ sub cache_gene_seq_mappings{ # get level to map to two - my $mcc = $self->db->get_MetaCoordContainerAdaptor(); + my $mcc = $self->db->get_MetaCoordContainerAdaptor(); my $csnew = $mcc->fetch_all_CoordSystems_by_feature_type('gene'); foreach my $cs2 (@$csnew) { - my $am = $ama->fetch_by_CoordSystems($cs1, $cs2); - $am->register_all(); + my $am = $ama->fetch_by_CoordSystems( $cs1, $cs2 ); + $am->register_all(); } -} + +} ## end sub cache_gene_seq_mappings =head2 fetch_all_by_exon_supporting_evidence diff --git a/modules/Bio/EnsEMBL/DBSQL/MetaCoordContainer.pm b/modules/Bio/EnsEMBL/DBSQL/MetaCoordContainer.pm index d121a19f3ee66a294b2b173ca4150ff87254ef0c..d7d2fad9dd61572cc3f6b2b4c6a223a77c2d825e 100644 --- a/modules/Bio/EnsEMBL/DBSQL/MetaCoordContainer.pm +++ b/modules/Bio/EnsEMBL/DBSQL/MetaCoordContainer.pm @@ -20,22 +20,31 @@ sub new { my $self = $class->SUPER::new(@_); # - # Retrieve the list of the coordinate systems that features are stored in - # and cache them + # Retrieve the list of the coordinate systems that features are stored + # in and cache them. # - my $sth = $self->prepare - ('SELECT table_name, coord_system_id, max_length FROM meta_coord'); + + my $sth = $self->prepare( + 'SELECT mc.table_name, mc.coord_system_id, mc.max_length ' + . 'FROM meta_coord mc, coord_system cs ' + . 'WHERE mc.coord_system_id = cs.coord_system_id ' + . 'AND cs.species_id = ?'
); + + $sth->bind_param( 1, $self->species_id(), SQL_INTEGER ); $sth->execute(); - while(my ($table_name, $cs_id, $max_length) = $sth->fetchrow_array()) { - $self->{'_feature_cache'}->{lc($table_name)} ||= []; - push @{$self->{'_feature_cache'}->{lc($table_name)}}, $cs_id; - $self->{'_max_len_cache'}->{$cs_id}->{lc($table_name)} = $max_length; + while ( my ( $table_name, $cs_id, $max_length ) = + $sth->fetchrow_array() ) + { + $self->{'_feature_cache'}->{ lc($table_name) } ||= []; + push @{ $self->{'_feature_cache'}->{ lc($table_name) } }, $cs_id; + $self->{'_max_len_cache'}->{$cs_id}->{ lc($table_name) } = + $max_length; } $sth->finish(); return $self; -} +} ## end sub new diff --git a/modules/Bio/EnsEMBL/DBSQL/MiscFeatureAdaptor.pm b/modules/Bio/EnsEMBL/DBSQL/MiscFeatureAdaptor.pm index 360bc71cc0a27f07c168ed803fdd9a7e2cdab0ab..b1f50a088981fe2026efed3b710c382fd550f894 100644 --- a/modules/Bio/EnsEMBL/DBSQL/MiscFeatureAdaptor.pm +++ b/modules/Bio/EnsEMBL/DBSQL/MiscFeatureAdaptor.pm @@ -130,10 +130,11 @@ sub fetch_all_by_Slice_and_set_code { #get the misc feature with synonym 'AL014121' ($feat)=@{$mfa->fetch_all_by_attrib_type_value('synonym','AL014121'); Description: Retrieves MiscFeatures which have a particular attribute. - If the attribute value argument is also provided only features - which have the attribute AND a particular value are returned. - The features are returned in their native coordinate system - (i.e. the coordinate system that they are stored in). + If the attribute value argument is also provided only + features which have the attribute AND a particular value + are returned. The features are returned in their native + coordinate system (i.e. the coordinate system that they + are stored in). 
Returntype : listref of Bio::EnsEMBL::MiscFeatures Exceptions : throw if attrib_type code arg is not provided Caller : general @@ -142,59 +143,71 @@ sub fetch_all_by_Slice_and_set_code { =cut sub fetch_all_by_attribute_type_value { - my $self = shift; + my $self = shift; my $attrib_type_code = shift; - my $attrib_value = shift; - - throw("Attrib type code argument is required.") if(!$attrib_type_code); - - # - # Need to do 2 queries so that all of the ids come back with the features. - # The problem with adding attrib constraints to filter the misc_features - # which come back is that not all of the attributes will come back - # - - my $sql = "SELECT DISTINCT ma.misc_feature_id " . - "FROM misc_attrib ma, attrib_type at " . - "WHERE ma.attrib_type_id = at.attrib_type_id " . - "AND at.code = ?"; - - my @bind_vals = ($attrib_type_code); - - if($attrib_value) { - push @bind_vals, $attrib_value; + my $attrib_value = shift; + + throw("Attrib type code argument is required.") + if ( !$attrib_type_code ); + + # Need to do 2 queries so that all of the ids come back with the + # features. The problem with adding attrib constraints to filter the + # misc_features which come back is that not all of the attributes will + # come back + + my $sql = qq( + SELECT DISTINCT + ma.misc_feature_id + FROM misc_attrib ma, + attrib_type at, + misc_feature mf, + seq_region sr, + coord_system cs + WHERE ma.attrib_type_id = at.attrib_type_id + AND at.code = ? 
+ AND ma.misc_feature_id = mf.misc_feature_id + AND mf.seq_region_id = sr.seq_region_id + AND sr.coord_system_id = cs.coord_system_id + AND cs.species_id = ?); + + if ($attrib_value) { $sql .= " AND ma.value = ?"; } my $sth = $self->prepare($sql); - $sth->bind_param(1,$attrib_type_code,SQL_VARCHAR); - $sth->bind_param(2,$attrib_value,SQL_VARCHAR) if ($attrib_value); + + $sth->bind_param( 1, $attrib_type_code, SQL_VARCHAR ); + $sth->bind_param( 2, $self->species_id(), SQL_INTEGER ); + if ($attrib_value) { + $sth->bind_param( 3, $attrib_value, SQL_VARCHAR ); + } + $sth->execute(); - my @ids = map {$_->[0]} @{$sth->fetchall_arrayref()}; + my @ids = map { $_->[0] } @{ $sth->fetchall_arrayref() }; $sth->finish(); - #construct constraints from the list of ids. Split ids into - #groups of 1000 to ensure that the query is not too big + # Construct constraints from the list of ids. Split ids into groups + # of 1000 to ensure that the query is not too big. my @constraints; - while(@ids) { - my @subset = splice(@ids, 0, 1000); - if(@subset == 1) { + while (@ids) { + my @subset = splice( @ids, 0, 1000 ); + if ( @subset == 1 ) { push @constraints, "mf.misc_feature_id = $subset[0]"; } else { - my $id_str = join(',',@subset); + my $id_str = join( ',', @subset ); push @constraints, "mf.misc_feature_id in ($id_str)"; } } my @results; foreach my $constraint (@constraints) { - push @results, @{$self->generic_fetch($constraint)}; + push @results, @{ $self->generic_fetch($constraint) }; } return \@results; -} +} ## end sub fetch_all_by_attribute_type_value #_tables diff --git a/modules/Bio/EnsEMBL/DBSQL/MiscSetAdaptor.pm b/modules/Bio/EnsEMBL/DBSQL/MiscSetAdaptor.pm index 551330b3da8577945631b6109080b06e2153957b..f2ea0b67d7e8032e922ace3e347c860daf495e48 100644 --- a/modules/Bio/EnsEMBL/DBSQL/MiscSetAdaptor.pm +++ b/modules/Bio/EnsEMBL/DBSQL/MiscSetAdaptor.pm @@ -89,7 +89,10 @@ sub new { Example : foreach my $ms (@{$msa->fetch_all()}) { print $ms->code(), ' ', $ms->name(), "\n"; } - 
- Description: Retrieves every MiscSet defined in the DB + Description: Retrieves every MiscSet defined in the DB. + NOTE: In a multi-species database, this method will + return all the entries, not just the ones associated with + the current species. Returntype : listref of Bio::EnsEMBL::MiscSets Exceptions : none Caller : general diff --git a/modules/Bio/EnsEMBL/DBSQL/OligoProbeAdaptor.pm b/modules/Bio/EnsEMBL/DBSQL/OligoProbeAdaptor.pm index 6245dcbd67ba3f965ec695f15ca7083c3a283880..c6e76f93f6f255ce82116b893ceb6cc87f344eec 100644 --- a/modules/Bio/EnsEMBL/DBSQL/OligoProbeAdaptor.pm +++ b/modules/Bio/EnsEMBL/DBSQL/OligoProbeAdaptor.pm @@ -53,9 +53,12 @@ use vars qw(@ISA); Arg [2] : (optional) string - name of probeset Arg [3] : string - name of probe Example : my $probe = $opa->fetch_by_array_probeset_probe('Array-1', 'Probeset-1', 'Probe-1'); - Description: Returns a probe given a combination of array name, probeset and - probe name. This will uniquely define an Affy probe. Only one - probe is ever returned. + Description: Returns a probe given a combination of array name, + probeset and probe name. This will uniquely define an + Affy probe. Only one probe is ever returned. + NOTE: In a multi-species database, this method does + not restrict the search to the probes associated with + the current species. Returntype : Bio::EnsEMBL::OligoProbe Exceptions : None Caller : General diff --git a/modules/Bio/EnsEMBL/DBSQL/PredictionExonAdaptor.pm b/modules/Bio/EnsEMBL/DBSQL/PredictionExonAdaptor.pm index d01803c2401ad71610524a3ed6d7a3fa4339a9eb..5186a2cd983892a08b233844245b7bebf7d90c0d 100644 --- a/modules/Bio/EnsEMBL/DBSQL/PredictionExonAdaptor.pm +++ b/modules/Bio/EnsEMBL/DBSQL/PredictionExonAdaptor.pm @@ -247,8 +247,9 @@ sub remove { return undef; } - my $sth = $self->prepare( "delete from prediction_exon where prediction_exon_id = ?"
); - $sth->bind_param(1,$pexon->dbID,SQL_INTEGER); + my $sth = $self->prepare( + "DELETE FROM prediction_exon WHERE prediction_exon_id = ?"); + $sth->bind_param( 1, $pexon->dbID, SQL_INTEGER ); $sth->execute(); $pexon->dbID(undef); diff --git a/modules/Bio/EnsEMBL/DBSQL/SequenceAdaptor.pm b/modules/Bio/EnsEMBL/DBSQL/SequenceAdaptor.pm index 5db6a8c4abe3d3dfd47cdbfa0dc49ad17907c08b..5cae0f3fb4a9b2227c64f2eb304c502e135580b8 100644 --- a/modules/Bio/EnsEMBL/DBSQL/SequenceAdaptor.pm +++ b/modules/Bio/EnsEMBL/DBSQL/SequenceAdaptor.pm @@ -235,18 +235,18 @@ sub _fetch_seq { } else { # retrieve uncached portions of the sequence - my $sth = $self->prepare - ("SELECT SUBSTRING( d.sequence, ?, ?) - FROM dna d - WHERE d.seq_region_id = ?"); + my $sth = + $self->prepare( "SELECT SUBSTRING(d.sequence, ?, ?) " + . "FROM dna d " + . "WHERE d.seq_region_id = ?" ); my $tmp_seq; my $min = ($i << $SEQ_CHUNK_PWR) + 1; - $sth->bind_param(1,$min,SQL_INTEGER); - $sth->bind_param(2,1 << $SEQ_CHUNK_PWR,SQL_INTEGER); - $sth->bind_param(3,$seq_region_id,SQL_INTEGER); + $sth->bind_param( 1, $min, SQL_INTEGER ); + $sth->bind_param( 2, 1 << $SEQ_CHUNK_PWR, SQL_INTEGER ); + $sth->bind_param( 3, $seq_region_id, SQL_INTEGER ); $sth->execute(); $sth->bind_columns(\$tmp_seq); @@ -260,24 +260,24 @@ sub _fetch_seq { } # return only the requested portion of the entire sequence - my $min = ($chunk_min << $SEQ_CHUNK_PWR) + 1; - my $max = ($chunk_max+1) << $SEQ_CHUNK_PWR; - my $seq = substr($entire_seq, $start-$min, $length); + my $min = ( $chunk_min << $SEQ_CHUNK_PWR ) + 1; + my $max = ( $chunk_max + 1 ) << $SEQ_CHUNK_PWR; + my $seq = substr( $entire_seq, $start - $min, $length ); return \$seq; } else { # do not do any caching for requests of very large sequences - my $sth = $self->prepare - ("SELECT SUBSTRING( d.sequence, ?, ?) - FROM dna d - WHERE d.seq_region_id = ?"); + my $sth = + $self->prepare( "SELECT SUBSTRING(d.sequence, ?, ?) " + . "FROM dna d " + . "WHERE d.seq_region_id = ?" 
); my $tmp_seq; - $sth->bind_param(1,$start,SQL_INTEGER); - $sth->bind_param(2,$length,SQL_INTEGER); - $sth->bind_param(3,$seq_region_id,SQL_INTEGER); + $sth->bind_param( 1, $start, SQL_INTEGER ); + $sth->bind_param( 2, $length, SQL_INTEGER ); + $sth->bind_param( 3, $seq_region_id, SQL_INTEGER ); $sth->execute(); $sth->bind_columns(\$tmp_seq); diff --git a/modules/Bio/EnsEMBL/DBSQL/SliceAdaptor.pm b/modules/Bio/EnsEMBL/DBSQL/SliceAdaptor.pm index f8444521e9b2c426fc621fc2d536d027e2f1352a..dac3fc5ea6f92610b88b6b494587d8f9857da3e9 100644 --- a/modules/Bio/EnsEMBL/DBSQL/SliceAdaptor.pm +++ b/modules/Bio/EnsEMBL/DBSQL/SliceAdaptor.pm @@ -187,16 +187,18 @@ sub fetch_by_region { $strand, $version, $no_fuzz ) = @_; - $start = 1 if (!defined($start)); - $strand = 1 if (!defined($strand)); + if ( !defined($start) ) { $start = 1 } + if ( !defined($strand) ) { $strand = 1 } - throw('seq_region_name argument is required') if (!defined($seq_region_name)); + if ( !defined($seq_region_name) ) { + throw('seq_region_name argument is required'); + } my $cs; my $csa = $self->db->get_CoordSystemAdaptor(); - if ($coord_system_name) { - $cs = $csa->fetch_by_name($coord_system_name,$version); + if ( defined($coord_system_name) ) { + $cs = $csa->fetch_by_name( $coord_system_name, $version ); ## REMOVE THESE THREE LINES WHEN STICKLEBACK DB IS FIXED! ## Anne/ap5 (2007-10-09): @@ -206,167 +208,194 @@ sub fetch_by_region { # 'groups' in the stickleback community, even though they really are # chromosomes! - if( !$cs && $coord_system_name eq 'chromosome' ) { - $cs = $csa->fetch_by_name('group',$version); + if ( !defined($cs) && $coord_system_name eq 'chromosome' ) { + $cs = $csa->fetch_by_name( 'group', $version ); } - if (!$cs) { - throw("Unknown coordinate system:\n name='$coord_system_name' " . - "version='$version'\n"); + if ( !defined($cs) ) { + throw( + sprintf( "Unknown coordinate system:\n" + . 
"name='%s' version='%s'\n", + $coord_system_name, $version + ) ); } + # fetching by toplevel is same as fetching w/o name or version - if ($cs->is_top_level()) { - $cs = undef; + if ( $cs->is_top_level() ) { + $cs = undef; $version = undef; } - } + } ## end if ( defined($coord_system_name... my $constraint; my $sql; - my @bind_vals; + my @bind_params; my $key; - if ($cs) { - - push @bind_vals, $cs->dbID(); - $sql = "SELECT sr.name, sr.seq_region_id, sr.length, " . - $cs->dbID() ." FROM seq_region sr "; + if ( defined($cs) ) { + $sql = sprintf( "SELECT sr.name, sr.seq_region_id, sr.length, %d " + . "FROM seq_region sr ", + $cs->dbID() ); - $constraint = "sr.coord_system_id = ?"; + $constraint = "AND sr.coord_system_id = ?"; + push( @bind_params, [ $cs->dbID(), SQL_INTEGER ] ); - $key = "$seq_region_name:".$cs->dbID(); + $key = "$seq_region_name:" . $cs->dbID(); } else { - $sql = "SELECT sr.name, sr.seq_region_id, sr.length, " . - " cs.coord_system_id " . - "FROM seq_region sr, coord_system cs "; + $sql = + "SELECT sr.name, sr.seq_region_id, sr.length, cs.coord_system_id " + . "FROM seq_region sr, coord_system cs "; - $constraint = "sr.coord_system_id = cs.coord_system_id "; - if($version) { + $constraint = "AND sr.coord_system_id = cs.coord_system_id " + . "AND cs.species_id = ? "; + push( @bind_params, [ $self->species_id(), SQL_INTEGER ] ); + + if ( defined($version) ) { $constraint .= "AND cs.version = ? "; - push @bind_vals, $version; + push( @bind_params, [ $version, SQL_VARCHAR ] ); } + $constraint .= "ORDER BY cs.rank ASC"; } # check the cache so we only go to the db if necessary my $length; my $arr; - if ($key) { - $arr = $self->{'sr_name_cache'}->{$key}; - } - if ($arr) { + if ( defined($key) ) { $arr = $self->{'sr_name_cache'}->{$key} } + + if ( defined($arr) ) { $length = $arr->[3]; } else { - my $sth = $self->prepare($sql . " WHERE sr.name = ? AND " . 
$constraint); - $sth->bind_param(1, $seq_region_name, SQL_VARCHAR); - if ($cs){ - $sth->bind_param(2, $cs->dbID, SQL_INTEGER); - } - else{ - $sth->bind_param(2, $version, SQL_VARCHAR) if ($version); + my $sth = + $self->prepare( $sql . "WHERE sr.name = ? " . $constraint ); + + unshift( @bind_params, [ $seq_region_name, SQL_VARCHAR ] ); + + my $pos = 0; + foreach my $param (@bind_params) { + $sth->bind_param( ++$pos, $param->[0], $param->[1] ); } $sth->execute(); - if ($sth->rows() == 0) { + if ( $sth->rows() == 0 ) { $sth->finish(); if ($no_fuzz) { return undef } - # do fuzzy matching, assuming that we are just missing a version on - # the end of the seq_region name - - $sth = $self->prepare($sql . " WHERE sr.name LIKE ? AND " . $constraint); + # Do fuzzy matching, assuming that we are just missing a version + # on the end of the seq_region name. - $sth->bind_param(1, "$seq_region_name.%", SQL_VARCHAR); + $sth = + $self->prepare( $sql . " WHERE sr.name LIKE ? " . $constraint ); - if ($cs){ - $sth->bind_param(2, $cs->dbID, SQL_INTEGER); - } - else{ - $sth->bind_param(2, $version, SQL_VARCHAR) if ($version); + $bind_params[0] = + [ sprintf( '%s.%%', $seq_region_name ), SQL_VARCHAR ]; + + $pos = 0; + foreach my $param (@bind_params) { + $sth->bind_param( ++$pos, $param->[0], $param->[1] ); } + $sth->execute(); my $prefix_len = length($seq_region_name) + 1; - my $high_ver = undef; - my $high_cs = $cs; + my $high_ver = undef; + my $high_cs = $cs; - # find the fuzzy-matched seq_region with the highest postfix (which ought - # to be a version) + # Find the fuzzy-matched seq_region with the highest postfix + # (which ought to be a version). - my ($tmp_name, $id, $tmp_length, $cs_id); - $sth->bind_columns(\$tmp_name, \$id, \$tmp_length, \$cs_id); + my ( $tmp_name, $id, $tmp_length, $cs_id ); + $sth->bind_columns( \( $tmp_name, $id, $tmp_length, $cs_id ) ); my $i = 0; - while ($sth->fetch) { - my $tmp_cs = ($cs) ? 
$cs : $csa->fetch_by_dbID($cs_id); + while ( $sth->fetch ) { + my $tmp_cs = + ( defined($cs) ? $cs : $csa->fetch_by_dbID($cs_id) ); # cache values for future reference my $arr = [ $id, $tmp_name, $cs_id, $tmp_length ]; $self->{'sr_name_cache'}->{"$tmp_name:$cs_id"} = $arr; - $self->{'sr_id_cache'}->{"$id"} = $arr; - - my $tmp_ver = substr($tmp_name, $prefix_len); - - # skip versions which are non-numeric and apparently not versions - next if($tmp_ver !~ /^\d+$/); - - # take version with highest num, if two versions match take one with - # highest ranked coord system (lowest num) - if(!defined($high_ver) || $tmp_ver > $high_ver || - ($tmp_ver == $high_ver && $tmp_cs->rank < $high_cs->rank)) { - $seq_region_name = $tmp_name; - $length = $tmp_length; - $high_ver = $tmp_ver; - $high_cs = $tmp_cs; + $self->{'sr_id_cache'}->{"$id"} = $arr; + + my $tmp_ver = substr( $tmp_name, $prefix_len ); + + # skip versions which are non-numeric and apparently not + # versions + if ( $tmp_ver !~ /^\d+$/ ) { next } + + # take version with highest num, if two versions match take one + # with highest ranked coord system (lowest num) + if ( !defined($high_ver) + || $tmp_ver > $high_ver + || ( $tmp_ver == $high_ver && $tmp_cs->rank < $high_cs->rank ) + ) + { + $seq_region_name = $tmp_name; + $length = $tmp_length; + $high_ver = $tmp_ver; + $high_cs = $tmp_cs; } $i++; - } + } ## end while ( $sth->fetch ) $sth->finish(); # warn if fuzzy matching found more than one result - if ($i > 1) { - warning("Fuzzy matching of seq_region_name returned more than one result.\nYou might want to check whether the returned seq_region\n(".$high_cs->name.":$seq_region_name) is the one you intended to fetch.\n"); + if ( $i > 1 ) { + warning( + sprintf( + "Fuzzy matching of seq_region_name " + . "returned more than one result.\n" + . "You might want to check whether the returned seq_region\n" + . 
"(%s:%s) is the one you intended to fetch.\n", + $high_cs->name(), $seq_region_name + ) ); } $cs = $high_cs; - #return if we did not find any appropriate match: - return undef if (!defined($high_ver)); + # return if we did not find any appropriate match: + if ( !defined($high_ver) ) { return undef } } else { - my ($id, $cs_id); - ($seq_region_name, $id, $length, $cs_id) = $sth->fetchrow_array(); + my ( $id, $cs_id ); + ( $seq_region_name, $id, $length, $cs_id ) = + $sth->fetchrow_array(); $sth->finish(); - # cahce to speed up for future queries + # cache to speed up for future queries my $arr = [ $id, $seq_region_name, $cs_id, $length ]; $self->{'sr_name_cache'}->{"$seq_region_name:$cs_id"} = $arr; - $self->{'sr_id_cache'}->{"$id"} = $arr; - $cs = $csa->fetch_by_dbID( $cs_id ); + $self->{'sr_id_cache'}->{"$id"} = $arr; + $cs = $csa->fetch_by_dbID($cs_id); } - } + } ## end else [ if ( defined($arr) ) - $end = $length if(!defined($end)); + if ( !defined($end) ) { $end = $length } - if($end+1 < $start) { - throw("start [$start] must be less than or equal to end+1 [$end+1]"); + if ( $end + 1 < $start ) { + throw( + sprintf( + "start [%d] must be less than or equal to end+1 [%d]", + $start, $end + 1 + ) ); } - return Bio::EnsEMBL::Slice->new(-COORD_SYSTEM => $cs, - -SEQ_REGION_NAME => $seq_region_name, - -SEQ_REGION_LENGTH => $length, - -START => $start, - -END => $end, - -STRAND => $strand, - -ADAPTOR => $self); -} + return + Bio::EnsEMBL::Slice->new( -COORD_SYSTEM => $cs, + -SEQ_REGION_NAME => $seq_region_name, + -SEQ_REGION_LENGTH => $length, + -START => $start, + -END => $end, + -STRAND => $strand, + -ADAPTOR => $self ); +} ## end sub fetch_by_region @@ -444,27 +473,26 @@ sub fetch_by_name { =cut sub fetch_by_seq_region_id { - my ($self, $seq_region_id,$start,$end,$strand) = @_; + my ( $self, $seq_region_id, $start, $end, $strand ) = @_; - my $arr = $self->{'sr_id_cache'}->{ $seq_region_id }; - my ($name, $length, $cs); + my $arr = 
$self->{'sr_id_cache'}->{$seq_region_id}; + my ( $name, $length, $cs, $cs_id ); - if( $arr && defined($arr->[2])) { - my $cs_id; - ($name, $cs_id, $length ) = ( $arr->[1], $arr->[2], $arr->[3] ); + if ( $arr && defined( $arr->[2] ) ) { + ( $name, $cs_id, $length ) = ( $arr->[1], $arr->[2], $arr->[3] ); $cs = $self->db->get_CoordSystemAdaptor->fetch_by_dbID($cs_id); } else { - my $sth = $self->prepare("SELECT name, length, coord_system_id " . - "FROM seq_region " . - "WHERE seq_region_id = ?"); + my $sth = + $self->prepare( "SELECT sr.name, sr.coord_system_id, sr.length " + . "FROM seq_region sr " + . "WHERE sr.seq_region_id = ? " ); - $sth->bind_param(1,$seq_region_id,SQL_INTEGER); + $sth->bind_param( 1, $seq_region_id, SQL_INTEGER ); $sth->execute(); - return undef if($sth->rows() == 0); + if ( $sth->rows() == 0 ) { return undef } - my $cs_id; - ($name, $length, $cs_id) = $sth->fetchrow_array(); + ( $name, $cs_id, $length ) = $sth->fetchrow_array(); $sth->finish(); $cs = $self->db->get_CoordSystemAdaptor->fetch_by_dbID($cs_id); @@ -476,14 +504,15 @@ sub fetch_by_seq_region_id { $self->{'sr_id_cache'}->{"$seq_region_id"} = $arr; } - return Bio::EnsEMBL::Slice->new(-COORD_SYSTEM => $cs, - -SEQ_REGION_NAME => $name, - -SEQ_REGION_LENGTH => $length, - -START => $start || 1, - -END => $end || $length, - -STRAND => $strand || 1, - -ADAPTOR => $self); -} + return + Bio::EnsEMBL::Slice->new( -COORD_SYSTEM => $cs, + -SEQ_REGION_NAME => $name, + -SEQ_REGION_LENGTH => $length, + -START => $start || 1, + -END => $end || $length, + -STRAND => $strand || 1, + -ADAPTOR => $self ); +} ## end sub fetch_by_seq_region_id @@ -621,48 +650,64 @@ sub fetch_all { my $sth; my %bad_vals=(); + # # Get a hash of non reference seq regions # - if(!$include_non_reference){ - my $sth2 = $self->prepare( - "SELECT sr.seq_region_id ". - "FROM seq_region sr, seq_region_attrib sra, attrib_type at ". - " WHERE at.code='non_ref'". - " AND sra.seq_region_id=sr.seq_region_id ".
- " AND at.attrib_type_id=sra.attrib_type_id " ); + if ( !$include_non_reference ) { + my $sth2 = + $self->prepare( 'SELECT sr.seq_region_id ' + . 'FROM seq_region sr, seq_region_attrib sra, ' + . 'attrib_type at, coord_system cs ' + . 'WHERE at.code = "non_ref" ' + . 'AND sra.seq_region_id = sr.seq_region_id ' + . 'AND at.attrib_type_id = sra.attrib_type_id ' + . 'AND sr.coord_system_id = cs.coord_system_id ' + . 'AND cs.species_id = ?' ); + + $sth2->bind_param( 1, $self->species_id(), SQL_INTEGER ); $sth2->execute(); + my ($seq_region_id); - $sth2->bind_columns(\$seq_region_id); - while($sth2->fetch()) { + $sth2->bind_columns( \$seq_region_id ); + + while ( $sth2->fetch() ) { $bad_vals{$seq_region_id} = 1; } + $sth2->finish(); } + # # Retrieve the seq_regions from the database # - if($orig_cs->is_top_level()) { - $sth = - $self->prepare("SELECT sr.seq_region_id, sr.name, sr.length, " . - " sr.coord_system_id " . - "FROM seq_region sr, " . - " seq_region_attrib sra, attrib_type at " . - "WHERE at.code='toplevel' " . - "AND at.attrib_type_id=sra.attrib_type_id " . - "AND sra.seq_region_id=sr.seq_region_id"); + if ( $orig_cs->is_top_level() ) { + $sth = + $self->prepare( 'SELECT sr.seq_region_id, sr.name, ' + . 'sr.length, sr.coord_system_id ' + . 'FROM seq_region sr, seq_region_attrib sra, ' + . 'attrib_type at, coord_system cs ' + . 'WHERE at.code = "toplevel" ' + . 'AND at.attrib_type_id = sra.attrib_type_id ' + . 'AND sra.seq_region_id = sr.seq_region_id ' + . 'AND sr.coord_system_id = cs.coord_system_id ' + . 'AND cs.species_id = ?' ); + + $sth->bind_param( 1, $self->species_id(), SQL_INTEGER ); $sth->execute(); } else { - $sth = - $self->prepare('SELECT seq_region_id, name, length, coord_system_id ' . - 'FROM seq_region ' . - 'WHERE coord_system_id =?'); - $sth->bind_param(1,$orig_cs->dbID,SQL_INTEGER); - $sth->execute(); + $sth = + $self->prepare( 'SELECT seq_region_id, name, ' + . 'length, coord_system_id ' + . 'FROM seq_region sr ' + .
'WHERE coord_system_id = ?' ); + + $sth->bind_param( 1, $orig_cs->dbID, SQL_INTEGER ); + $sth->execute(); } - my ($seq_region_id, $name, $length, $cs_id); - $sth->bind_columns(\$seq_region_id, \$name, \$length, \$cs_id); + my ( $seq_region_id, $name, $length, $cs_id ); + $sth->bind_columns( \( $seq_region_id, $name, $length, $cs_id ) ); my $cache_count = 0; @@ -734,20 +779,27 @@ sub fetch_all { =cut -sub is_toplevel{ +sub is_toplevel { my $self = shift; - my $id = shift; + my $id = shift; - my $sth = $self->prepare("SELECT at.code from seq_region_attrib sra, attrib_type at WHERE sra.seq_region_id = $id AND at.attrib_type_id = sra.attrib_type_id AND at.code = 'toplevel'"); - + my $sth = $self->prepare( + "SELECT at.code from seq_region_attrib sra, attrib_type at " + . "WHERE sra.seq_region_id = ? " + . "AND at.attrib_type_id = sra.attrib_type_id " + . "AND at.code = 'toplevel'" ); + + $sth->bind_param( 1, $id, SQL_INTEGER ); $sth->execute(); - - my $code = undef; - $sth->bind_columns(\$code); - while($sth->fetch){ + + my $code; + $sth->bind_columns( \$code ); + + while ( $sth->fetch ) { $sth->finish; return 1; } + $sth->finish; return 0; } @@ -797,27 +849,32 @@ sub fetch_by_band { =cut sub fetch_by_chr_band { - my ($self,$chr,$band) = @_; + my ( $self, $chr, $band ) = @_; - my $chr_slice = $self->fetch_by_region('toplevel', $chr); + my $chr_slice = $self->fetch_by_region( 'toplevel', $chr ); my $seq_region_id = $self->get_seq_region_id($chr_slice); - my $sth = $self->dbc->prepare - ("select min(k.seq_region_start), max(k.seq_region_end) " . - "from karyotype as k " . - "where k.seq_region_id = ? and k.band like ?"); + my $sth = + $self->prepare( 'SELECT MIN(k.seq_region_start), ' + . 'MAX(k.seq_region_end) ' + . 'FROM karyotype k ' + . 'WHERE k.seq_region_id = ? ' + . 'AND k.band LIKE ?' 
); - $sth->bind_param(1,$seq_region_id,SQL_INTEGER); - $sth->bind_param(2,"$band%",SQL_VARCHAR); + $sth->bind_param( 1, $seq_region_id, SQL_INTEGER ); + $sth->bind_param( 2, "$band%", SQL_VARCHAR ); $sth->execute(); - my ( $slice_start, $slice_end) = $sth->fetchrow_array; - if(defined $slice_start) { - return $self->fetch_by_region('toplevel',$chr,$slice_start,$slice_end); + my ( $slice_start, $slice_end ) = $sth->fetchrow_array; + + if ( defined $slice_start ) { + return + $self->fetch_by_region( 'toplevel', $chr, + $slice_start, $slice_end ); } throw("Band not recognised in database"); -} +} ## end sub fetch_by_chr_band @@ -1486,13 +1543,9 @@ sub store_assembly{ =cut - - sub prepare { - my $self = shift; - my $sql = shift; - - return $self->db()->dnadb()->dbc->prepare( $sql ); + my ( $self, $sql ) = @_; + return $self->db()->dnadb()->dbc->prepare($sql); } sub _build_exception_cache { @@ -1500,27 +1553,30 @@ sub _build_exception_cache { # build up a cache of the entire assembly exception table # it should be small anyway - my $sth = $self->prepare - ("SELECT seq_region_id, seq_region_start, seq_region_end, - exc_type, exc_seq_region_id, exc_seq_region_start, - exc_seq_region_end - FROM assembly_exception"); - + my $sth = + $self->prepare( 'SELECT ae.seq_region_id, ae.seq_region_start, ' + . 'ae.seq_region_end, ae.exc_type, ae.exc_seq_region_id, ' + . 'ae.exc_seq_region_start, ae.exc_seq_region_end ' + . 'FROM assembly_exception ae, ' + . 'seq_region sr, coord_system cs ' + . 'WHERE sr.seq_region_id = ae.seq_region_id ' + . 'AND sr.coord_system_id = cs.coord_system_id ' + . 'AND cs.species_id = ?' 
); + + $sth->bind_param( 1, $self->species_id(), SQL_INTEGER ); $sth->execute(); my %hash; $self->{'asm_exc_cache'} = \%hash; my $row; - while($row = $sth->fetchrow_arrayref()) { + while ( $row = $sth->fetchrow_arrayref() ) { my @result = @$row; - $hash{$result[0]} ||= []; - push(@{$hash{$result[0]}}, \@result); + $hash{ $result[0] } ||= []; + push( @{ $hash{ $result[0] } }, \@result ); } $sth->finish(); - - return; -} +} ## end sub _build_exception_cache =head2 cache_toplevel_seq_mappings @@ -1544,9 +1600,11 @@ sub cache_toplevel_seq_mappings { SELECT name FROM coord_system WHERE attrib like "%sequence_level%" + AND species_id = ? SSQL my $sth = $self->prepare($sql); + $sth->bind_param( 1, $self->species_id(), SQL_INTEGER ); $sth->execute(); my $sequence_level = $sth->fetchrow_array(); @@ -1570,9 +1628,11 @@ SSQL AND sra.attrib_type_id = at.attrib_type_id AND at.code = "toplevel" - AND cs.coord_system_id = sr.coord_system_id; + AND cs.coord_system_id = sr.coord_system_id + AND cs.species_id = ? LSQL $sth = $self->prepare($sql); + $sth->bind_param( 1, $self->species_id(), SQL_INTEGER ); $sth->execute(); while ( my $csn = $sth->fetchrow_array() ) { @@ -1681,12 +1741,17 @@ sub fetch_by_clone_accession{ #this unfortunately needs a version on the end to work if(! ($name =~ /\./)) { - my $sth = $self->prepare("SELECT sr.name " . - "FROM seq_region sr, coord_system cs " . - "WHERE cs.name = 'clone' " . - "AND cs.coord_system_id = sr.coord_system_id ". - "AND sr.name LIKE '$name.%'"); + my $sth = + $self->prepare( "SELECT sr.name " + . "FROM seq_region sr, coord_system cs " + . "WHERE cs.name = 'clone' " + . "AND cs.coord_system_id = sr.coord_system_id " + . "AND sr.name LIKE '$name.%' " + . "AND cs.species_id = ?" 
); + + $sth->bind_param( 1, $self->species_id(), SQL_INTEGER ); $sth->execute(); + if(!$sth->rows()) { $sth->finish(); throw("Clone $name not found in database"); diff --git a/modules/Bio/EnsEMBL/DBSQL/TranscriptAdaptor.pm b/modules/Bio/EnsEMBL/DBSQL/TranscriptAdaptor.pm index 22d7fbc52001bf52a56af5b16c2b5f0ac4620cb1..6339dd341b811ef7772d20a92642cf153da36281 100644 --- a/modules/Bio/EnsEMBL/DBSQL/TranscriptAdaptor.pm +++ b/modules/Bio/EnsEMBL/DBSQL/TranscriptAdaptor.pm @@ -718,8 +718,10 @@ sub store { } if(defined($dxref_id)) { - my $sth = $self->prepare( "update transcript set display_xref_id = ?". - " where transcript_id = ?"); + my $sth = + $self->prepare( "UPDATE transcript " + . "SET display_xref_id = ? " + . "WHERE transcript_id = ?" ); $sth->bind_param(1, $dxref_id, SQL_INTEGER); $sth->bind_param(2, $transc_dbID, SQL_INTEGER); $sth->execute(); @@ -738,9 +740,9 @@ sub store { # # Link transcript to exons in exon_transcript table # - my $etst = - $self->prepare("insert into exon_transcript (exon_id,transcript_id,rank)" - ." values (?,?,?)"); + my $etst = $self->prepare( + "INSERT INTO exon_transcript (exon_id,transcript_id,rank) " + . "VALUES (?,?,?)" ); my $rank = 1; foreach my $exon ( @{$transcript->get_all_Exons} ) { $etst->bind_param(1, $exon->dbID, SQL_INTEGER); diff --git a/modules/Bio/EnsEMBL/DBSQL/UnconventionalTranscriptAssociationAdaptor.pm b/modules/Bio/EnsEMBL/DBSQL/UnconventionalTranscriptAssociationAdaptor.pm index ad7b6608f04078e139ab81f962a7156851f2521b..f2ed230b8775ee44d80b03cfaf2251fcb0d0a835 100644 --- a/modules/Bio/EnsEMBL/DBSQL/UnconventionalTranscriptAssociationAdaptor.pm +++ b/modules/Bio/EnsEMBL/DBSQL/UnconventionalTranscriptAssociationAdaptor.pm @@ -51,8 +51,11 @@ use vars qw(@ISA); Arg [1] : String type the type of associations to obtain Example : $utas = $utaa->fetch_all_by_type('antisense'); - Description: Obtains all unconventional transcript associations that have - a particular interaction type. 
+ Description: Obtains all unconventional transcript associations that + have a particular interaction type. + NOTE: In a multi-species database, this method will + return all the entries matching the search criteria, not + just the ones associated with the current species. Returntype : listREF of Bio::EnsEMBL::UnconventionalTranscriptAssociations Exceptions : none Caller : general diff --git a/modules/Bio/EnsEMBL/Registry.pm b/modules/Bio/EnsEMBL/Registry.pm index fbe7e27b294d5ced89a46f098f20c0c289de45ef..6b39dab70affe59f6600fab6eb1014b8ade9f997 100644 --- a/modules/Bio/EnsEMBL/Registry.pm +++ b/modules/Bio/EnsEMBL/Registry.pm @@ -601,9 +601,9 @@ sub remove_DBAdaptor{ delete $registry_register{$species}{$group}; - #This will remove the DBAdaptor and all the other adaptors + # This will remove the DBAdaptor and all the other adaptors - #Now remove if from the _DBA array + # Now remove if from the _DBA array my $index; @@ -618,7 +618,7 @@ sub remove_DBAdaptor{ } } - #Now remove from _DBA cache + # Now remove from _DBA cache splice(@{$registry_register{'_DBA'}}, $index, 1); return; @@ -644,17 +644,17 @@ sub remove_DBAdaptor{ sub reset_DBAdaptor{ my ($self, $species, $group, $dbname, $host, $port, $user, $pass) = @_; - #Check mandatory params + # Check mandatory params if(! (defined $species && defined $group && defined $dbname)){ throw('Must provide at least a species, group and dbname parmeter to redefine a DB in the registry'); } - #validate species here + # Validate species here my $alias = $self->get_alias($species); throw("Could not find registry alias for species:\t$species") if(! defined $alias); - #Get all current defaults if not defined + # Get all current defaults if not defined my $current_db = $self->get_DBAdaptor($alias, $group); if(! 
defined $current_db){ @@ -673,7 +673,7 @@ sub reset_DBAdaptor{ my @adaptors = @{$self->get_all_adaptors}; - #ConfigRegistry should automatically add this to the Registry + # ConfigRegistry should automatically add this to the Registry my $db = $class->new( -user => $user, -host => $host, @@ -763,51 +763,45 @@ sub get_DNAAdaptor{ =cut -sub add_adaptor{ - my ($class,$species,$group,$type,$adap, $reset)= @_; +sub add_adaptor { + my ( $class, $species, $group, $type, $adap, $reset ) = @_; $species = $class->get_alias($species); -# -# Becouse the adaptors are not stored initially only there class paths when -# the adaptors are obtained we need to store these instead. -# It is not necessarily an error if the registry is overwritten without -# the reset set but it is an indication that we are overwriting a database -# which should be a warning for now -# + # Since the adaptors are not stored initially, only their class paths + # when the adaptors are obtained, we need to store these instead. It + # is not necessarily an error if the registry is overwritten without + # the reset set but it is an indication that we are overwriting a + # database which should be a warning for now - if(defined($reset)){ # JUST REST THE HASH VALUE NO MORE PROCESSING NEEDED - $registry_register{$species}{lc($group)}{lc($type)} = $adap; + if ( defined($reset) ) + { # JUST REST THE HASH VALUE NO MORE PROCESSING NEEDED + $registry_register{$species}{ lc($group) }{ lc($type) } = $adap; return; } - if(defined($registry_register{$species}{lc($group)}{lc($type)})){ - #print STDERR ("Overwriting Adaptor in Registry for $species $group $type\n"); - $registry_register{$species}{lc($group)}{lc($type)} = $adap; - return; + if ( + defined( $registry_register{$species}{ lc($group) }{ lc($type) } ) ) + { + # print STDERR ( + # "Overwriting Adaptor in Registry for $species $group $type\n"); + $registry_register{$species}{ lc($group) }{ lc($type) } = $adap; + return; } - 
$registry_register{$species}{lc($group)}{lc($type)} = $adap; + $registry_register{$species}{ lc($group) }{ lc($type) } = $adap; - if(!defined ($registry_register{$species}{'list'})){ - my @list =(); - push(@list,$type); - $registry_register{$species}{'list'}= \@list; - } - else{ - push(@{$registry_register{$species}{'list'}},$type); + if ( !defined( $registry_register{$species}{'list'} ) ) { + $registry_register{$species}{'list'} = [$type]; + } else { + push( @{ $registry_register{$species}{'list'} }, $type ); } - - - if(!defined ($registry_register{lc($type)}{$species})){ - my @list =(); - push(@list,$adap); - $registry_register{lc($type)}{$species}= \@list; - } - else{ - push(@{$registry_register{lc($type)}{$species}},$adap); + if ( !defined( $registry_register{ lc($type) }{$species} ) ) { + $registry_register{ lc($type) }{$species} = [$adap]; + } else { + push( @{ $registry_register{ lc($type) }{$species} }, $adap ); } -} +} ## end sub add_adaptor =head2 get_adaptor @@ -835,11 +829,9 @@ sub get_adaptor{ $group = $dnadb_group; } - my $ret = $registry_register{$species}{lc($group)}{lc($type)}; - if(!defined($ret)){ - - return undef; - } + my $ret = $registry_register{$species}{ lc($group) }{ lc($type) }; + if ( !defined($ret) ) { return undef; } + if(!ref($ret)){ # not instantiated yet my $dba = $registry_register{$species}{lc($group)}{'_DB'}; my $module = $ret; @@ -883,7 +875,7 @@ sub get_all_adaptors{ my (%species_hash, %group_hash, %type_hash); - if(@args == 1){ #old species only one parameter + if(@args == 1){ # Old species only one parameter warn("-SPECIES argument should now be used to get species adaptors"); $species = $args[0]; } @@ -1008,7 +1000,7 @@ sub alias_exists{ sub set_disconnect_when_inactive{ foreach my $dba ( @{get_all_DBAdaptors()}){ my $dbc = $dba->dbc; - #disconnect if connected + # Disconnect if connected $dbc->disconnect_if_idle() if $dbc->connected(); $dbc->disconnect_when_inactive(1); } @@ -1029,7 +1021,7 @@ sub disconnect_all { foreach 
my $dba ( @{get_all_DBAdaptors()||[]} ){ my $dbc = $dba->dbc; next unless $dbc; - #disconnect if connected + # Disconnect if connected $dbc->disconnect_if_idle() if $dbc->connected(); } } @@ -1159,148 +1151,211 @@ sub load_registry_from_url { =cut sub load_registry_from_db { - my($self, @args) = @_; - my ($host, $port, $user, $pass, $verbose, $db_version, $wait_timeout) = - rearrange([qw(HOST PORT USER PASS VERBOSE DB_VERSION WAIT_TIMEOUT )], @args); + my ( $self, @args ) = @_; + + my ( $host, $port, $user, $pass, $verbose, $db_version, + $wait_timeout ) = + rearrange( + [qw(HOST PORT USER PASS VERBOSE DB_VERSION WAIT_TIMEOUT )], + @args ); my $go_version = 0; my $compara_version =0; my $ancestral_version =0; $user ||= "ensro"; - if(!defined($port)){ - $port = 3306; - if($host eq "ensembldb.ensembl.org"){ - if( !defined($db_version) or $db_version >= 48){ - $port = 5306; + if ( !defined($port) ) { + $port = 3306; + if ( $host eq "ensembldb.ensembl.org" ) { + if ( !defined($db_version) or $db_version >= 48 ) { + $port = 5306; } } } - $wait_timeout ||= 0; - my $db = DBI->connect( "DBI:mysql:host=$host;port=$port" , $user, $pass ); - my $res = $db->selectall_arrayref( "show databases" ); - my @dbnames = map {$_->[0] } @$res; + my $dbh = + DBI->connect( "DBI:mysql:host=$host;port=$port", $user, $pass ); + + my $res = $dbh->selectall_arrayref("show databases"); + my @dbnames = map { $_->[0] } @$res; my %temp; my $software_version = $self->software_version(); - if (defined($db_version)) { + + if ( defined($db_version) ) { $software_version = $db_version; } - print "Will only load release $software_version databases\n" if ($verbose); - for my $db (@dbnames){ - if($db =~ /^([a-z]+_[a-z]+_[a-z]+)_(\d+)_(\d+[a-z]*)/){ - if($2 eq $software_version){ - $temp{$1} = $2."_".$3; + + if ($verbose) { + printf( "Will only load v%d databases\n", $software_version ); + } + + for my $db (@dbnames) { + if ( $db =~ /^(\w+)_collection_core_(\d+)/ ) { + if ( $2 eq $software_version ) { + 
$temp{$1} = 'collection_core_' . $2; } - } - elsif($db =~ /^(.+)_(userdata)$/){ - $temp{$1} = $2; - } - elsif($db =~ /^ensembl_compara_(\d+)/){ - if($1 eq $software_version){ - $compara_version = $1; + } elsif ( $db =~ /^([a-z]+_[a-z]+_[a-z]+)_(\d+)_(\d+[a-z]*)/ ) { + if ( $2 eq $software_version ) { + $temp{$1} = $2 . "_" . $3; } - } - elsif($db =~ /^ensembl_ancestral_(\d+)/){ - if($1 eq $software_version){ - $ancestral_version = $1; + } elsif ( $db =~ /^(.+)_(userdata)$/ ) { + $temp{$1} = $2; + } elsif ( $db =~ /^ensembl_compara_(\d+)/ ) { + if ( $1 eq $software_version ) { + $compara_version = $1; } - } - elsif($db =~ /^ensembl_go_(\d+)/){ - if($1 eq $software_version){ - $go_version = $1; + } elsif ( $db =~ /^ensembl_ancestral_(\d+)/ ) { + if ( $1 eq $software_version ) { + $ancestral_version = $1; + } + } elsif ( $db =~ /^ensembl_go_(\d+)/ ) { + if ( $1 eq $software_version ) { + $go_version = $1; } } + } ## end for my $db (@dbnames) + + @dbnames = (); + + foreach my $key ( keys %temp ) { + push @dbnames, $key . "_" . 
$temp{$key}; } - - @dbnames =(); - - foreach my $key ( keys %temp){ - push @dbnames, $key."_".$temp{$key}; - } - # register core databases - + + # Register Core databases + my @core_dbs = grep { /^[a-z]+_[a-z]+_core_\d+_/ } @dbnames; - - for my $coredb ( @core_dbs ) { - my ($species, $num ) = ( $coredb =~ /(^[a-z]+_[a-z]+)_core_(\d+)/ ); - my $dba = Bio::EnsEMBL::DBSQL::DBAdaptor->new - ( -group => "core", - -species => $species, - -host => $host, - -user => $user, - -pass => $pass, - -port => $port, - -dbname => $coredb, - -wait_timeout => $wait_timeout + + foreach my $coredb (@core_dbs) { + my ( $species, $num ) = + ( $coredb =~ /(^[a-z]+_[a-z]+)_core_(\d+)/ ); + + my $dba = + Bio::EnsEMBL::DBSQL::DBAdaptor->new(-group => "core", + -species => $species, + -host => $host, + -user => $user, + -pass => $pass, + -port => $port, + -dbname => $coredb, + -wait_timeout => $wait_timeout ); - (my $sp = $species ) =~ s/_/ /g; + + ( my $sp = $species ) =~ s/_/ /g; $self->add_alias( $species, $sp ); - print $coredb." loaded\n" if ($verbose); + + if ($verbose) { + printf( "Species '%s' loaded from database '%s'\n", + $species, $coredb ); + } } + # Register multi-species databases + my @multi_dbs = grep { /_collection_core_\d+$/ } @dbnames; + + foreach my $multidb (@multi_dbs) { + my $sth = + $dbh->prepare( + sprintf( 'SELECT species_id, meta_value FROM %s.species_meta ', + $dbh->quote_identifier($multidb) ) + . 
"WHERE meta_key = 'species.db_name'" ); + $sth->execute(); + + my ( $species_id, $species ); + $sth->bind_columns( \( $species_id, $species ) ); + + while ( $sth->fetch() ) { + my $dba = + Bio::EnsEMBL::DBSQL::DBAdaptor->new( + -group => "core", + -species => $species, + -species_id => $species_id, + -host => $host, + -user => $user, + -pass => $pass, + -port => $port, + -dbname => $multidb, + -wait_timeout => $wait_timeout + ); + + ( my $sp = $species ) =~ s/_/ /g; + $self->add_alias( $species, $sp ); + + if ($verbose) { + printf( "Species '%s' (id:%d) loaded from database '%s'\n", + $species, $species_id, $multidb ); + } + } + } ## end foreach my $multidb (@multi_dbs) + # register cdna databases - + my @cdna_dbs = grep { /^[a-z]+_[a-z]+_cdna_\d+_/ } @dbnames; - - for my $cdnadb ( @cdna_dbs ) { - my ($species, $num ) = ( $cdnadb =~ /(^[a-z]+_[a-z]+)_cdna_(\d+)/ ); - my $dba = Bio::EnsEMBL::DBSQL::DBAdaptor->new - ( -group => "cdna", - -species => $species, - -host => $host, - -user => $user, - -pass => $pass, - -port => $port, - -dbname => $cdnadb, - -wait_timeout => $wait_timeout + + for my $cdnadb (@cdna_dbs) { + my ( $species, $num ) = + ( $cdnadb =~ /(^[a-z]+_[a-z]+)_cdna_(\d+)/ ); + my $dba = + Bio::EnsEMBL::DBSQL::DBAdaptor->new(-group => "cdna", + -species => $species, + -host => $host, + -user => $user, + -pass => $pass, + -port => $port, + -dbname => $cdnadb, + -wait_timeout => $wait_timeout ); - (my $sp = $species ) =~ s/_/ /g; + ( my $sp = $species ) =~ s/_/ /g; $self->add_alias( $species, $sp ); - print $cdnadb." loaded\n" if ($verbose); + print $cdnadb. 
" loaded\n" if ($verbose); } my @vega_dbs = grep { /^[a-z]+_[a-z]+_vega_\d+_/ } @dbnames; - - for my $vegadb ( @vega_dbs ) { - my ($species, $num ) = ( $vegadb =~ /(^[a-z]+_[a-z]+)_vega_(\d+)/ ); - my $dba = Bio::EnsEMBL::DBSQL::DBAdaptor->new - ( -group => "vega", - -species => $species, - -host => $host, - -user => $user, - -pass => $pass, - -port => $port, - -wait_timeout => $wait_timeout, - -dbname => $vegadb - ); - (my $sp = $species ) =~ s/_/ /g; + + for my $vegadb (@vega_dbs) { + my ( $species, $num ) = + ( $vegadb =~ /(^[a-z]+_[a-z]+)_vega_(\d+)/ ); + my $dba = + Bio::EnsEMBL::DBSQL::DBAdaptor->new( + -group => "vega", + -species => $species, + -host => $host, + -user => $user, + -pass => $pass, + -port => $port, + -wait_timeout => $wait_timeout, + -dbname => $vegadb ); + ( my $sp = $species ) =~ s/_/ /g; $self->add_alias( $species, $sp ); - print $vegadb." loaded\n" if ($verbose); + print $vegadb. " loaded\n" if ($verbose); } - + + # Otherfeatures + my @other_dbs = grep { /^[a-z]+_[a-z]+_otherfeatures_\d+_/ } @dbnames; - - for my $other_db ( @other_dbs ) { - my ($species, $num) = ( $other_db =~ /(^[a-z]+_[a-z]+)_otherfeatures_(\d+)/ ); - my $dba = Bio::EnsEMBL::DBSQL::DBAdaptor->new - ( -group => "otherfeatures", - -species => $species, - -host => $host, - -user => $user, - -pass => $pass, - -port => $port, - -wait_timeout => $wait_timeout, - -dbname => $other_db - ); - (my $sp = $species ) =~ s/_/ /g; - $self->add_alias( $species, $sp ); - print $other_db." loaded\n" if ($verbose); + + for my $other_db (@other_dbs) { + my ( $species, $num ) = + ( $other_db =~ /(^[a-z]+_[a-z]+)_otherfeatures_(\d+)/ ); + my $dba = + Bio::EnsEMBL::DBSQL::DBAdaptor->new( + -group => "otherfeatures", + -species => $species, + -host => $host, + -user => $user, + -pass => $pass, + -port => $port, + -wait_timeout => $wait_timeout, + -dbname => $other_db ); + ( my $sp = $species ) =~ s/_/ /g; + $self->add_alias( $species, $sp ); + print $other_db. 
" loaded\n" if ($verbose); } - + + # User upload DBs + my @userupload_dbs = grep { /_userdata$/ } @dbnames; for my $userupload_db ( @userupload_dbs ) { my ($species) = ( $userupload_db =~ /(^.+)_userdata$/ ); @@ -1319,282 +1374,385 @@ sub load_registry_from_db { print $userupload_db." loaded\n" if ($verbose); } - + # Variation + eval "require Bio::EnsEMBL::Variation::DBSQL::DBAdaptor"; - if($@) { - #ignore variations as code required not there for this - print "Bio::EnsEMBL::Variation::DBSQL::DBAdaptor module not found so variation databases will be ignored if found\n" if ($verbose); - } - else{ - my @variation_dbs = grep { /^[a-z]+_[a-z]+_variation_\d+_/ } @dbnames; - - for my $variation_db ( @variation_dbs ) { - my ($species, $num ) = ( $variation_db =~ /(^[a-z]+_[a-z]+)_variation_(\d+)/ ); - my $dba = Bio::EnsEMBL::Variation::DBSQL::DBAdaptor->new - ( -group => "variation", - -species => $species, - -host => $host, - -user => $user, - -pass => $pass, - -port => $port, - -wait_timeout => $wait_timeout, - -dbname => $variation_db - ); - print $variation_db." loaded\n" if ($verbose); + if ($@) { + # Ignore variations as code required not there for this + print +"Bio::EnsEMBL::Variation::DBSQL::DBAdaptor module not found so variation databases will be ignored if found\n" + if ($verbose); + } else { + my @variation_dbs = + grep { /^[a-z]+_[a-z]+_variation_\d+_/ } @dbnames; + + for my $variation_db (@variation_dbs) { + my ( $species, $num ) = + ( $variation_db =~ /(^[a-z]+_[a-z]+)_variation_(\d+)/ ); + my $dba = + Bio::EnsEMBL::Variation::DBSQL::DBAdaptor->new( + -group => "variation", + -species => $species, + -host => $host, + -user => $user, + -pass => $pass, + -port => $port, + -wait_timeout => $wait_timeout, + -dbname => $variation_db + ); + print $variation_db. 
" loaded\n" if ($verbose); } } eval "require Bio::EnsEMBL::Funcgen::DBSQL::DBAdaptor"; - if($@) { + if ($@) { #ignore funcgen DBs as code required not there for this - print "Bio::EnsEMBL::Funcgen::DBSQL::DBAdaptor module not found so functional genomics databases will be ignored if found\n" if ($verbose); - } - else{ + print +"Bio::EnsEMBL::Funcgen::DBSQL::DBAdaptor module not found so functional genomics databases will be ignored if found\n" + if ($verbose); + } else { my @funcgen_dbs = grep { /^[a-z]+_[a-z]+_funcgen_\d+_/ } @dbnames; - - for my $funcgen_db ( @funcgen_dbs ) { - my ($species, $num ) = ( $funcgen_db =~ /(^[a-z]+_[a-z]+)_funcgen_(\d+)/ ); - my $dba = Bio::EnsEMBL::Funcgen::DBSQL::DBAdaptor->new - ( -group => "funcgen", - -species => $species, - -host => $host, - -user => $user, - -pass => $pass, - -port => $port, - -wait_timeout => $wait_timeout, - -dbname => $funcgen_db - ); - print $funcgen_db." loaded\n" if ($verbose); - } - } - - #Compara - if($compara_version){ - eval "require Bio::EnsEMBL::Compara::DBSQL::DBAdaptor"; - if($@) { - #ignore compara as code required not there for this - print "Bio::EnsEMBL::Compara::DBSQL::DBAdaptor not found so compara database ensembl_compara_$compara_version will be ignored\n" if ($verbose); + for my $funcgen_db (@funcgen_dbs) { + my ( $species, $num ) = + ( $funcgen_db =~ /(^[a-z]+_[a-z]+)_funcgen_(\d+)/ ); + my $dba = + Bio::EnsEMBL::Funcgen::DBSQL::DBAdaptor->new( + -group => "funcgen", + -species => $species, + -host => $host, + -user => $user, + -pass => $pass, + -port => $port, + -wait_timeout => $wait_timeout, + -dbname => $funcgen_db ); + print $funcgen_db. 
" loaded\n" if ($verbose); } - else{ - my $compara_db = "ensembl_compara_".$compara_version; - - my $dba = Bio::EnsEMBL::Compara::DBSQL::DBAdaptor->new - ( -group => "compara", - -species => "multi", - -host => $host, - -user => $user, - -pass => $pass, - -port => $port, - -wait_timeout => $wait_timeout, - -dbname => $compara_db - ); - print $compara_db." loaded\n" if ($verbose); - } - } - else{ - print "No Compara database found" if ($verbose); } + # Compara - #Ancestral sequences - if($ancestral_version){ - my $ancestral_db = "ensembl_ancestral_".$ancestral_version; - my $dba = Bio::EnsEMBL::DBSQL::DBAdaptor->new - ( -group => "core", - -species => "Ancestral sequences", - -host => $host, - -user => $user, - -pass => $pass, - -port => $port, - -wait_timeout => $wait_timeout, - -dbname => $ancestral_db - ); - print $ancestral_db." loaded\n" if ($verbose); - } - else{ + if ($compara_version) { + eval "require Bio::EnsEMBL::Compara::DBSQL::DBAdaptor"; + if ($@) { + # Ignore Compara as code required not there for this + if ($verbose) { + printf( "Bio::EnsEMBL::Compara::DBSQL::DBAdaptor " + . "not found so compara database " + . "ensembl_compara_%d will be ignored\n", + $compara_version ); + } + } else { + my $compara_db = "ensembl_compara_" . $compara_version; + + my $dba = + Bio::EnsEMBL::Compara::DBSQL::DBAdaptor->new( + -group => "compara", + -species => "multi", + -host => $host, + -user => $user, + -pass => $pass, + -port => $port, + -wait_timeout => $wait_timeout, + -dbname => $compara_db ); + if ($verbose) { + printf( "%s loaded\n", $compara_db ); + } + } + } elsif ($verbose) { + print("No Compara database found\n"); + } + + # Ancestral sequences + + if ($ancestral_version) { + my $ancestral_db = "ensembl_ancestral_" . 
$ancestral_version; + my $dba = + Bio::EnsEMBL::DBSQL::DBAdaptor->new( + -group => "core", + -species => "Ancestral sequences", + -host => $host, + -user => $user, + -pass => $pass, + -port => $port, + -wait_timeout => $wait_timeout, + -dbname => $ancestral_db ); + print $ancestral_db. " loaded\n" if ($verbose); + } else { print "No Ancestral database found" if ($verbose); } + # GO - #GO - if($go_version){ + if ($go_version) { eval "require Bio::EnsEMBL::ExternalData::GO::GOAdaptor"; - if($@) { + if ($@) { #ignore go as code required not there for this -# print $@; - print "GO software not installed so go database ensemb_go_$go_version will be ignored\n" if ($verbose); - } - else{ - my $go_db = "ensembl_go_".$go_version; - my $dba = Bio::EnsEMBL::ExternalData::GO::GOAdaptor->new - ( -group => "go", - -species => "multi", - -host => $host, - -user => $user, - -pass => $pass, - -port => $port, - -dbname => $go_db - ); - print $go_db." loaded\n" if ($verbose); + # print $@; + if ($verbose) { + print "GO software not installed " + . "so GO database ensemb_go_$go_version will be ignored\n"; + } + } else { + my $go_db = "ensembl_go_" . $go_version; + my $dba = + Bio::EnsEMBL::ExternalData::GO::GOAdaptor->new( + -group => "go", + -species => "multi", + -host => $host, + -user => $user, + -pass => $pass, + -port => $port, + -dbname => $go_db + ); + print $go_db. 
" loaded\n" if ($verbose); } - } - else{ - print "No go database found" if ($verbose); + } elsif ($verbose) { + print("No GO database found\n"); } - #hard coded aliases for the different species + # Hard coded aliases for the different species - my @aliases = ('chimp','PanTro1', 'Pan', 'P_troglodytes'); - Bio::EnsEMBL::Utils::ConfigRegistry->add_alias(-species => "Pan_troglodytes", - -alias => \@aliases); - - @aliases = ('elegans','worm'); - Bio::EnsEMBL::Utils::ConfigRegistry->add_alias(-species => "Caenorhabditis_elegans", - -alias => \@aliases); - - @aliases = ('tetraodon'); - Bio::EnsEMBL::Utils::ConfigRegistry->add_alias(-species => "Tetraodon_nigroviridis", - -alias => \@aliases); - - @aliases = ('H_Sapiens', 'homo sapiens', 'Homo_Sapiens', 'Homo', 'human', 'Hg17','ensHS', '9606'); - Bio::EnsEMBL::Utils::ConfigRegistry->add_alias(-species => "Homo_sapiens", - -alias => \@aliases); - - @aliases = ('M_Musculus', 'mus musculus', 'Mus_Musculus', 'Mus', 'mouse','Mm5','ensMM','10090'); - Bio::EnsEMBL::Utils::ConfigRegistry->add_alias(-species => "Mus_musculus", - -alias => \@aliases); - - @aliases = ('R_Norvegicus', 'rattus norvegicus', 'Rattus_Norvegicus', 'Rattus', 'rat', 'Rn3', '10116'); - Bio::EnsEMBL::Utils::ConfigRegistry->add_alias(-species => "Rattus_norvegicus", - -alias => \@aliases); - - @aliases = ('T_Rubripes', 'Fugu', 'takifugu'); - Bio::EnsEMBL::Utils::ConfigRegistry->add_alias(-species => "Takifugu_rubripes", - -alias => \@aliases); - - @aliases = ('G_Gallus', 'gallus gallus', 'Chicken', 'GalGal2'); - Bio::EnsEMBL::Utils::ConfigRegistry->add_alias(-species => "Gallus_Gallus", - -alias => \@aliases); - - @aliases = ('D_Rerio', 'danio rerio', 'Danio_Rerio', 'Danio', 'zebrafish', 'zfish'); - Bio::EnsEMBL::Utils::ConfigRegistry->add_alias(-species => "Danio_rerio", - -alias => \@aliases); - - @aliases = ('X_Tropicalis', 'xenopus tropicalis','Xenopus_tropicalis', 'Xenopus'); - Bio::EnsEMBL::Utils::ConfigRegistry->add_alias(-species => 
"Xenopus_tropicalis", - -alias => \@aliases); - - @aliases = ('A_Gambiae', 'Anopheles Gambiae','Anopheles_gambiae', 'Anopheles','mosquito'); - Bio::EnsEMBL::Utils::ConfigRegistry->add_alias(-species => "Anopheles_gambiae", - -alias => \@aliases); - - - @aliases = ('D_Melanogaster', 'drosophila melanogaster', 'Drosophila_melanogaster', 'drosophila', 'fly'); - Bio::EnsEMBL::Utils::ConfigRegistry->add_alias(-species => "Drosophila_melanogaster", - -alias => \@aliases); - - @aliases = ('S_Cerevisiae', 'Saccharomyces Cerevisiae', - 'Saccharomyces_cerevisiae', 'Saccharomyces', 'yeast'); - Bio::EnsEMBL::Utils::ConfigRegistry->add_alias(-species => "Saccharomyces_cerevisiae", - -alias => \@aliases); - - @aliases = ('C_Familiaris', 'Canis Familiaris', - 'Canis_familiaris', 'Canis', 'dog'); - - Bio::EnsEMBL::Utils::ConfigRegistry->add_alias(-species => "Canis_familiaris", - -alias => \@aliases); - - Bio::EnsEMBL::Utils::ConfigRegistry->add_alias(-species => "Ciona_intestinalis", - -alias => ['ciona','Ciona intestinalis']); - - Bio::EnsEMBL::Utils::ConfigRegistry->add_alias(-species => "Bos_taurus", - -alias => ['cow','bos_taurus']); - - Bio::EnsEMBL::Utils::ConfigRegistry->add_alias(-species => "Macaca_mulatta", - -alias => ['rhesus','rhesus_monkey','macaque','macaca mulatta']); - - Bio::EnsEMBL::Utils::ConfigRegistry->add_alias(-species => "Otolemur_garnettii", - -alias => ['bushbaby','galago','Otolemur garnettii']); - - Bio::EnsEMBL::Utils::ConfigRegistry->add_alias(-species => "Oryctolagus_cuniculus", - -alias => ['rabbit','Oryctolagus cuniculus']); - - Bio::EnsEMBL::Utils::ConfigRegistry->add_alias(-species => "Felis_catus", - -alias => ['cat','felis catus']); + my @aliases = ( 'chimp', 'PanTro1', 'Pan', 'P_troglodytes' ); + Bio::EnsEMBL::Utils::ConfigRegistry->add_alias( + -species => "Pan_troglodytes", + -alias => \@aliases ); - Bio::EnsEMBL::Utils::ConfigRegistry->add_alias(-species => "Sus_scrofa", - -alias => ['pig','sus scrofa']); + @aliases = ( 'elegans', 'worm' 
); + Bio::EnsEMBL::Utils::ConfigRegistry->add_alias( + -species => "Caenorhabditis_elegans", + -alias => \@aliases ); - Bio::EnsEMBL::Utils::ConfigRegistry->add_alias(-species => "Sorex_araneus", - -alias => ['shrew','ground_shrew','european_shrew','Sorex araneus']); - - Bio::EnsEMBL::Utils::ConfigRegistry->add_alias(-species => "Erinaceus_europaeus", - -alias => ['western_european_hedgehog','Erinaceus europaeus']); - - Bio::EnsEMBL::Utils::ConfigRegistry->add_alias(-species => "Myotis_lucifugus", - -alias => ['microbat','little_brown_bat']); - - Bio::EnsEMBL::Utils::ConfigRegistry->add_alias(-species => "Dasypus_novemcinctus", - -alias => ['armadillo','arma','Dasypus novemcinctu']); - - Bio::EnsEMBL::Utils::ConfigRegistry->add_alias(-species => "Loxodonta_africana", - -alias => ['african_elephant','elephant','Loxodonta africana']); - - Bio::EnsEMBL::Utils::ConfigRegistry->add_alias(-species => "Echinops_telfairi", - -alias => ['tenrec','madagascar_hedgehog','lesser_hedgehog','Echinops telfairi']); - - Bio::EnsEMBL::Utils::ConfigRegistry->add_alias(-species => "Monodelphis_domestica", - -alias => ['opossum','Monodelphis domestica']); - - Bio::EnsEMBL::Utils::ConfigRegistry->add_alias(-species => "Ornithorhynchus_anatinus", - -alias => ['platypus','Ornithorhynchus anatinus']); - - Bio::EnsEMBL::Utils::ConfigRegistry->add_alias(-species => "Gasterosteus_aculeatus", - -alias => ['stickleback','Gasterosteus aculeatus']); - - Bio::EnsEMBL::Utils::ConfigRegistry->add_alias(-species => "Oryzias_latipes", - -alias => ['medaka','Oryzias latipes']); - - Bio::EnsEMBL::Utils::ConfigRegistry->add_alias(-species => "Cavia_porcellus", - -alias => ['guinea_pig','"Cavia porcellus']); - - Bio::EnsEMBL::Utils::ConfigRegistry->add_alias(-species => "Aedes_aegypti", - -alias => ['aedes','Aedes aegypti']); - - Bio::EnsEMBL::Utils::ConfigRegistry->add_alias(-species => "Spermophilus_tridecemlineatus", - -alias => ['squirrel','Spermophilus tridecemlineatus']); - - 
Bio::EnsEMBL::Utils::ConfigRegistry->add_alias(-species => "Tupaia_belangeri", - -alias => ['tree_shrew','Tupaia belangeri']); - - Bio::EnsEMBL::Utils::ConfigRegistry->add_alias(-species => "Culex_pipiens", - -alias => ['culex','Culex Pipiens']); - - Bio::EnsEMBL::Utils::ConfigRegistry->add_alias(-species => "Ochotona_princeps", - -alias => ['pika','Ochotona princeps']); - - Bio::EnsEMBL::Utils::ConfigRegistry->add_alias(-species => "Anolis_carolinensis", - -alias => ['anolis','anolis_lizard','Anolis carolinensis']); - - Bio::EnsEMBL::Utils::ConfigRegistry->add_alias(-species => "Microcebus_murinus", - -alias => ['mouse_lemur','Microcebus murinus']); - - Bio::EnsEMBL::Utils::ConfigRegistry->add_alias(-species => "Pongo_pygmaeus", - -alias => ['orang','orang_utan','orangutan','Pongo pygmaeus']); - - Bio::EnsEMBL::Utils::ConfigRegistry->add_alias(-species => "Equus_caballus", - -alias => ['horse', 'Equuscaballus']); + @aliases = ('tetraodon'); + Bio::EnsEMBL::Utils::ConfigRegistry->add_alias( + -species => "Tetraodon_nigroviridis", + -alias => \@aliases ); + + @aliases = ( 'H_Sapiens', 'homo sapiens', + 'Homo_Sapiens', 'Homo', + 'human', 'Hg17', + 'ensHS', '9606' ); + Bio::EnsEMBL::Utils::ConfigRegistry->add_alias( + -species => "Homo_sapiens", + -alias => \@aliases ); + + @aliases = ( 'M_Musculus', 'mus musculus', + 'Mus_Musculus', 'Mus', + 'mouse', 'Mm5', + 'ensMM', '10090' ); + Bio::EnsEMBL::Utils::ConfigRegistry->add_alias( + -species => "Mus_musculus", + -alias => \@aliases ); + + @aliases = ( 'R_Norvegicus', 'rattus norvegicus', + 'Rattus_Norvegicus', 'Rattus', + 'rat', 'Rn3', + '10116' ); + Bio::EnsEMBL::Utils::ConfigRegistry->add_alias( + -species => "Rattus_norvegicus", + -alias => \@aliases ); + + @aliases = ( 'T_Rubripes', 'Fugu', 'takifugu' ); + Bio::EnsEMBL::Utils::ConfigRegistry->add_alias( + -species => "Takifugu_rubripes", + -alias => \@aliases ); + + @aliases = ( 'G_Gallus', 'gallus gallus', 'Chicken', 'GalGal2' ); + 
Bio::EnsEMBL::Utils::ConfigRegistry->add_alias( + -species => "Gallus_Gallus", + -alias => \@aliases ); + + @aliases = ( 'D_Rerio', 'danio rerio', + 'Danio_Rerio', 'Danio', + 'zebrafish', 'zfish' ); + Bio::EnsEMBL::Utils::ConfigRegistry->add_alias( + -species => "Danio_rerio", + -alias => \@aliases ); + + @aliases = ( 'X_Tropicalis', 'xenopus tropicalis', + 'Xenopus_tropicalis', 'Xenopus' ); + Bio::EnsEMBL::Utils::ConfigRegistry->add_alias( + -species => "Xenopus_tropicalis", + -alias => \@aliases ); + + @aliases = ( 'A_Gambiae', 'Anopheles Gambiae', + 'Anopheles_gambiae', 'Anopheles', + 'mosquito' ); + Bio::EnsEMBL::Utils::ConfigRegistry->add_alias( + -species => "Anopheles_gambiae", + -alias => \@aliases ); + + @aliases = ( 'D_Melanogaster', 'drosophila melanogaster', + 'Drosophila_melanogaster', 'drosophila', + 'fly' ); + Bio::EnsEMBL::Utils::ConfigRegistry->add_alias( + -species => "Drosophila_melanogaster", + -alias => \@aliases ); + + @aliases = ( 'S_Cerevisiae', 'Saccharomyces Cerevisiae', + 'Saccharomyces_cerevisiae', 'Saccharomyces', + 'yeast' ); + Bio::EnsEMBL::Utils::ConfigRegistry->add_alias( + -species => "Saccharomyces_cerevisiae", + -alias => \@aliases ); + + @aliases = ( 'C_Familiaris', 'Canis Familiaris', + 'Canis_familiaris', 'Canis', + 'dog' ); + + Bio::EnsEMBL::Utils::ConfigRegistry->add_alias( + -species => "Canis_familiaris", + -alias => \@aliases ); + + Bio::EnsEMBL::Utils::ConfigRegistry->add_alias( + -species => "Ciona_intestinalis", + -alias => [ 'ciona', 'Ciona intestinalis' ] + ); + + Bio::EnsEMBL::Utils::ConfigRegistry->add_alias( + -species => "Bos_taurus", + -alias => [ 'cow', 'bos_taurus' ] + ); + + Bio::EnsEMBL::Utils::ConfigRegistry->add_alias( + -species => "Macaca_mulatta", + -alias => [ 'rhesus', 'rhesus_monkey', 'macaque', 'macaca mulatta' ] + ); + + Bio::EnsEMBL::Utils::ConfigRegistry->add_alias( + -species => "Otolemur_garnettii", + -alias => [ 'bushbaby', 'galago', 'Otolemur garnettii' ] + ); + + 
Bio::EnsEMBL::Utils::ConfigRegistry->add_alias( + -species => "Oryctolagus_cuniculus", + -alias => [ 'rabbit', 'Oryctolagus cuniculus' ] + ); + + Bio::EnsEMBL::Utils::ConfigRegistry->add_alias( + -species => "Felis_catus", + -alias => [ 'cat', 'felis catus' ] + ); + + Bio::EnsEMBL::Utils::ConfigRegistry->add_alias( + -species => "Sus_scrofa", + -alias => [ 'pig', 'sus scrofa' ] + ); + + Bio::EnsEMBL::Utils::ConfigRegistry->add_alias( + -species => "Sorex_araneus", + -alias => + [ 'shrew', 'ground_shrew', 'european_shrew', 'Sorex araneus' ] + ); + + Bio::EnsEMBL::Utils::ConfigRegistry->add_alias( + -species => "Erinaceus_europaeus", + -alias => [ 'western_european_hedgehog', 'Erinaceus europaeus' ] + ); + + Bio::EnsEMBL::Utils::ConfigRegistry->add_alias( + -species => "Myotis_lucifugus", + -alias => [ 'microbat', 'little_brown_bat' ] + ); + + Bio::EnsEMBL::Utils::ConfigRegistry->add_alias( + -species => "Dasypus_novemcinctus", + -alias => [ 'armadillo', 'arma', 'Dasypus novemcinctu' ] + ); + + Bio::EnsEMBL::Utils::ConfigRegistry->add_alias( + -species => "Loxodonta_africana", + -alias => [ 'african_elephant', 'elephant', 'Loxodonta africana' ] + ); + + Bio::EnsEMBL::Utils::ConfigRegistry->add_alias( + -species => "Echinops_telfairi", + -alias => [ + 'tenrec', 'madagascar_hedgehog', + 'lesser_hedgehog', 'Echinops telfairi' + ] ); + + Bio::EnsEMBL::Utils::ConfigRegistry->add_alias( + -species => "Monodelphis_domestica", + -alias => [ 'opossum', 'Monodelphis domestica' ] + ); + + Bio::EnsEMBL::Utils::ConfigRegistry->add_alias( + -species => "Ornithorhynchus_anatinus", + -alias => [ 'platypus', 'Ornithorhynchus anatinus' ] + ); + + Bio::EnsEMBL::Utils::ConfigRegistry->add_alias( + -species => "Gasterosteus_aculeatus", + -alias => [ 'stickleback', 'Gasterosteus aculeatus' ] + ); + + Bio::EnsEMBL::Utils::ConfigRegistry->add_alias( + -species => "Oryzias_latipes", + -alias => [ 'medaka', 'Oryzias latipes' ] + ); + + Bio::EnsEMBL::Utils::ConfigRegistry->add_alias( + 
-species => "Cavia_porcellus", + -alias => [ 'guinea_pig', '"Cavia porcellus' ] + ); + + Bio::EnsEMBL::Utils::ConfigRegistry->add_alias( + -species => "Aedes_aegypti", + -alias => [ 'aedes', 'Aedes aegypti' ] + ); + + Bio::EnsEMBL::Utils::ConfigRegistry->add_alias( + -species => "Spermophilus_tridecemlineatus", + -alias => [ 'squirrel', 'Spermophilus tridecemlineatus' ] + ); + + Bio::EnsEMBL::Utils::ConfigRegistry->add_alias( + -species => "Tupaia_belangeri", + -alias => [ 'tree_shrew', 'Tupaia belangeri' ] + ); + + Bio::EnsEMBL::Utils::ConfigRegistry->add_alias( + -species => "Culex_pipiens", + -alias => [ 'culex', 'Culex Pipiens' ] + ); + + Bio::EnsEMBL::Utils::ConfigRegistry->add_alias( + -species => "Ochotona_princeps", + -alias => [ 'pika', 'Ochotona princeps' ] + ); + + Bio::EnsEMBL::Utils::ConfigRegistry->add_alias( + -species => "Anolis_carolinensis", + -alias => [ 'anolis', 'anolis_lizard', 'Anolis carolinensis' ] + ); + + Bio::EnsEMBL::Utils::ConfigRegistry->add_alias( + -species => "Microcebus_murinus", + -alias => [ 'mouse_lemur', 'Microcebus murinus' ] + ); + + Bio::EnsEMBL::Utils::ConfigRegistry->add_alias( + -species => "Pongo_pygmaeus", + -alias => [ 'orang', 'orang_utan', 'orangutan', 'Pongo pygmaeus' ] + ); + + Bio::EnsEMBL::Utils::ConfigRegistry->add_alias( + -species => "Equus_caballus", + -alias => [ 'horse', 'Equuscaballus' ] + ); @aliases = ('compara'); - Bio::EnsEMBL::Utils::ConfigRegistry->add_alias(-species => "multi", - -alias => \@aliases); + Bio::EnsEMBL::Utils::ConfigRegistry->add_alias( -species => "multi", + -alias => \@aliases + ); @aliases = ('go'); - Bio::EnsEMBL::Utils::ConfigRegistry->add_alias(-species => "multi", - -alias => \@aliases); -} + Bio::EnsEMBL::Utils::ConfigRegistry->add_alias( -species => "multi", + -alias => \@aliases + ); +} ## end sub load_registry_from_db # @@ -1772,7 +1930,7 @@ sub version_check{ $database_version = $mca->get_schema_version(); } if($database_version == 0){ - #try to work out the version + # 
Try to work out the version if($dba->dbc->dbname() =~ /^_test_db_/){ return 1; } @@ -1826,7 +1984,7 @@ sub get_species_and_object_type{ $type{G} = "gene"; $type{P} = "translation"; - #Do each in turn in order of the usual suspects. This should increase speed on average. + # Do each in turn in order of the usual suspects. This should increase speed on average. if($stable_id =~ /^ENS([GTP])000/){ # HUMAN NOTE 000 needed else other species will match return "Homo_sapiens", $type{$1}; @@ -1853,7 +2011,7 @@ sub get_species_and_object_type{ return "Pan_troglodytes", $type{$1}; } - #rest done alphabetically + # Rest done alphabetically elsif($stable_id =~ /^AAEL/){ # if($stable_id =~ /-R\w$/){ return "aedes_aegypti", "Transcript";