From ac52bc8e147f4f8b9c98ebf7fffaeed4ead981e9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andreas=20Kusalananda=20K=C3=A4h=C3=A4ri?= <ak4@sanger.ac.uk> Date: Thu, 21 Jun 2007 14:42:21 +0000 Subject: [PATCH] Added new optional argument $external_db_name to fetch_all_by_external_name() in GeneAdaptor.pm, TranscriptAdaptor.pm, and in TranslationAdaptor.pm. This will limit the returned genes, transcripts, or translations, to those that originate from the named external database. This also means adding the optional argument to list_gene_ids_by_extids(), list_transcript_ids_by_extids(), list_translation_ids_by_extids(), and to _type_by_external_id() in DBEntryAdaptor.pm. The matching is done on 'db_name' in the 'external_db' table with LIKE and an appended '%' to the given $external_db_name. --- modules/Bio/EnsEMBL/DBSQL/DBEntryAdaptor.pm | 172 +++++++++++------- modules/Bio/EnsEMBL/DBSQL/GeneAdaptor.pm | 30 +-- .../Bio/EnsEMBL/DBSQL/TranscriptAdaptor.pm | 43 +++-- .../Bio/EnsEMBL/DBSQL/TranslationAdaptor.pm | 32 ++-- 4 files changed, 170 insertions(+), 107 deletions(-) diff --git a/modules/Bio/EnsEMBL/DBSQL/DBEntryAdaptor.pm b/modules/Bio/EnsEMBL/DBSQL/DBEntryAdaptor.pm index 25f476acd7..67ef7f0a85 100644 --- a/modules/Bio/EnsEMBL/DBSQL/DBEntryAdaptor.pm +++ b/modules/Bio/EnsEMBL/DBSQL/DBEntryAdaptor.pm @@ -837,9 +837,10 @@ sub list_gene_ids_by_external_db_id{ return keys %T; } -=head2 list_gene_ids_by_external_db_id +=head2 list_gene_ids_by_extids - Arg [1] : string $external_id + Arg [1] : string $external_name + Arg [2] : (optional) string $external_db_name Example : @gene_ids = $dbea->list_gene_ids_by_extids('ARSE'); Description: Retrieve a list of geneid by an external identifier that is linked to any of the genes transcripts, translations or the @@ -851,20 +852,25 @@ sub list_gene_ids_by_external_db_id{ =cut -sub list_gene_ids_by_extids{ - my ($self,$name) = @_; +sub list_gene_ids_by_extids { + my ( $self, $external_name, $external_db_name ) = @_; - my %T = map { 
($_, 1) } - $self->_type_by_external_id( $name, 'Translation', 'gene' ), - $self->_type_by_external_id( $name, 'Transcript', 'gene' ), - $self->_type_by_external_id( $name, 'Gene' ); - return keys %T; + my %T = map { ( $_, 1 ) } + $self->_type_by_external_id( $external_name, 'Translation', 'gene', + $external_db_name ), + $self->_type_by_external_id( $external_name, 'Transcript', 'gene', + $external_db_name ), + $self->_type_by_external_id( $external_name, 'Gene', undef, + $external_db_name ); + + return keys %T; } =head2 list_transcript_ids_by_extids - Arg [1] : string $external_id + Arg [1] : string $external_name + Arg [2] : (optional) string $external_db_name Example : @tr_ids = $dbea->list_gene_ids_by_extids('BCRA2'); Description: Retrieve a list transcript ids by an external identifier that is linked to any of the genes transcripts, translations or the @@ -876,32 +882,39 @@ sub list_gene_ids_by_extids{ =cut -sub list_transcript_ids_by_extids{ - my ($self,$name) = @_; - my @transcripts; +sub list_transcript_ids_by_extids { + my ( $self, $external_name, $external_db_name ) = @_; - my %T = map { ($_, 1) } - $self->_type_by_external_id( $name, 'Translation', 'transcript' ), - $self->_type_by_external_id( $name, 'Transcript' ); - return keys %T; + my %T = map { ( $_, 1 ) } + $self->_type_by_external_id( $external_name, 'Translation', + 'transcript', $external_db_name + ), + $self->_type_by_external_id( $external_name, 'Transcript', undef, + $external_db_name ); + + return keys %T; } =head2 list_translation_ids_by_extids - Arg [1] : string $name - Example : @tr_ids = $dbea->list_gene_ids_by_extids('GO:0004835'); - Description: Gets a list of translation IDs by external display IDs - Returntype : list of Ints - Exceptions : none - Caller : unknown + Arg [1] : string $external_name + Arg [2] : (optional) string $external_db_name + Example : @tr_ids = $dbea->list_translation_ids_by_extids('GO:0004835'); + Description: Gets a list of translation IDs by external display IDs + 
Returntype : list of Ints + Exceptions : none + Caller : unknown Status : Stable =cut -sub list_translation_ids_by_extids{ - my ($self,$name) = @_; - return $self->_type_by_external_id($name, 'Translation'); +sub list_translation_ids_by_extids { + my ( $self, $external_name, $external_db_name ) = @_; + + return + $self->_type_by_external_id( $external_name, 'Translation', undef, + $external_db_name ); } =head2 _type_by_external_id @@ -909,6 +922,7 @@ sub list_translation_ids_by_extids{ Arg [1] : string $name - dbprimary_acc Arg [2] : string $ensType - ensembl_object_type Arg [3] : (optional) string $extraType + Arg [4] : (optional) string $external_db_name other object type to be returned Example : $self->_type_by_external_id($name, 'Translation'); Description: Gets @@ -921,42 +935,45 @@ sub list_translation_ids_by_extids{ =cut -sub _type_by_external_id{ - my ($self, $name, $ensType, $extraType) = @_; +sub _type_by_external_id { + my ( $self, $name, $ensType, $extraType, $external_db_name ) = @_; - my $from_sql = ''; + my $from_sql = ''; my $where_sql = ''; - my $ID_sql = "oxr.ensembl_id"; + my $ID_sql = "oxr.ensembl_id"; - if (defined $extraType) { - if (lc($extraType) eq 'translation') { + if ( defined $extraType ) { + if ( lc($extraType) eq 'translation' ) { $ID_sql = "tl.translation_id"; } else { $ID_sql = "t.${extraType}_id"; } - if (lc($ensType) eq 'translation') { - $from_sql = 'transcript t, translation tl, '; + if ( lc($ensType) eq 'translation' ) { + $from_sql = 'transcript t, translation tl, '; $where_sql = qq( t.transcript_id = tl.transcript_id AND tl.translation_id = oxr.ensembl_id AND t.is_current = 1 AND ); } else { - $from_sql = 'transcript t, '; - $where_sql = 't.'.lc($ensType).'_id = oxr.ensembl_id AND '. - 't.is_current = 1 AND '; + $from_sql = 'transcript t, '; + $where_sql = 't.' + . lc($ensType) + . '_id = oxr.ensembl_id AND ' + . 
't.is_current = 1 AND '; } } - if (lc($ensType) eq 'gene') { - $from_sql = 'gene g, '; + if ( lc($ensType) eq 'gene' ) { + $from_sql = 'gene g, '; $where_sql = 'g.gene_id = oxr.ensembl_id AND g.is_current = 1 AND '; - } elsif (lc($ensType) eq 'transcript') { + } elsif ( lc($ensType) eq 'transcript' ) { $from_sql = 'transcript t, '; - $where_sql = 't.transcript_id = oxr.ensembl_id AND t.is_current = 1 AND '; - } elsif (lc($ensType) eq 'translation') { - $from_sql = 'transcript t, translation tl, '; + $where_sql = + 't.transcript_id = oxr.ensembl_id AND t.is_current = 1 AND '; + } elsif ( lc($ensType) eq 'translation' ) { + $from_sql = 'transcript t, translation tl, '; $where_sql = qq( t.transcript_id = tl.transcript_id AND tl.translation_id = oxr.ensembl_id AND @@ -964,43 +981,74 @@ sub _type_by_external_id{ ); } + if ( defined($external_db_name) ) { + # Involve the 'external_db' table to limit the hits to a particular + # external database. + + $from_sql .= 'external_db xdb, '; + $where_sql .= + 'xdb.db_name LIKE ' + . $self->dbc()->db_handle()->quote( $external_db_name . '%' ) + . ' AND xdb.external_db_id = x.external_db_id AND'; + } + my @queries = ( "SELECT $ID_sql FROM $from_sql xref x, object_xref oxr WHERE $where_sql x.dbprimary_acc = ? AND - x.xref_id = oxr.xref_id AND oxr.ensembl_object_type= ?", + x.xref_id = oxr.xref_id AND + oxr.ensembl_object_type= ?", "SELECT $ID_sql FROM $from_sql xref x, object_xref oxr WHERE $where_sql x.display_label = ? AND - x.xref_id = oxr.xref_id AND oxr.ensembl_object_type= ?", - "SELECT $ID_sql + x.xref_id = oxr.xref_id AND + oxr.ensembl_object_type= ?" + ); + + if ( defined $external_db_name ) { + # If we are given the name of an external database, we need to join + # between the 'xref' and the 'object_xref' tables on 'xref_id'. + + push @queries, "SELECT $ID_sql + FROM $from_sql xref x, object_xref oxr, external_synonym syn + WHERE $where_sql syn.synonym = ? AND + x.xref_id = oxr.xref_id AND + oxr.ensembl_object_type= ? 
AND + syn.xref_id = oxr.xref_id"; + } else { + # If we weren't given an external database name, we can get away + # with fewer joins here. + + push @queries, "SELECT $ID_sql FROM $from_sql object_xref oxr, external_synonym syn WHERE $where_sql syn.synonym = ? AND - syn.xref_id = oxr.xref_id AND oxr.ensembl_object_type= ?", - ); + oxr.ensembl_object_type= ? AND + syn.xref_id = oxr.xref_id"; + } -# Increase speed of query by splitting the OR in query into three separate -# queries. This is because the 'or' statments render the index useless -# because MySQL can't use any fields in the index. + # Increase speed of query by splitting the OR in query into three + # separate queries. This is because the 'or' statements render the + # index useless because MySQL can't use any fields in it. - my %hash = (); + my %hash = (); my @result = (); - foreach( @queries ) { - - my $sth = $self->prepare( $_ ); - $sth->bind_param(1, "$name", SQL_VARCHAR); - $sth->bind_param(2, $ensType, SQL_VARCHAR); + foreach (@queries) { + my $sth = $self->prepare($_); + $sth->bind_param( 1, "$name", SQL_VARCHAR ); + $sth->bind_param( 2, $ensType, SQL_VARCHAR ); $sth->execute(); - while( my $r = $sth->fetchrow_array() ) { - if( !exists $hash{$r} ) { - $hash{$r} = 1; - push( @result, $r ); + + while ( my $r = $sth->fetchrow_array() ) { + if ( !exists $hash{$r} ) { + $hash{$r} = 1; + push( @result, $r ); } } } + return @result; -} +} ## end sub _type_by_external_id =head2 _type_by_external_type diff --git a/modules/Bio/EnsEMBL/DBSQL/GeneAdaptor.pm b/modules/Bio/EnsEMBL/DBSQL/GeneAdaptor.pm index 6e04517203..74d97cb2bf 100644 --- a/modules/Bio/EnsEMBL/DBSQL/GeneAdaptor.pm +++ b/modules/Bio/EnsEMBL/DBSQL/GeneAdaptor.pm @@ -670,15 +670,18 @@ sub fetch_by_translation_stable_id { =head2 fetch_all_by_external_name - Arg [1] : String $external_id + Arg [1] : String $external_name The external identifier for the gene to be obtained + Arg [2] : (optional) String $external_db_name + The name of the external 
database from which the + identifier originates. Example : @genes = @{$gene_adaptor->fetch_all_by_external_name('BRCA2')} - Description: Retrieves a list of genes with an external database - idenitifier $external_id. The genes returned are in their - native coordinate system. I.e. in the coordinate system they - are stored in the database in. If another coordinate system - is required then the Gene::transfer or Gene::transform method - can be used. + Description: Retrieves a list of genes with an external database + identifier $external_name. The genes returned are in + their native coordinate system, i.e. in the coordinate + system they are stored in the database in. If another + coordinate system is required then the Gene::transfer or + Gene::transform method can be used. Returntype : listref of Bio::EnsEMBL::Genes Exceptions : none Caller : goview, general @@ -687,17 +690,18 @@ sub fetch_by_translation_stable_id { =cut sub fetch_all_by_external_name { - my ($self, $external_id) = @_; + my ( $self, $external_name, $external_db_name ) = @_; my $entryAdaptor = $self->db->get_DBEntryAdaptor(); - my (@ids, @result); - @ids = $entryAdaptor->list_gene_ids_by_extids($external_id); + my @ids = + $entryAdaptor->list_gene_ids_by_extids( $external_name, + $external_db_name ); - my $genes = $self->fetch_all_by_dbID_list(\@ids); + my %genes_by_dbIDs = + map { $_->dbID(), $_ } @{ $self->fetch_all_by_dbID_list( \@ids ) }; - my %genes_by_dbIDs = map { $_->dbID(),$_ } @$genes; - @result = map { $genes_by_dbIDs{ $_ } } @ids; + my @result = map { $genes_by_dbIDs{$_} } @ids; return \@result; } diff --git a/modules/Bio/EnsEMBL/DBSQL/TranscriptAdaptor.pm b/modules/Bio/EnsEMBL/DBSQL/TranscriptAdaptor.pm index 4423eba79b..cd5320d551 100644 --- a/modules/Bio/EnsEMBL/DBSQL/TranscriptAdaptor.pm +++ b/modules/Bio/EnsEMBL/DBSQL/TranscriptAdaptor.pm @@ -415,20 +415,24 @@ sub fetch_all_by_Slice { =head2 fetch_all_by_external_name - Arg [1] : String $external_id + Arg [1] : String 
$external_name An external identifier of the transcript to be obtained - Example : my @transcripts = @{ $tr_adaptor->fetch_all_by_external_name - ('NP_065811.1') }; - Description: Retrieves all transcripts which are associated with an - external identifier such as a GO term, Swissprot - identifer, etc. Usually there will only be a single transcript - returned in the listref, but not always. Transcripts are - returned in their native coordinate system. That is, the - coordinate system in which they are stored in the database. If - they are required in another coordinate system the - Transcript::transfer or Transcript::transform method can be - used to convert them. If no transcripts with the external - identifier are found, a reference to an empty list is returned. + Arg [2] : (optional) String $external_db_name + The name of the external database from which the + identifier originates. + Example : my @transcripts = + @{ $tr_adaptor->fetch_all_by_external_name( 'NP_065811.1') }; + Description: Retrieves all transcripts which are associated with + an external identifier such as a GO term, Swissprot + identifier, etc. Usually there will only be a single + transcript returned in the list reference, but not + always. Transcripts are returned in their native + coordinate system, i.e. the coordinate system in which + they are stored in the database. If they are required + in another coordinate system the Transcript::transfer or + Transcript::transform method can be used to convert them. + If no transcripts with the external identifier are found, + a reference to an empty list is returned. 
Returntype : Listref of Bio::EnsEMBL::Transcript objects Exceptions : none Caller : general @@ -437,17 +441,16 @@ sub fetch_all_by_Slice { =cut sub fetch_all_by_external_name { - my $self = shift; - my $external_id = shift; - - my @trans = (); + my ( $self, $external_name, $external_db_name ) = @_; my $entryAdaptor = $self->db->get_DBEntryAdaptor(); - my @ids = $entryAdaptor->list_transcript_ids_by_extids($external_id); - return $self->fetch_all_by_dbID_list(\@ids); -} + my @ids = + $entryAdaptor->list_transcript_ids_by_extids( $external_name, + $external_db_name ); + return $self->fetch_all_by_dbID_list( \@ids ); +} =head2 fetch_by_display_label diff --git a/modules/Bio/EnsEMBL/DBSQL/TranslationAdaptor.pm b/modules/Bio/EnsEMBL/DBSQL/TranslationAdaptor.pm index bb63fa15e0..8d05dc7ce1 100644 --- a/modules/Bio/EnsEMBL/DBSQL/TranslationAdaptor.pm +++ b/modules/Bio/EnsEMBL/DBSQL/TranslationAdaptor.pm @@ -144,13 +144,19 @@ sub fetch_by_Transcript { =head2 fetch_all_by_external_name - Arg [1] : string $external_id - The external identifier for the tranlsation(s) to be obtained. - Example : @tls = @{$trl_adaptor->fetch_all_by_external_name('BRCA2')}; - Description: Retrieves a list of translations fetched via an external - identifier. Note that this may not be a particularly useful - method, because translations do not make much sense out of the - context of their transcript. It may be better to use the + Arg [1] : string $external_name + The external identifier for the translation(s) to be + obtained. + Arg [2] : (optional) string $external_db_name + The name of the external database from which the + identifier originates. + Example : my @translations = + @{ $trl_adaptor->fetch_all_by_external_name('BRCA2') }; + Description: Retrieves a list of translations fetched via an + external identifier. Note that this may not be a + particularly useful method, because translations + do not make much sense out of the context of + their transcript. 
It may be better to use the TranscriptAdaptor::fetch_all_by_external_name instead. Returntype : reference to a list of Translations Exceptions : none @@ -162,19 +168,21 @@ sub fetch_by_Transcript { =cut sub fetch_all_by_external_name { - my $self = shift; - my $external_id = shift; + my ( $self, $external_name, $external_db_name ) = @_; my $entry_adaptor = $self->db->get_DBEntryAdaptor(); - my @ids = $entry_adaptor->list_translation_ids_by_extids($external_id); + + my @ids = + $entry_adaptor->list_translation_ids_by_extids( $external_name, + $external_db_name ); my $transcript_adaptor = $self->db()->get_TranscriptAdaptor(); my @out; - foreach my $id (@ids) { my $transcript = $transcript_adaptor->fetch_by_translation_id($id); - if($transcript) { + + if ( defined($transcript) ) { push @out, $self->fetch_by_Transcript($transcript); } } -- GitLab