From ac52bc8e147f4f8b9c98ebf7fffaeed4ead981e9 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Andreas=20Kusalananda=20K=C3=A4h=C3=A4ri?=
 <ak4@sanger.ac.uk>
Date: Thu, 21 Jun 2007 14:42:21 +0000
Subject: [PATCH] Added new optional argument $external_db_name to
 fetch_all_by_external_name() in GeneAdaptor.pm, TranscriptAdaptor.pm, and in
 TranslationAdaptor.pm.

This will limit the returned genes, transcripts, or translations, to
those that originate from the named external database.

This also means adding the optional argument to
list_gene_ids_by_extids(), list_transcript_ids_by_extids(),
list_translation_ids_by_extids(), and to _type_by_external_id() in
DBEntryAdaptor.pm.

The matching is done on 'db_name' in the 'external_db' table using LIKE
with a '%' wildcard appended to the given $external_db_name, i.e. it is
a prefix match.
---
 modules/Bio/EnsEMBL/DBSQL/DBEntryAdaptor.pm   | 172 +++++++++++-------
 modules/Bio/EnsEMBL/DBSQL/GeneAdaptor.pm      |  30 +--
 .../Bio/EnsEMBL/DBSQL/TranscriptAdaptor.pm    |  43 +++--
 .../Bio/EnsEMBL/DBSQL/TranslationAdaptor.pm   |  32 ++--
 4 files changed, 170 insertions(+), 107 deletions(-)

diff --git a/modules/Bio/EnsEMBL/DBSQL/DBEntryAdaptor.pm b/modules/Bio/EnsEMBL/DBSQL/DBEntryAdaptor.pm
index 25f476acd7..67ef7f0a85 100644
--- a/modules/Bio/EnsEMBL/DBSQL/DBEntryAdaptor.pm
+++ b/modules/Bio/EnsEMBL/DBSQL/DBEntryAdaptor.pm
@@ -837,9 +837,10 @@ sub list_gene_ids_by_external_db_id{
    return keys %T;
 }
 
-=head2 list_gene_ids_by_external_db_id
+=head2 list_gene_ids_by_extids
 
-  Arg [1]    : string $external_id
+  Arg [1]    : string $external_name
+  Arg [2]    : (optional) string $external_db_name
   Example    : @gene_ids = $dbea->list_gene_ids_by_extids('ARSE');
   Description: Retrieve a list of geneid by an external identifier that is 
                linked to  any of the genes transcripts, translations or the 
@@ -851,20 +852,25 @@ sub list_gene_ids_by_external_db_id{
 
 =cut
 
-sub list_gene_ids_by_extids{
-   my ($self,$name) = @_;
+sub list_gene_ids_by_extids {
+  my ( $self, $external_name, $external_db_name ) = @_;
 
-   my %T = map { ($_, 1) }
-       $self->_type_by_external_id( $name, 'Translation', 'gene' ),
-       $self->_type_by_external_id( $name, 'Transcript',  'gene' ),
-       $self->_type_by_external_id( $name, 'Gene' );
-   return keys %T;
+  my %T = map { ( $_, 1 ) }
+    $self->_type_by_external_id( $external_name, 'Translation', 'gene',
+                                 $external_db_name ),
+    $self->_type_by_external_id( $external_name, 'Transcript', 'gene',
+                                 $external_db_name ),
+    $self->_type_by_external_id( $external_name, 'Gene', undef,
+                                 $external_db_name );
+
+  return keys %T;
 }
 
 
 =head2 list_transcript_ids_by_extids
 
-  Arg [1]    : string $external_id
+  Arg [1]    : string $external_name
+  Arg [2]    : (optional) string $external_db_name
   Example    : @tr_ids = $dbea->list_gene_ids_by_extids('BCRA2');
   Description: Retrieve a list transcript ids by an external identifier that 
                is linked to any of the genes transcripts, translations or the 
@@ -876,32 +882,39 @@ sub list_gene_ids_by_extids{
 
 =cut
 
-sub list_transcript_ids_by_extids{
-   my ($self,$name) = @_;
-   my @transcripts;
+sub list_transcript_ids_by_extids {
+  my ( $self, $external_name, $external_db_name ) = @_;
 
-   my %T = map { ($_, 1) }
-       $self->_type_by_external_id( $name, 'Translation', 'transcript' ),
-       $self->_type_by_external_id( $name, 'Transcript' );
-   return keys %T;
+  my %T = map { ( $_, 1 ) }
+    $self->_type_by_external_id( $external_name, 'Translation',
+                                 'transcript',   $external_db_name
+    ),
+    $self->_type_by_external_id( $external_name, 'Transcript', undef,
+                                 $external_db_name );
+
+  return keys %T;
 }
 
 
 =head2 list_translation_ids_by_extids
 
-  Arg [1]    :  string $name 
-  Example    :  @tr_ids = $dbea->list_gene_ids_by_extids('GO:0004835');
-  Description:  Gets a list of translation IDs by external display IDs
-  Returntype :  list of Ints
-  Exceptions :  none
-  Caller     :  unknown
+  Arg [1]    : string $external_name
+  Arg [2]    : (optional) string $external_db_name
+  Example    : @tr_ids = $dbea->list_translation_ids_by_extids('GO:0004835');
+  Description: Gets a list of translation IDs by external display IDs
+  Returntype : list of Ints
+  Exceptions : none
+  Caller     : unknown
   Status     : Stable
 
 =cut
 
-sub list_translation_ids_by_extids{
-  my ($self,$name) = @_;
-  return $self->_type_by_external_id($name, 'Translation');
+sub list_translation_ids_by_extids {
+  my ( $self, $external_name, $external_db_name ) = @_;
+
+  return
+    $self->_type_by_external_id( $external_name, 'Translation', undef,
+                                 $external_db_name );
 }
 
 =head2 _type_by_external_id
@@ -909,6 +922,7 @@ sub list_translation_ids_by_extids{
   Arg [1]    : string $name - dbprimary_acc
   Arg [2]    : string $ensType - ensembl_object_type
   Arg [3]    : (optional) string $extraType
   	       other object type to be returned
+  Arg [4]    : (optional) string $external_db_name
   Example    : $self->_type_by_external_id($name, 'Translation');
   Description: Gets
@@ -921,42 +935,45 @@ sub list_translation_ids_by_extids{
 
 =cut
 
-sub _type_by_external_id{
-  my ($self, $name, $ensType, $extraType) = @_;
+sub _type_by_external_id {
+  my ( $self, $name, $ensType, $extraType, $external_db_name ) = @_;
 
-  my $from_sql = '';
+  my $from_sql  = '';
   my $where_sql = '';
-  my $ID_sql = "oxr.ensembl_id";
+  my $ID_sql    = "oxr.ensembl_id";
 
-  if (defined $extraType) {
-    if (lc($extraType) eq 'translation') {
+  if ( defined $extraType ) {
+    if ( lc($extraType) eq 'translation' ) {
       $ID_sql = "tl.translation_id";
     } else {
       $ID_sql = "t.${extraType}_id";
     }
 
-    if (lc($ensType) eq 'translation') {
-      $from_sql = 'transcript t, translation tl, ';
+    if ( lc($ensType) eq 'translation' ) {
+      $from_sql  = 'transcript t, translation tl, ';
       $where_sql = qq(
           t.transcript_id = tl.transcript_id AND
           tl.translation_id = oxr.ensembl_id AND
           t.is_current = 1 AND
       );
     } else {
-      $from_sql = 'transcript t, ';
-      $where_sql = 't.'.lc($ensType).'_id = oxr.ensembl_id AND '.
-          't.is_current = 1 AND ';
+      $from_sql  = 'transcript t, ';
+      $where_sql = 't.'
+        . lc($ensType)
+        . '_id = oxr.ensembl_id AND '
+        . 't.is_current = 1 AND ';
     }
   }
 
-  if (lc($ensType) eq 'gene') {
-    $from_sql = 'gene g, ';
+  if ( lc($ensType) eq 'gene' ) {
+    $from_sql  = 'gene g, ';
     $where_sql = 'g.gene_id = oxr.ensembl_id AND g.is_current = 1 AND ';
-  } elsif (lc($ensType) eq 'transcript') {
+  } elsif ( lc($ensType) eq 'transcript' ) {
     $from_sql = 'transcript t, ';
-    $where_sql = 't.transcript_id = oxr.ensembl_id AND t.is_current = 1 AND ';
-  } elsif (lc($ensType) eq 'translation') {
-    $from_sql = 'transcript t, translation tl, ';
+    $where_sql =
+      't.transcript_id = oxr.ensembl_id AND t.is_current = 1 AND ';
+  } elsif ( lc($ensType) eq 'translation' ) {
+    $from_sql  = 'transcript t, translation tl, ';
     $where_sql = qq(
         t.transcript_id = tl.transcript_id AND
         tl.translation_id = oxr.ensembl_id AND
@@ -964,43 +981,74 @@ sub _type_by_external_id{
     );
   }
 
+  if ( defined($external_db_name) ) {
+    # Involve the 'external_db' table to limit the hits to a particular
+    # external database.
+
+    $from_sql .= 'external_db xdb, ';
+    $where_sql .=
+        'xdb.db_name LIKE '
+      . $self->dbc()->db_handle()->quote( $external_db_name . '%' )
+      . ' AND xdb.external_db_id = x.external_db_id AND';
+  }
+
   my @queries = (
     "SELECT $ID_sql
        FROM $from_sql xref x, object_xref oxr
       WHERE $where_sql x.dbprimary_acc = ? AND
-  	     x.xref_id = oxr.xref_id AND oxr.ensembl_object_type= ?",
+             x.xref_id = oxr.xref_id AND
+             oxr.ensembl_object_type= ?",
     "SELECT $ID_sql 
        FROM $from_sql xref x, object_xref oxr
       WHERE $where_sql x.display_label = ? AND
-  	     x.xref_id = oxr.xref_id AND oxr.ensembl_object_type= ?",
-    "SELECT $ID_sql
+             x.xref_id = oxr.xref_id AND
+             oxr.ensembl_object_type= ?"
+  );
+
+  if ( defined $external_db_name ) {
+    # The external database filter refers to 'x.external_db_id', so
+    # this query must also join 'xref' to 'object_xref' on 'xref_id'.
+
+    push @queries, "SELECT $ID_sql
+       FROM $from_sql xref x, object_xref oxr, external_synonym syn
+      WHERE $where_sql syn.synonym = ? AND
+             x.xref_id = oxr.xref_id AND
+             oxr.ensembl_object_type= ? AND
+             syn.xref_id = oxr.xref_id";
+  } else {
+    # If we weren't given an external database name, we can get away
+    # with fewer joins here.
+
+    push @queries, "SELECT $ID_sql
        FROM $from_sql object_xref oxr, external_synonym syn
       WHERE $where_sql syn.synonym = ? AND
-            syn.xref_id = oxr.xref_id AND oxr.ensembl_object_type= ?",
-  );
+             oxr.ensembl_object_type= ? AND
+             syn.xref_id = oxr.xref_id";
+  }
 
-# Increase speed of query by splitting the OR in query into three separate
-# queries. This is because the 'or' statments render the index useless
-# because MySQL can't use any fields in the index.
+  # Increase speed of query by splitting the OR in query into three
+  # separate queries.  This is because the 'or' statements render the
+  # index useless because MySQL can't use any fields in it.
 
-  my %hash = ();
+  my %hash   = ();
   my @result = ();
 
-  foreach( @queries ) {
-
-    my $sth = $self->prepare( $_ );
-    $sth->bind_param(1, "$name", SQL_VARCHAR);
-    $sth->bind_param(2, $ensType, SQL_VARCHAR);
+  foreach (@queries) {
+    my $sth = $self->prepare($_);
+    $sth->bind_param( 1, "$name", SQL_VARCHAR );
+    $sth->bind_param( 2, $ensType, SQL_VARCHAR );
     $sth->execute();
-    while( my $r = $sth->fetchrow_array() ) {
-      if( !exists $hash{$r} ) {
-	$hash{$r} = 1;
-	push( @result, $r );
+
+    while ( my $r = $sth->fetchrow_array() ) {
+      if ( !exists $hash{$r} ) {
+        $hash{$r} = 1;
+        push( @result, $r );
       }
     }
   }
+
   return @result;
-}
+} ## end sub _type_by_external_id
 
 =head2 _type_by_external_type
 
diff --git a/modules/Bio/EnsEMBL/DBSQL/GeneAdaptor.pm b/modules/Bio/EnsEMBL/DBSQL/GeneAdaptor.pm
index 6e04517203..74d97cb2bf 100644
--- a/modules/Bio/EnsEMBL/DBSQL/GeneAdaptor.pm
+++ b/modules/Bio/EnsEMBL/DBSQL/GeneAdaptor.pm
@@ -670,15 +670,18 @@ sub fetch_by_translation_stable_id {
 
 =head2 fetch_all_by_external_name
 
-  Arg [1]    : String $external_id
+  Arg [1]    : String $external_name
                The external identifier for the gene to be obtained
+  Arg [2]    : (optional) String $external_db_name
+               The name of the external database from which the
+               identifier originates.
   Example    : @genes = @{$gene_adaptor->fetch_all_by_external_name('BRCA2')}
-  Description: Retrieves a list of genes with an external database 
-               idenitifier $external_id. The genes returned are in their
-               native coordinate system. I.e. in the coordinate system they
-               are stored in the database in. If another coordinate system
-               is required then the Gene::transfer or Gene::transform method 
-               can be used.
+  Description: Retrieves a list of genes with an external database
+               identifier $external_name. The genes returned are in
+               their native coordinate system, i.e. in the coordinate
+               system they are stored in the database in.  If another
+               coordinate system is required then the Gene::transfer or
+               Gene::transform method can be used.
   Returntype : listref of Bio::EnsEMBL::Genes
   Exceptions : none
   Caller     : goview, general
@@ -687,17 +690,18 @@ sub fetch_by_translation_stable_id {
 =cut
 
 sub fetch_all_by_external_name {
-  my ($self, $external_id) = @_;
+  my ( $self, $external_name, $external_db_name ) = @_;
 
   my $entryAdaptor = $self->db->get_DBEntryAdaptor();
 
-  my (@ids, @result);
-  @ids = $entryAdaptor->list_gene_ids_by_extids($external_id);
+  my @ids =
+    $entryAdaptor->list_gene_ids_by_extids( $external_name,
+                                            $external_db_name );
 
-  my $genes = $self->fetch_all_by_dbID_list(\@ids);
+  my %genes_by_dbIDs =
+    map { $_->dbID(), $_ } @{ $self->fetch_all_by_dbID_list( \@ids ) };
 
-  my %genes_by_dbIDs = map { $_->dbID(),$_ } @$genes;
-  @result = map { $genes_by_dbIDs{ $_ } } @ids;
+  my @result = map { $genes_by_dbIDs{$_} } @ids;
 
   return \@result;
 }
diff --git a/modules/Bio/EnsEMBL/DBSQL/TranscriptAdaptor.pm b/modules/Bio/EnsEMBL/DBSQL/TranscriptAdaptor.pm
index 4423eba79b..cd5320d551 100644
--- a/modules/Bio/EnsEMBL/DBSQL/TranscriptAdaptor.pm
+++ b/modules/Bio/EnsEMBL/DBSQL/TranscriptAdaptor.pm
@@ -415,20 +415,24 @@ sub fetch_all_by_Slice {
 
 =head2 fetch_all_by_external_name
 
-  Arg [1]    : String $external_id
+  Arg [1]    : String $external_name
                An external identifier of the transcript to be obtained
-  Example    : my @transcripts = @{ $tr_adaptor->fetch_all_by_external_name
-                  ('NP_065811.1') };
-  Description: Retrieves all transcripts which are associated with an 
-               external identifier such as a GO term, Swissprot
-               identifer, etc. Usually there will only be a single transcript
-               returned in the listref, but not always. Transcripts are
-               returned in their native coordinate system. That is, the 
-               coordinate system in which they are stored in the database. If
-               they are required in another coordinate system the 
-               Transcript::transfer or Transcript::transform method can be 
-               used to convert them. If no transcripts with the external
-               identifier are found, a reference to an empty list is returned.
+  Arg [2]    : (optional) String $external_db_name
+               The name of the external database from which the
+               identifier originates.
+  Example    : my @transcripts =
+                  @{ $tr_adaptor->fetch_all_by_external_name( 'NP_065811.1') };
+  Description: Retrieves all transcripts which are associated with
+               an external identifier such as a GO term, Swissprot
+               identifier, etc.  Usually there will only be a single
+               transcript returned in the list reference, but not
+               always.  Transcripts are returned in their native
+               coordinate system, i.e. the coordinate system in which
+               they are stored in the database.  If they are required
+               in another coordinate system the Transcript::transfer or
+               Transcript::transform method can be used to convert them.
+               If no transcripts with the external identifier are found,
+               a reference to an empty list is returned.
   Returntype : Listref of Bio::EnsEMBL::Transcript objects
   Exceptions : none
   Caller     : general
@@ -437,17 +441,16 @@ sub fetch_all_by_Slice {
 =cut
 
 sub fetch_all_by_external_name {
-  my $self = shift;
-  my $external_id = shift;
-
-  my @trans = ();
+  my ( $self, $external_name, $external_db_name ) = @_;
 
   my $entryAdaptor = $self->db->get_DBEntryAdaptor();
-  my @ids = $entryAdaptor->list_transcript_ids_by_extids($external_id);
 
-  return $self->fetch_all_by_dbID_list(\@ids);
-}
+  my @ids =
+    $entryAdaptor->list_transcript_ids_by_extids( $external_name,
+                                                  $external_db_name );
 
+  return $self->fetch_all_by_dbID_list( \@ids );
+}
 
 =head2 fetch_by_display_label
 
diff --git a/modules/Bio/EnsEMBL/DBSQL/TranslationAdaptor.pm b/modules/Bio/EnsEMBL/DBSQL/TranslationAdaptor.pm
index bb63fa15e0..8d05dc7ce1 100644
--- a/modules/Bio/EnsEMBL/DBSQL/TranslationAdaptor.pm
+++ b/modules/Bio/EnsEMBL/DBSQL/TranslationAdaptor.pm
@@ -144,13 +144,19 @@ sub fetch_by_Transcript {
 
 =head2 fetch_all_by_external_name
 
-  Arg [1]    : string $external_id
-               The external identifier for the tranlsation(s) to be obtained.
-  Example    : @tls = @{$trl_adaptor->fetch_all_by_external_name('BRCA2')};
-  Description: Retrieves a list of translations fetched via an external
-               identifier.  Note that this may not be a particularly useful
-               method, because translations do not make much sense out of the 
-               context of their transcript.  It may be better to use the
+  Arg [1]    : string $external_name
+               The external identifier for the translation(s) to be
+               obtained.
+  Arg [2]    : (optional) string $external_db_name
+               The name of the external database from which the
+               identifier originates.
+  Example    : my @translations =
+                  @{ $trl_adaptor->fetch_all_by_external_name('BRCA2') };
+  Description: Retrieves a list of translations fetched via an
+               external identifier.  Note that this may not be a
+               particularly useful method, because translations
+               do not make much sense out of the context of
+               their transcript.  It may be better to use the
                TranscriptAdaptor::fetch_all_by_external_name instead.
   Returntype : reference to a list of Translations
   Exceptions : none
@@ -162,19 +168,21 @@ sub fetch_by_Transcript {
 =cut
 
 sub fetch_all_by_external_name {
-  my $self = shift;
-  my $external_id = shift;
+  my ( $self, $external_name, $external_db_name ) = @_;
 
   my $entry_adaptor = $self->db->get_DBEntryAdaptor();
-  my @ids = $entry_adaptor->list_translation_ids_by_extids($external_id);
+
+  my @ids =
+    $entry_adaptor->list_translation_ids_by_extids( $external_name,
+                                                    $external_db_name );
 
   my $transcript_adaptor = $self->db()->get_TranscriptAdaptor();
 
   my @out;
-
   foreach my $id (@ids) {
     my $transcript = $transcript_adaptor->fetch_by_translation_id($id);
-    if($transcript) {
+
+    if ( defined($transcript) ) {
       push @out, $self->fetch_by_Transcript($transcript);
     }
   }
-- 
GitLab