From c6f4b5d2542452b166482042fbd33ef4531a256d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Andreas=20Kusalananda=20K=C3=A4h=C3=A4ri?=
 <ak4@sanger.ac.uk>
Date: Wed, 23 Apr 2008 17:52:23 +0000
Subject: [PATCH] Performance improvement by *not* splitting OR into multiple
 queries (MySQL v5 knows how to do index merges) in _type_by_external_id():

  # decrease in time by 35%
  $dbea->list_transcript_ids_by_extids('BRCA2');

  # decrease in time by 28%
  $dbea->list_translation_ids_by_extids('GO:0004835');

  # decrease in time by 24%
  $dbea->list_gene_ids_by_extids('ARSE');


Also some formatting.
---
 modules/Bio/EnsEMBL/DBSQL/DBEntryAdaptor.pm | 92 +++++++++++----------
 1 file changed, 48 insertions(+), 44 deletions(-)

diff --git a/modules/Bio/EnsEMBL/DBSQL/DBEntryAdaptor.pm b/modules/Bio/EnsEMBL/DBSQL/DBEntryAdaptor.pm
index c78f95c84e..f3294fd14c 100644
--- a/modules/Bio/EnsEMBL/DBSQL/DBEntryAdaptor.pm
+++ b/modules/Bio/EnsEMBL/DBSQL/DBEntryAdaptor.pm
@@ -45,7 +45,6 @@ use strict;
 
 @ISA = qw( Bio::EnsEMBL::DBSQL::BaseAdaptor );
 
-
 =head2 fetch_by_dbID
 
   Arg [1]    : int $dbID
@@ -1020,7 +1019,7 @@ sub list_gene_ids_by_extids {
 
   Arg [1]    : string $external_name
   Arg [2]    : (optional) string $external_db_name
-  Example    : @tr_ids = $dbea->list_gene_ids_by_extids('BCRA2');
+  Example    : @tr_ids = $dbea->list_transcript_ids_by_extids('BCRA2');
   Description: Retrieve a list transcript ids by an external identifier that 
                is linked to any of the genes transcripts, translations or the 
                gene itself 
@@ -1049,7 +1048,7 @@ sub list_transcript_ids_by_extids {
 
   Arg [1]    : string $external_name
   Arg [2]    : (optional) string $external_db_name
-  Example    : @tr_ids = $dbea->list_gene_ids_by_extids('GO:0004835');
+  Example    : @tr_ids = $dbea->list_translation_ids_by_extids('GO:0004835');
   Description: Gets a list of translation IDs by external display IDs
   Returntype : list of Ints
   Exceptions : none
@@ -1141,71 +1140,79 @@ sub _type_by_external_id {
       . ' AND xdb.external_db_id = x.external_db_id AND';
   }
 
-  my @queries = (
-    "SELECT $ID_sql
-       FROM $from_sql xref x, object_xref oxr
-      WHERE $where_sql x.dbprimary_acc = ? AND
-             x.xref_id = oxr.xref_id AND
-             oxr.ensembl_object_type= ?",
-    "SELECT $ID_sql 
-       FROM $from_sql xref x, object_xref oxr
-      WHERE $where_sql x.display_label = ? AND
-             x.xref_id = oxr.xref_id AND
-             oxr.ensembl_object_type= ?"
-  );
+  my $query1 = qq(
+      SELECT    $ID_sql
+      FROM      $from_sql
+                xref x,
+                object_xref oxr
+      WHERE     $where_sql
+                ( x.dbprimary_acc = ? OR x.display_label = ? )
+      AND         x.xref_id = oxr.xref_id
+      AND         oxr.ensembl_object_type = ?);
+
+  my $query2;
 
   if ( defined $external_db_name ) {
     # If we are given the name of an external database, we need to join
     # between the 'xref' and the 'object_xref' tables on 'xref_id'.
 
-    push @queries, "SELECT $ID_sql
+    $query2 = "SELECT $ID_sql
        FROM $from_sql xref x, object_xref oxr, external_synonym syn
       WHERE $where_sql syn.synonym = ? AND
              x.xref_id = oxr.xref_id AND
              oxr.ensembl_object_type= ? AND
              syn.xref_id = oxr.xref_id";
+
   } else {
     # If we weren't given an external database name, we can get away
     # with less joins here.
 
-    push @queries, "SELECT $ID_sql
+    $query2 = "SELECT $ID_sql
        FROM $from_sql object_xref oxr, external_synonym syn
       WHERE $where_sql syn.synonym = ? AND
              oxr.ensembl_object_type= ? AND
              syn.xref_id = oxr.xref_id";
-  }
 
-  # Increase speed of query by splitting the OR in query into three
-  # separate queries.  This is because the 'or' statments render the
-  # index useless because MySQL can't use any fields in it.
+  }
 
   my %hash   = ();
   my @result = ();
 
-  foreach (@queries) {
-    my $sth = $self->prepare($_);
-    $sth->bind_param( 1, "$name", SQL_VARCHAR );
-    $sth->bind_param( 2, $ensType, SQL_VARCHAR );
-    $sth->execute();
+  my $sth = $self->prepare($query1);
 
-    while ( my $r = $sth->fetchrow_array() ) {
-      if ( !exists $hash{$r} ) {
-        $hash{$r} = 1;
-        push( @result, $r );
-      }
-    }
+  $sth->bind_param( 1, "$name",  SQL_VARCHAR );
+  $sth->bind_param( 2, "$name",  SQL_VARCHAR );
+  $sth->bind_param( 3, $ensType, SQL_VARCHAR );
+  $sth->execute();
+
+  while ( my $r = $sth->fetchrow_array() ) {
+    if ( exists( $hash{$r} ) ) { next }
+    $hash{$r} = 1;
+    push( @result, $r );
+  }
+
+  $sth = $self->prepare($query2);
+
+  $sth->bind_param( 1, "$name",  SQL_VARCHAR );
+  $sth->bind_param( 2, $ensType, SQL_VARCHAR );
+  $sth->execute();
+
+  while ( my $r = $sth->fetchrow_array() ) {
+    if ( exists( $hash{$r} ) ) { next }
+    $hash{$r} = 1;
+    push( @result, $r );
   }
 
   return @result;
 } ## end sub _type_by_external_id
 
-=head2 _type_by_external_type
+=head2 _type_by_external_db_id
 
   Arg [1]    : string $type - external_db type
   Arg [2]    : string $ensType - ensembl_object_type
   Arg [3]    : (optional) string $extraType
   	       other object type to be returned
-  Example    : $self->_type_by_external_id(1030, 'Translation');
+  Example    : $self->_type_by_external_db_id(1030, 'Translation');
   Description: Gets
   Returntype : list of dbIDs (gene_id, transcript_id, etc.)
   Exceptions : none
@@ -1265,24 +1272,21 @@ sub _type_by_external_db_id{
       WHERE $where_sql x.external_db_id = ? AND
   	     x.xref_id = oxr.xref_id AND oxr.ensembl_object_type= ?";
 
-# Increase speed of query by splitting the OR in query into three separate 
-# queries. This is because the 'or' statments render the index useless 
-# because MySQL can't use any fields in the index.
-
   my %hash = ();
   my @result = ();
 
-
-  my $sth = $self->prepare( $query );
-  $sth->bind_param(1, "$external_db_id", SQL_VARCHAR);
-  $sth->bind_param(2, $ensType, SQL_VARCHAR);
+  my $sth = $self->prepare($query);
+  $sth->bind_param( 1, "$external_db_id", SQL_VARCHAR );
+  $sth->bind_param( 2, $ensType,          SQL_VARCHAR );
   $sth->execute();
-  while( my $r = $sth->fetchrow_array() ) {
-    if( !exists $hash{$r} ) {
+
+  while ( my $r = $sth->fetchrow_array() ) {
+    if ( !exists $hash{$r} ) {
       $hash{$r} = 1;
       push( @result, $r );
     }
   }
+
   return @result;
 }
 
-- 
GitLab