From 24c8a5fbe62bf4a58968dbbb9639dd983ba31972 Mon Sep 17 00:00:00 2001
From: Kieron Taylor <ktaylor@ebi.ac.uk>
Date: Thu, 7 Feb 2013 13:06:16 +0000
Subject: [PATCH] Introduced fetching by stable ID lists

---
 modules/Bio/EnsEMBL/DBSQL/BaseAdaptor.pm      | 63 ++++++++++++++++---
 .../Bio/EnsEMBL/DBSQL/BaseFeatureAdaptor.pm   | 27 +++++++-
 modules/t/baseAdaptor.t                       | 11 ++++
 3 files changed, 92 insertions(+), 9 deletions(-)

diff --git a/modules/Bio/EnsEMBL/DBSQL/BaseAdaptor.pm b/modules/Bio/EnsEMBL/DBSQL/BaseAdaptor.pm
index 9190dc45cc..ffa4533c33 100755
--- a/modules/Bio/EnsEMBL/DBSQL/BaseAdaptor.pm
+++ b/modules/Bio/EnsEMBL/DBSQL/BaseAdaptor.pm
@@ -92,7 +92,7 @@ use vars qw(@ISA @EXPORT);
 use strict;
 
 use Bio::EnsEMBL::Utils::Exception qw(throw);
-use Bio::EnsEMBL::Utils::Scalar qw(assert_ref);
+use Bio::EnsEMBL::Utils::Scalar qw(assert_ref assert_integer);
 use DBI qw(:sql_types);
 use Data::Dumper;
 
@@ -654,6 +654,34 @@ sub fetch_all_by_dbID_list {
 sub _uncached_fetch_all_by_dbID_list {
   my ( $self, $id_list_ref, $slice ) = @_;
 
+  return $self->_uncached_fetch_all_by_id_list($id_list_ref, $slice, "dbID");
+} ## end sub _uncached_fetch_all_by_dbID_list
+
+=head2 _uncached_fetch_all_by_id_list
+
+  Arg [1]    : listref of IDs
+  Arg [2]    : (optional) Bio::EnsEMBL::Slice $slice
+               A slice that features should be remapped to
+  Arg [3]    : String describing the ID type.
+               Valid values include dbID and stable_id. dbID is an alias for
+               the primary key, while other names map directly to table columns
+               of the Feature this adaptor manages.
+  Example    : $list_of_features = $adaptor->_uncached_fetch_all_by_id_list(
+                   [qw(ENSG00000101321 ENSG00000101346 ENSG00000101367)],
+                   undef,
+                   "stable_id");
+  Description: This is a generic method used to fetch lists of features by IDs.
+               It avoids caches, meaning it is best suited for block fetching.
+               See fetch_all_by_dbID_list() for more info.
+  Returntype : ListRef of Bio::EnsEMBL::Feature
+  Exceptions : Thrown if a list of IDs is not supplied.
+  Caller     : BaseFeatureAdaptor, BaseAdaptor and derived classes.
+
+=cut
+
+sub _uncached_fetch_all_by_id_list {
+    my ( $self, $id_list_ref, $slice, $id_type ) = @_;
+
   if ( !defined($id_list_ref) || ref($id_list_ref) ne 'ARRAY' ) {
     throw("id_list list reference argument is required");
   }
@@ -670,9 +698,22 @@ sub _uncached_fetch_all_by_dbID_list {
   # length of 16, this means 2048 dbIDs in each query.
   my $max_size = 2048;
 
-
+  # prepare column name for query
+  my $field_name;
+  if ($id_type eq "dbID") {
+      $field_name = $name."_id";
+  } else {
+      $field_name = $id_type;
+  }
+  
+  # build up a unique id list, validating non-stable_id values as integers
   my %id_list;
-  $id_list{$_}++ for @{$id_list_ref};
+  for (@{$id_list_ref}) {
+      $id_list{$_}++;
+      if ($id_type ne "stable_id") { 
+          assert_integer($_,"$field_name");
+      }
+  }
   my @id_list = keys %id_list;
 
   my @out;
@@ -687,20 +728,26 @@ sub _uncached_fetch_all_by_dbID_list {
       @ids     = @id_list;
       @id_list = ();
     }
-
+    
     if ( scalar(@ids) > 1 ) {
-      $id_str = " IN (" . join( ',', @ids ) . ")";
+        # stable_ids are the only feature attribute expressed as a varchar,
+        # so they must be quoted or the SQL statement will be rejected
+        if ($id_type eq "stable_id") {
+            $id_str = " IN (" . join( ',', map qq("$_"), @ids ) . ")";
+        } else {
+            $id_str = " IN (" . join( ',', @ids). ")";
+        }
     } else {
       $id_str = " = " . $ids[0];
     }
-
-    my $constraint = "${syn}.${name}_id $id_str";
+    
+    my $constraint = "${syn}.${field_name} $id_str";
 
     push @out, @{ $self->generic_fetch($constraint, undef, $slice) };
   }
 
   return \@out;
-} ## end sub fetch_all_by_dbID_list
+}
 
 # might not be a good idea, but for convenience
 # shouldnt be called on the BIG tables though
diff --git a/modules/Bio/EnsEMBL/DBSQL/BaseFeatureAdaptor.pm b/modules/Bio/EnsEMBL/DBSQL/BaseFeatureAdaptor.pm
index c4d271ffdd..1ecb522c89 100644
--- a/modules/Bio/EnsEMBL/DBSQL/BaseFeatureAdaptor.pm
+++ b/modules/Bio/EnsEMBL/DBSQL/BaseFeatureAdaptor.pm
@@ -504,7 +504,7 @@ sub fetch_all_by_Slice_constraint {
 
 =head2 fetch_all_by_logic_name
 
-  Arg [3]    : string $logic_name
+  Arg [1]    : string $logic_name
                the logic name of the type of features to obtain
   Example    : $fs = $a->fetch_all_by_logic_name('foobar');
   Description: Returns a listref of features created from the database.
@@ -536,6 +536,31 @@ sub fetch_all_by_logic_name {
   return $self->generic_fetch($constraint);
 }
 
+=head2 fetch_all_by_stable_id_list
+
+  Arg [1]    : listref of strings $id_list_ref
+               the stable IDs of the features to obtain
+  Arg [2]    : (optional) Bio::EnsEMBL::Slice $slice
+               the slice from which to obtain features
+  Example    : $fs = $a->fetch_all_by_stable_id_list(["ENSG00001","ENSG00002", ...]);
+  Description: Returns a listref of features identified by their stable IDs.
+               This method only fetches features of the same type as the calling
+               adaptor. 
+               Results are constrained to a slice if the slice is provided.
+  Returntype : listref of Bio::EnsEMBL::Feature
+  Exceptions : thrown if no stable ID list is provided.
+  Caller     : General
+  Status     : Stable
+
+=cut
+
+# Thin wrapper around BaseAdaptor->_uncached_fetch_all_by_id_list
+sub fetch_all_by_stable_id_list {
+  my ( $self, $id_list_ref, $slice ) = @_;
+
+  return $self->_uncached_fetch_all_by_id_list($id_list_ref,$slice,"stable_id");
+}
+
 # Method that creates an object.  Called by the _objs_from_sth() method
 # in the sub-classes (the various feature adaptors).  Overridden by the
 # feature collection classes.
diff --git a/modules/t/baseAdaptor.t b/modules/t/baseAdaptor.t
index 5d7041b9fc..f8229211ab 100644
--- a/modules/t/baseAdaptor.t
+++ b/modules/t/baseAdaptor.t
@@ -1,6 +1,7 @@
 use strict;
 use warnings;
 use Test::More;
+use Test::Exception;
 use Bio::EnsEMBL::Test::MultiTestDB;
 use DBI qw/:sql_types/;
 
@@ -12,4 +13,14 @@ $gene_adaptor->bind_param_generic_fetch('protein_coding', SQL_VARCHAR);
 my $count = $gene_adaptor->generic_count('g.biotype =?');
 is($count, 20, 'Checking generic_count for protein_coding genes returns expected amounts');
 
+
+# _uncached_fetch_all_by_id_list tests
+
+my $gene_list = $gene_adaptor->_uncached_fetch_all_by_id_list([qw(ENSG00000101321 ENSG00000101346 ENSG00000101367)],undef,"stable_id");
+ok(scalar(@$gene_list) == 3, "Basic uncached fetch by list");
+
+dies_ok {
+    $gene_list = $gene_adaptor->_uncached_fetch_all_by_id_list([qw(ENSG00000101321 ENSG00000101346 ENSG00000101367)],undef,"dbID")
+} "Wrong data type for ID";
+
 done_testing();
\ No newline at end of file
-- 
GitLab