From 24c8a5fbe62bf4a58968dbbb9639dd983ba31972 Mon Sep 17 00:00:00 2001 From: Kieron Taylor <ktaylor@ebi.ac.uk> Date: Thu, 7 Feb 2013 13:06:16 +0000 Subject: [PATCH] Introduced fetching by stable ID lists --- modules/Bio/EnsEMBL/DBSQL/BaseAdaptor.pm | 63 ++++++++++++++++--- .../Bio/EnsEMBL/DBSQL/BaseFeatureAdaptor.pm | 27 +++++++- modules/t/baseAdaptor.t | 11 ++++ 3 files changed, 92 insertions(+), 9 deletions(-) diff --git a/modules/Bio/EnsEMBL/DBSQL/BaseAdaptor.pm b/modules/Bio/EnsEMBL/DBSQL/BaseAdaptor.pm index 9190dc45cc..ffa4533c33 100755 --- a/modules/Bio/EnsEMBL/DBSQL/BaseAdaptor.pm +++ b/modules/Bio/EnsEMBL/DBSQL/BaseAdaptor.pm @@ -92,7 +92,7 @@ use vars qw(@ISA @EXPORT); use strict; use Bio::EnsEMBL::Utils::Exception qw(throw); -use Bio::EnsEMBL::Utils::Scalar qw(assert_ref); +use Bio::EnsEMBL::Utils::Scalar qw(assert_ref assert_integer); use DBI qw(:sql_types); use Data::Dumper; @@ -654,6 +654,34 @@ sub fetch_all_by_dbID_list { sub _uncached_fetch_all_by_dbID_list { my ( $self, $id_list_ref, $slice ) = @_; + return $self->_uncached_fetch_all_by_id_list($id_list_ref, $slice, "dbID"); +} ## end sub _uncached_fetch_all_by_dbID_list + +=head2 _uncached_fetch_all_by_id_list + + Arg [1] : listref of IDs + Arg [2] : (optional) Bio::EnsEMBL::Slice $slice + A slice that features should be remapped to + Arg [3] : String describing the ID type. + Valid values include dbID and stable_id. dbID is an alias for + the primary key, while other names map directly to table columns + of the Feature this adaptor manages. + Example : $list_of_features = $adaptor->_uncached_fetch_all_by_id_list( + [qw(ENSG00000101321 ENSG00000101346 ENSG00000101367)], + undef, + "stable_id"); + Description: This is a generic method used to fetch lists of features by IDs. + It avoids caches, meaning it is best suited for block fetching. + See fetch_all_by_dbID_list() for more info. + Returntype : ListRef of Bio::EnsEMBL::Feature + Exceptions : Thrown if a list of IDs is not supplied. 
+ Caller : BaseFeatureAdaptor, BaseAdaptor and derived classes. + +=cut + +sub _uncached_fetch_all_by_id_list { + my ( $self, $id_list_ref, $slice, $id_type ) = @_; + if ( !defined($id_list_ref) || ref($id_list_ref) ne 'ARRAY' ) { throw("id_list list reference argument is required"); } @@ -670,9 +698,22 @@ sub _uncached_fetch_all_by_dbID_list { # length of 16, this means 2048 dbIDs in each query. my $max_size = 2048; - + # prepare column name for query + my $field_name; + if ($id_type eq "dbID") { + $field_name = $name."_id"; + } else { + $field_name = $id_type; + } + + # build up unique id list, validating each ID along the way my %id_list; - $id_list{$_}++ for @{$id_list_ref}; + for (@{$id_list_ref}) { + $id_list{$_}++; + if ($id_type ne "stable_id") { + assert_integer($_,"$field_name"); + } + } my @id_list = keys %id_list; my @out; @@ -687,20 +728,26 @@ sub _uncached_fetch_all_by_dbID_list { @ids = @id_list; @id_list = (); } - + if ( scalar(@ids) > 1 ) { - $id_str = " IN (" . join( ',', @ids ) . ")"; + # stable_ids are the only feature attribute which is expressed as a + # varchar. These need to be quoted or the SQL will bounce. NOTE(review): the single-ID '=' branch below is left unquoted + if ($id_type eq "stable_id") { + $id_str = " IN (" . join( ',', map qq("$_"), @ids ) . ")"; + } else { + $id_str = " IN (" . join( ',', @ids). ")"; + } } else { $id_str = " = " . 
$ids[0]; } - - my $constraint = "${syn}.${name}_id $id_str"; + + my $constraint = "${syn}.${field_name} $id_str"; push @out, @{ $self->generic_fetch($constraint, undef, $slice) }; } return \@out; -} ## end sub fetch_all_by_dbID_list +} # might not be a good idea, but for convenience # shouldnt be called on the BIG tables though diff --git a/modules/Bio/EnsEMBL/DBSQL/BaseFeatureAdaptor.pm b/modules/Bio/EnsEMBL/DBSQL/BaseFeatureAdaptor.pm index c4d271ffdd..1ecb522c89 100644 --- a/modules/Bio/EnsEMBL/DBSQL/BaseFeatureAdaptor.pm +++ b/modules/Bio/EnsEMBL/DBSQL/BaseFeatureAdaptor.pm @@ -504,7 +504,7 @@ sub fetch_all_by_Slice_constraint { =head2 fetch_all_by_logic_name - Arg [3] : string $logic_name + Arg [1] : string $logic_name the logic name of the type of features to obtain Example : $fs = $a->fetch_all_by_logic_name('foobar'); Description: Returns a listref of features created from the database. @@ -536,6 +536,31 @@ sub fetch_all_by_logic_name { return $self->generic_fetch($constraint); } +=head2 fetch_all_by_stable_id_list + + Arg [1] : listref of strings $id_list_ref + the stable IDs of the features to obtain + Arg [2] : (optional) Bio::EnsEMBL::Slice $slice + the slice from which to obtain features + Example : $fs = $a->fetch_all_by_stable_id_list(["ENSG00001","ENSG00002", ...]); + Description: Returns a listref of features identified by their stable IDs. + This method only fetches features of the same type as the calling + adaptor. + Results are constrained to a slice if the slice is provided. + Returntype : listref of Bio::EnsEMBL::Feature + Exceptions : thrown if no stable ID list is provided. + Caller : General + Status : Stable + +=cut + +# Delegates to _uncached_fetch_all_by_id_list (defined in BaseAdaptor) +sub fetch_all_by_stable_id_list { + my ( $self, $id_list_ref, $slice ) = @_; + + return $self->_uncached_fetch_all_by_id_list($id_list_ref,$slice,"stable_id"); +} + # Method that creates an object. 
Called by the _objs_from_sth() method # in the sub-classes (the various feature adaptors). Overridden by the # feature collection classes. diff --git a/modules/t/baseAdaptor.t b/modules/t/baseAdaptor.t index 5d7041b9fc..f8229211ab 100644 --- a/modules/t/baseAdaptor.t +++ b/modules/t/baseAdaptor.t @@ -1,6 +1,7 @@ use strict; use warnings; use Test::More; +use Test::Exception; use Bio::EnsEMBL::Test::MultiTestDB; use DBI qw/:sql_types/; @@ -12,4 +13,14 @@ $gene_adaptor->bind_param_generic_fetch('protein_coding', SQL_VARCHAR); my $count = $gene_adaptor->generic_count('g.biotype =?'); is($count, 20, 'Checking generic_count for protein_coding genes returns expected amounts'); + +# _uncached_fetch_all_by_id_list tests + +my $gene_list = $gene_adaptor->_uncached_fetch_all_by_id_list([qw(ENSG00000101321 ENSG00000101346 ENSG00000101367)],undef,"stable_id"); +ok(scalar(@$gene_list) == 3, "Basic uncached fetch by list"); + +dies_ok { + $gene_list = $gene_adaptor->_uncached_fetch_all_by_id_list([qw(ENSG00000101321 ENSG00000101346 ENSG00000101367)],undef,"dbID") +} "Wrong data type for ID"; + done_testing(); \ No newline at end of file -- GitLab