Increase maximum number of dbIDs to put into the IN-list from 200 to 2048 in fetch_all_by_dbID_list():

  # Ensure that we do not exceed MySQL's max_allowed_packet (defaults to
  # 1 MB) by splitting large queries into smaller queries of at most 32 KB
  # (32768 8-bit characters).  Assuming a (generous) average dbID string
  # length of 16, this means 2048 dbIDs in each query.

The observed decrease in time for unsorted dbID lists of 10000 IDs ranged from 5% to just under 10% depending on feature type, and was lower (around 3%) for sorted ID lists.

Some reformatting.

Increase maximum number of dbIDs to put into the IN-list from 200 to 2048 in fetch_all_by_dbID_list():

  # Ensure that we do not exceed MySQL's max_allowed_packet (defaults to
  # 1 MB) by splitting large queries into smaller queries of at most 32 KB
  # (32768 8-bit characters).  Assuming a (generous) average dbID string
  # length of 16, this means 2048 dbIDs in each query.

The observed decrease in time for unsorted dbID lists of 10000 IDs ranged from 5% to just under 10% depending on feature type, and was lower (around 3%) for sorted ID lists.

Some reformatting.
efa68a7b · Andreas Kusalananda Kähäri · 88d41113 · efa68a7b
Commit efa68a7b authored 16 years ago by Andreas Kusalananda Kähäri
--- a/modules/Bio/EnsEMBL/DBSQL/BaseAdaptor.pm
+++ b/modules/Bio/EnsEMBL/DBSQL/BaseAdaptor.pm
@@ -215,25 +215,23 @@ sub dbc{
 # if primary key field is not supplied, tablename_id is assumed
 # returns listref of IDs
 sub _list_dbIDs {
+  my ( $self, $table, $pk, $ordered ) = @_;
-  my ($self, $table, $pk, $ordered) = @_;
+  if ( !defined($pk) ) { $pk = $table . "_id" }
-  if (!defined($pk)) {
-    $pk = $table . "_id";
-  }
-  my @out;
  my $sql = "SELECT " . $pk . "  FROM " . $table;
-  if(defined($ordered) and $ordered){
-    $sql .= " order by seq_region_id, seq_region_start"
-  }	
-  my $sth = $self->prepare($sql);
-  $sth->execute;
-  while (my ($id) = $sth->fetchrow) {
+  if ( defined($ordered) && $ordered ) {
-    push(@out, $id);
+    $sql .= " order by seq_region_id, seq_region_start";
  }
-  $sth->finish;
+  my $sth = $self->prepare($sql);
+  $sth->execute();
+  my @out;
+  while ( my ($id) = $sth->fetchrow() ) { push( @out, $id ) }
+  $sth->finish();
  return \@out;
 }
@@ -395,17 +393,20 @@ sub fetch_by_dbID{
 =head2 fetch_all_by_dbID_list
-  Arg [1]    : listref of ints $id_list
+  Arg [1]    : listref of integers $id_list
-               The unique database identifiers for the features to be obtained
+               The unique database identifiers for the features to
+               be obtained.
  Example    : @feats = @{$adaptor->fetch_all_by_dbID_list([1234, 2131, 982])};
-  Description: Returns the features created from the database defined by the
+  Description: Returns the features created from the database
-               the ids in contained in the id list $id_list.  The features 
+               defined by the IDs contained in the provided
-               will be returned in their native coordinate system. That is, 
+               ID list $id_list.  The features will be returned
-               the coordinate system in which they are stored in the database.
+               in their native coordinate system.  That is, the
-               In order to convert the features to a particular coordinate 
+               coordinate system in which they are stored in the
-               system use the transfer() or transform() method.  If none of the
+               database.  In order to convert the features to a
-               features are found in the database a reference to an empty 
+               particular coordinate system use the transfer() or
-               list is returned.
+               transform() method.  If none of the features are
+               found in the database a reference to an empty list is
+               returned.
  Returntype : listref of Bio::EnsEMBL::Features
  Exceptions : thrown if the $id_list argument is not provided or is
               not an array reference
@@ -415,46 +416,52 @@ sub fetch_by_dbID{
 =cut
 sub fetch_all_by_dbID_list {
-  my ($self,$id_list_ref) = @_;
+  my ( $self, $id_list_ref ) = @_;
-  if(!defined($id_list_ref) || ref($id_list_ref) ne 'ARRAY') {
+  if ( !defined($id_list_ref) || ref($id_list_ref) ne 'ARRAY' ) {
    throw("id_list list reference argument is required");
  }
-  return [] if(!@$id_list_ref);
+  if ( !@{$id_list_ref} ) { return [] }
-  my @out;
+  # Construct a constraint like 't1.table1_id = 123'
-  #construct a constraint like 't1.table1_id = 123'
+  my @tabs = $self->_tables();
-  my @tabs = $self->_tables;
+  my ( $name, $syn ) = @{ $tabs[0] };
-  my ($name, $syn) = @{$tabs[0]};
+  # Ensure that we do not exceed MySQL's max_allowed_packet (defaults to
+  # 1 MB) by splitting large queries into smaller queries of at most 32 KB
+  # (32768 8-bit characters).  Assuming a (generous) average dbID string
+  # length of 16, this means 2048 dbIDs in each query.
+  my $max_size = 2048;
+  my @id_list = @{$id_list_ref};
-  # mysql is faster and we ensure that we do not exceed the max query size by
+  my @out;
-  # splitting large queries into smaller queries of 200 ids
-  my $max_size = 200;
-  my @id_list = @$id_list_ref;
-  while(@id_list) {
+  while (@id_list) {
    my @ids;
-    if(@id_list > $max_size) {
+    my $id_str;
-      @ids = splice(@id_list, 0, $max_size);
+    if ( scalar(@id_list) > $max_size ) {
+      @ids = splice( @id_list, 0, $max_size );
    } else {
-      @ids = splice(@id_list, 0);
+      @ids     = @id_list;
+      @id_list = ();
    }
-    my $id_str;
+    if ( scalar(@ids) > 1 ) {
-    if(@ids > 1)  {
+      $id_str = " IN (" . join( ',', @ids ) . ")";
-      $id_str = " IN (" . join(',', @ids). ")";
    } else {
      $id_str = " = " . $ids[0];
    }
    my $constraint = "${syn}.${name}_id $id_str";
-    push @out, @{$self->generic_fetch($constraint)};
+    push @out, @{ $self->generic_fetch($constraint) };
  }
  return \@out;
-}
+} ## end sub fetch_all_by_dbID_list
 # might not be a good idea, but for convenience
 # shouldnt be called on the BIG tables though