Skip to content
Snippets Groups Projects
Commit efa68a7b authored by Andreas Kusalananda Kähäri
Browse files

Increase maximum number of dbIDs to put into the IN-list from 200 to
2048 in fetch_all_by_dbID_list():

  # Ensure that we do not exceed MySQL's max_allowed_packet (defaults to
  # 1 MB) by splitting large queries into smaller queries of at most 32 KB
  # (32768 8-bit characters).  Assuming a (generous) average dbID string
  # length of 16, this means 2048 dbIDs in each query.

The observed decrease in time for unsorted dbID lists of 10000 IDs
ranged from 5% to just under 10% depending on feature type, and was
lower (around 3%) for sorted ID lists.

Some reformatting.
parent 88d41113
No related branches found
No related tags found
No related merge requests found
...@@ -215,25 +215,23 @@ sub dbc{ ...@@ -215,25 +215,23 @@ sub dbc{
# if primary key field is not supplied, tablename_id is assumed # if primary key field is not supplied, tablename_id is assumed
# returns listref of IDs # returns listref of IDs
sub _list_dbIDs { sub _list_dbIDs {
my ( $self, $table, $pk, $ordered ) = @_;
my ($self, $table, $pk, $ordered) = @_; if ( !defined($pk) ) { $pk = $table . "_id" }
if (!defined($pk)) {
$pk = $table . "_id";
}
my @out;
my $sql = "SELECT " . $pk . " FROM " . $table; my $sql = "SELECT " . $pk . " FROM " . $table;
if(defined($ordered) and $ordered){
$sql .= " order by seq_region_id, seq_region_start"
}
my $sth = $self->prepare($sql);
$sth->execute;
while (my ($id) = $sth->fetchrow) { if ( defined($ordered) && $ordered ) {
push(@out, $id); $sql .= " order by seq_region_id, seq_region_start";
} }
$sth->finish; my $sth = $self->prepare($sql);
$sth->execute();
my @out;
while ( my ($id) = $sth->fetchrow() ) { push( @out, $id ) }
$sth->finish();
return \@out; return \@out;
} }
...@@ -395,17 +393,20 @@ sub fetch_by_dbID{ ...@@ -395,17 +393,20 @@ sub fetch_by_dbID{
=head2 fetch_all_by_dbID_list =head2 fetch_all_by_dbID_list
Arg [1] : listref of ints $id_list Arg [1] : listref of integers $id_list
The unique database identifiers for the features to be obtained The unique database identifiers for the features to
be obtained.
Example : @feats = @{$adaptor->fetch_by_dbID_list([1234, 2131, 982]))}; Example : @feats = @{$adaptor->fetch_by_dbID_list([1234, 2131, 982]))};
Description: Returns the features created from the database defined by the Description: Returns the features created from the database
the ids in contained in the id list $id_list. The features defined by the the IDs in contained in the provided
will be returned in their native coordinate system. That is, ID list $id_list. The features will be returned
the coordinate system in which they are stored in the database. in their native coordinate system. That is, the
In order to convert the features to a particular coordinate coordinate system in which they are stored in the
system use the transfer() or transform() method. If none of the database. In order to convert the features to a
features are found in the database a reference to an empty particular coordinate system use the transfer() or
list is returned. transform() method. If none of the features are
found in the database a reference to an empty list is
returned.
Returntype : listref of Bio::EnsEMBL::Features Returntype : listref of Bio::EnsEMBL::Features
Exceptions : thrown if $id arg is not provided Exceptions : thrown if $id arg is not provided
does not exist does not exist
...@@ -415,46 +416,52 @@ sub fetch_by_dbID{ ...@@ -415,46 +416,52 @@ sub fetch_by_dbID{
=cut =cut
sub fetch_all_by_dbID_list { sub fetch_all_by_dbID_list {
my ($self,$id_list_ref) = @_; my ( $self, $id_list_ref ) = @_;
if(!defined($id_list_ref) || ref($id_list_ref) ne 'ARRAY') { if ( !defined($id_list_ref) || ref($id_list_ref) ne 'ARRAY' ) {
throw("id_list list reference argument is required"); throw("id_list list reference argument is required");
} }
return [] if(!@$id_list_ref); if ( !@{$id_list_ref} ) { return [] }
my @out; # Construct a constraint like 't1.table1_id = 123'
#construct a constraint like 't1.table1_id = 123' my @tabs = $self->_tables();
my @tabs = $self->_tables; my ( $name, $syn ) = @{ $tabs[0] };
my ($name, $syn) = @{$tabs[0]};
# Ensure that we do not exceed MySQL's max_allowed_packet (defaults to
# 1 MB) by splitting large queries into smaller queries of at most 32 KB
# (32768 8-bit characters). Assuming a (generous) average dbID string
# length of 16, this means 2048 dbIDs in each query.
my $max_size = 2048;
my @id_list = @{$id_list_ref};
# mysql is faster and we ensure that we do not exceed the max query size by my @out;
# splitting large queries into smaller queries of 200 ids
my $max_size = 200;
my @id_list = @$id_list_ref;
while(@id_list) { while (@id_list) {
my @ids; my @ids;
if(@id_list > $max_size) { my $id_str;
@ids = splice(@id_list, 0, $max_size);
if ( scalar(@id_list) > $max_size ) {
@ids = splice( @id_list, 0, $max_size );
} else { } else {
@ids = splice(@id_list, 0); @ids = @id_list;
@id_list = ();
} }
my $id_str; if ( scalar(@ids) > 1 ) {
if(@ids > 1) { $id_str = " IN (" . join( ',', @ids ) . ")";
$id_str = " IN (" . join(',', @ids). ")";
} else { } else {
$id_str = " = " . $ids[0]; $id_str = " = " . $ids[0];
} }
my $constraint = "${syn}.${name}_id $id_str"; my $constraint = "${syn}.${name}_id $id_str";
push @out, @{$self->generic_fetch($constraint)}; push @out, @{ $self->generic_fetch($constraint) };
} }
return \@out; return \@out;
} } ## end sub fetch_all_by_dbID_list
# might not be a good idea, but for convenience # might not be a good idea, but for convenience
# shouldnt be called on the BIG tables though # shouldnt be called on the BIG tables though
......
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment