Commit 4734d9c6 authored by Andy Yates's avatar Andy Yates
Browse files

[ENSCORESW-577]. Adding support for additional lookups.

After talking to Compara we found the need to support additional
lookups with the full id cache. Since all values are stored in memory
the lookup is very easy to compute.
parent 1e36da22
......@@ -33,6 +33,8 @@ An implementation of caching which uses a raw hash to hold all available
values from an adaptor. Useful for working with a controlled vocabulary
table where cardinality is low.
Provides extra functionality to compute additional lookup keys.
=head1 METHODS
=cut
......@@ -59,63 +61,228 @@ sub build_cache {
my ($self) = @_;
my $adaptor = $self->adaptor();
my %cache;
my $objs = $adaptor->generic_fetch();
foreach my $obj (@{$objs}) {
$cache{$obj->dbID()} = $obj;
my $objects = $adaptor->generic_fetch();
my $support_additional_lookups = $self->support_additional_lookups();
foreach my $object (@{$objects}) {
my $key = $object->dbID();
$cache{$key} = $object;
#Add to additional lookup
$self->add_to_additional_lookups($key, $object);
}
return \%cache;
}
=head2 clear_cache
########### Additional lookup code
Description: Delegates to C<delete_cache()> in order to clear all values
and on the next cache request will force a C<build_cache()>
call
Returntype : None
Exceptions : None
Caller : BaseAdaptors
Status : Beta
# Stores $object under $key via the parent class and indexes it in the
# additional lookups.
#
# Arg [1]    : key used in the primary cache (normally a DB identifier)
# Arg [2]    : object to store
# Returntype : whatever the parent put() handed back for $key when that value
#              is true (treated here as the displaced object); otherwise
#              nothing (bare return)
sub put {
  my ($self, $key, $object) = @_;
  my $old = $self->SUPER::put($key, $object);
  if($old) {
    # An entry is being replaced. Un-index the displaced object first;
    # otherwise $key would be pushed onto the lookup arrays a second time
    # (making get_by_additional_lookup() throw on a duplicate) and any
    # lookup values that only the old object produced would be left behind.
    $self->remove_from_additional_lookup($key, $old);
  }
  #Add to additional lookup
  $self->add_to_additional_lookups($key, $object);
  return $old if $old;
  return;
}
=cut
# Removes $key from the primary cache through the parent class and, when the
# parent hands back a true value, drops that object from the additional
# lookups as well.
#
# Arg [1]    : key to remove from the primary cache
# Returntype : the value returned by the parent remove() when it is true;
#              otherwise nothing (bare return)
sub remove {
  my ($self, $key) = @_;
  my $evicted = $self->SUPER::remove($key);
  # Nothing true came back, so there is nothing to un-index
  return unless $evicted;
  $self->remove_from_additional_lookup($key, $evicted);
  return $evicted;
}
# Empties the cache: delegates to delete_cache() and then discards the
# additional lookup hash held on this object.
#
# Returntype : None
sub clear_cache {
  my $self = shift;
  $self->delete_cache();
  # The secondary index lives directly on $self, so it is dropped separately
  delete $self->{'_additional_lookup'};
  return;
}
=head2 put
=head2 get_by_additional_lookup
Description: Unsupported operation since this cache is read only apart from
during the build process
Returntype : None
Exceptions : Thrown if ever called
Caller : BaseAdaptors
Status : Beta
Arg [1] : String key of the lookup to search for the value in
Arg [2] : String value to search for. We expect exact lookups in the hash
Description : Returns the object linked to the value in the specified lookup.
Example : my $analysis = $cache->get_by_additional_lookup('logic_name', 'xrefchecksum');
Returntype : Object a single object
Exceptions : Throws an exception if more than one ID is linked to the
value being looked up. Also thrown if additional lookups are not supported
Caller : BaseAdaptors
Status : Beta
=cut
sub put {
my ($self) = @_;
throw 'Unsupported operation';
# Returns the single object indexed under $value in the lookup named $key.
#
# Arg [1]    : name of the additional lookup
# Arg [2]    : value to search for (exact hash lookup)
# Returntype : result of get() for the one stored ID; nothing (bare return)
#              when the lookup name or value is not indexed
# Exceptions : throws when more than one ID is stored for the value
sub get_by_additional_lookup {
  my ($self, $key, $value) = @_;
  my $lookups = $self->_additional_lookup();

  # Check each level with exists before descending into it
  return unless exists $lookups->{$key};
  my $ids_for = $lookups->{$key};
  return unless exists $ids_for->{$value};

  my $ids = $ids_for->{$value};
  my $count = scalar(@{$ids});
  if($count > 1) {
    throw("The lookup $key and search value $value has more than one value attached. Use get_all_by_additional_lookup() instead to fetch");
  }
  return $self->get($ids->[0]) if $count == 1;
  return;
}
=head2 remove
=head2 get_all_by_additional_lookup
Description: Unsupported operation since this cache is read only apart from
during the build process
Returntype : None
Exceptions : Thrown if ever called
Caller : BaseAdaptors
Status : Beta
Arg [1] : String key of the lookup to search for the value in
Arg [2] : String value to search for. We expect exact lookups in the hash
Description : Returns an array of all the objects linked to the value
in the specified lookup.
Example : my $array = $cache->get_all_by_additional_lookup('logic_name', 'xrefchecksum');
Returntype : ArrayRef of objects keyed against the second argument
Exceptions : None raised by the lookup itself; an unknown lookup name or
value yields an empty array
Caller : BaseAdaptors
Status : Beta
=cut
sub remove {
my ($self) = @_;
throw 'Unsupported operation';
# Returns every object indexed under $value in the lookup named $key.
#
# Arg [1]    : name of the additional lookup
# Arg [2]    : value to search for (exact hash lookup)
# Returntype : result of get_by_list() for the stored IDs, or a new empty
#              ArrayRef when the lookup name or value is not indexed
sub get_all_by_additional_lookup {
  my ($self, $key, $value) = @_;
  my $lookups = $self->_additional_lookup();
  # The second exists is only evaluated once the lookup name is known to exist
  my $is_indexed = (exists $lookups->{$key}) && (exists $lookups->{$key}->{$value});
  return [] unless $is_indexed;
  return $self->get_by_list($lookups->{$key}->{$value});
}
=head2 remove_from_additional_lookup
Arg [1] : String The lookup key to remove from the additional lookup hash
Arg [2] : Object The object to remove from the additional lookup hash
Description : Re-computes the additional keys for this object
Example : $cache->remove_from_additional_lookup($lookup_key, $object);
Returntype : None
Exceptions : Thrown if we do not support additional lookups
Caller : BaseAdaptors
Status : Beta
=cut
# Removes one primary key from every additional lookup entry that the given
# object maps to.
#
# Arg [1]    : the primary cache key (normally a DB identifier) to remove
# Arg [2]    : the object whose computed keys locate the entries to prune
# Returntype : None
sub remove_from_additional_lookup {
  my ($self, $lookup_key, $object) = @_;
  my $additional_lookup = $self->_additional_lookup();
  # Re-compute the lookup name => value pairs this object was indexed under
  my $keys = $self->compute_keys($object);
  foreach my $key (keys %{$keys}) {
    my $value = $keys->{$key};
    #Only remove if we had originally stored this as an additional lookup.
    #Each level is tested with exists before it is dereferenced.
    next unless exists $additional_lookup->{$key};
    next unless exists $additional_lookup->{$key}->{$value};

    my $lookup_keys = $additional_lookup->{$key}->{$value};
    foreach my $i (0 .. $#{$lookup_keys}) {
      # Primary keys are hash keys, i.e. strings, so compare with eq. A
      # numeric == treats every non-numeric key as 0 and would splice out
      # whichever entry happened to come first.
      if($lookup_keys->[$i] eq $lookup_key) {
        #remove the 1 lookup key from the array and then terminate the
        #loop as we found our value
        splice(@{$lookup_keys}, $i, 1);
        last;
      }
    }
    #If the size has hit 0 then delete the array
    if(scalar(@{$lookup_keys}) == 0) {
      delete $additional_lookup->{$key}->{$value};
    }
  }
  return;
}
1;
=head2 compute_keys
Arg [1] : Object The object to compute keys from
Description : Override to provide support for additional key lookup. The
keys of the hash should represent the lookup name and the
value is the computed key.
Example : Example of the returned hash, not of its usage. Proposed Analysis encoding
{ logic_name => 'xrefalignment', display_label => 'Xref Alignment'}
Returntype : HashRef key is the lookup name and value is the computed key
Exceptions : none
Caller : BaseAdaptors
Status : Beta
=cut
# Default implementation: yields no additional lookup keys for any object.
#
# Arg [1]    : the object to compute keys from (ignored here)
# Returntype : HashRef, always a new empty hash
sub compute_keys {
  my ($self, $object) = @_;
  my %no_additional_keys = ();
  return \%no_additional_keys;
}
=head2 add_to_additional_lookups
Arg [1] : String The key used in the primary lookup hash. Normally
a DB identifier
Arg [2] : Object The object to add to the additional lookups
Description : Internally calls the C<compute_keys()> method and adds
the object to the C<_additional_lookup()> hash.
Returntype : None
Exceptions : Thrown if additional lookups are not supported
Caller : BaseAdaptors
Status : Beta
=cut
# Indexes an object in the additional lookups: for every lookup name/value
# pair reported by compute_keys(), the primary key is appended to the array
# stored under that name and value.
#
# Arg [1]    : the key used in the primary cache
# Arg [2]    : the object to index
# Returntype : None
sub add_to_additional_lookups {
  my ($self, $lookup_key, $object) = @_;
  my $computed = $self->compute_keys($object);
  my $index = $self->_additional_lookup();
  for my $lookup_name (keys %{$computed}) {
    my $lookup_value = $computed->{$lookup_name};
    # Create the ID array on first sight of this name/value pair
    my $ids = ($index->{$lookup_name}->{$lookup_value} ||= []);
    push(@{$ids}, $lookup_key);
  }
  return;
}
=head2 _additional_lookup
Description : Returns the additional lookup hash
Example : Example of additional hash structure (key is
lookup name, second key is value to search for
and value is an array of dbIDs)
{
logic_name => {
xrefalignment => [1]
},
display_label => {
'Xref Alignment' => [1]
}
}
Returntype : HashRef
Exceptions : none
Caller : BaseAdaptors
Status : Beta
=cut
# Accessor for the additional lookup hash stored on this object under the
# '_additional_lookup' slot; an empty hash is created on first access.
#
# Returntype : HashRef
sub _additional_lookup {
  my $self = shift;
  return $self->{_additional_lookup} ||= {};
}
1;
\ No newline at end of file
# Test-only cache subclass that indexes genes by biotype, analysis logic name
# and dbID so the additional lookup code can be exercised.
package TemporaryGeneCache;
use base qw/Bio::EnsEMBL::DBSQL::Support::FullIdCache/;
use strict;
use warnings;

# Flags this cache as supporting additional lookups.
sub support_additional_lookups {
  return 1;
}

# Builds the lookup name => value hash for one object.
#
# Arg [1]    : the object to index; biotype(), analysis()->logic_name() and
#              dbID() are called on it
# Returntype : HashRef with the keys biotype, logic_name and dbID
sub compute_keys {
  my ($self, $object) = @_;
  # Each accessor is forced into scalar context: inside a hash constructor
  # the calls would otherwise run in list context, and an accessor returning
  # an empty list would shift the remaining keys and values out of step.
  return {
    biotype => scalar($object->biotype()),
    logic_name => scalar($object->analysis()->logic_name()),
    dbID => scalar($object->dbID()),
  };
}

1;
#######################
package main;
use strict;
......@@ -21,7 +44,7 @@ sub BEGIN {
no strict 'refs'; ## no critic
*Bio::EnsEMBL::DBSQL::GeneAdaptor::_build_id_cache = sub {
my ($self) = @_;
return Bio::EnsEMBL::DBSQL::Support::FullIdCache->new($self);
return TemporaryGeneCache->new($self);
};
no warnings 'redefine';
my $original_store = \&Bio::EnsEMBL::DBSQL::GeneAdaptor::store;
......@@ -64,6 +87,39 @@ sub BEGIN {
is_deeply($first_gene_refetch, $refetched_cached_ids->[0], 'fetch_by_dbID() should return the same object as fetch_all_by_dbID_list()');
ok(! defined $adaptor->fetch_by_dbID(1), 'Fetching with a bad ID returns nothing');
is(scalar(@{$adaptor->fetch_all_by_dbID_list([1])}), 0, 'Fetching with a bad ID returns an empty array');
############ Test additional lookup code
{
  # Lookups that return many objects, plus the two ways a lookup can miss
  my $protein_coding_genes = $cache->get_all_by_additional_lookup('biotype', 'protein_coding');
  is_deeply([sort map { $_->dbID() } @{$protein_coding_genes} ], [sort map { $_->dbID() } @{$genes} ], 'Protein coding lookup returns all genes');
  my $wibble_genes = $cache->get_all_by_additional_lookup('biotype', 'wibble');
  is_deeply($wibble_genes, [], 'biotype is a valid lookup but wibble is a bad key. Empty array return');
  # Assert on $bad_lookup itself so the unknown-lookup-name case is really tested
  my $bad_lookup = $cache->get_all_by_additional_lookup('wibble', 'wibble');
  is_deeply($bad_lookup, [], 'wibble is a bad lookup. Empty array return');

  # Single-object lookup
  my $individual_gene = $cache->get_by_additional_lookup('dbID', $gene_ids->[0]);
  is($individual_gene->dbID, $gene_ids->[0], 'Lookup of dbID returns a single value');
  is($cache->get($gene_ids->[0]), $individual_gene, 'Same object is returned from the main get() method and the lookup');
  dies_ok { $cache->get_by_additional_lookup('biotype', 'protein_coding') } 'Expect to die as the query will return more than one value';

  # Removing an object must also prune it from every additional lookup
  $cache->remove($individual_gene->dbID());
  my $new_protein_coding_genes = $cache->get_all_by_additional_lookup('biotype', 'protein_coding');
  ok(! defined $cache->get_by_additional_lookup('dbID', $gene_ids->[0]), 'Removed gene so lookups can no longer return an object');
  ok(! exists $cache->_additional_lookup()->{dbID}->{$gene_ids->[0]}, 'Removed the resulting array from the lookup hash');
  ok(exists $cache->_additional_lookup()->{biotype}->{protein_coding}, 'Biotype lookup still exists for protein_coding');
  is(scalar(@{$new_protein_coding_genes}), (scalar(@{$protein_coding_genes}) -1), 'Reduced the returned number of protein coding genes by one');

  # Putting the object back restores the lookups
  $cache->put($individual_gene->dbID(), $individual_gene);
  ok(defined $cache->get_by_additional_lookup('dbID', $gene_ids->[0]), 'Added the gene back in. Everything is OK again');

  #Checking DBSQL based lookup works
  my $sql_protein_coding_genes = $cache->get_by_sql('select gene_id from gene where biotype =?', ['protein_coding']);
  is_deeply([sort map { $_->dbID() } @{$sql_protein_coding_genes} ], [sort map { $_->dbID() } @{$protein_coding_genes} ], 'SQL based protein_coding lookup returns all genes');
}
#Turn off caching; we should get a fresh object out of the cache
my $cached_obj = $adaptor->fetch_by_dbID($gene_ids->[0]);
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment