Commit 86375fd8 authored by Tiago Grego
Browse files

SequenceOntologyMapper no longer required and functional cache

parent fa2ad761
......@@ -72,6 +72,9 @@ my %field_callbacks = (
so_term => 'so_term'
);
# File-level cache mapping object type and biotype to SO accession, to avoid repeated adaptor lookups
my $so_cache = {};
=head2 new
Returntype : Bio::EnsEMBL::IO::Translator::BulkFetcherFeature
......@@ -81,7 +84,7 @@ my %field_callbacks = (
sub new {
my ($class, %args) = @_;
my @required_args = qw/version xref_mapping_file biotype_mapper adaptor/;
my @required_args = qw/version xref_mapping_file adaptor/;
my @missing_args;
map { push @missing_args, $_ unless exists $args{$_} } @required_args;
confess "Missing arguments required by Bio::EnsEMBL::IO::Translator::BulkFetcherFeature" . join(',', @missing_args)
......@@ -90,9 +93,6 @@ sub new {
# this connects Ensembl to Identifiers.org amongst other things
my $xref_mapping = Bio::EnsEMBL::Utils::RDF::Mapper->new($args{xref_mapping_file});
croak "Bio::EnsEMBL::IO::Translator::Feature requires a sequence ontology mapper"
unless $args{biotype_mapper}->isa('Bio::EnsEMBL::Utils::SequenceOntologyMapper');
croak "Bio::EnsEMBL::IO::Translator::BulkFetcherFeature requires a DBAdaptor"
unless $args{adaptor} and $args{adaptor}->isa('Bio::EnsEMBL::DBSQL::DBAdaptor');
$args{meta_adaptor} = $args{adaptor}->get_MetaContainer();
......@@ -104,9 +104,10 @@ sub new {
croak "Unable to get a transcript adaptor"
unless $args{transcript_adaptor}->isa('Bio::EnsEMBL::DBSQL::TranscriptAdaptor');
$args{biotype_adaptor} = $args{adaptor}->get_BiotypeAdaptor();
delete $args{adaptor};
$args{ontology_cache} = {};
$args{mapping} = $xref_mapping;
my $self = $class->SUPER::new(\%args);
......@@ -133,14 +134,9 @@ sub production_name {
return $self->{production_name};
}
sub ontology_cache {
my $self = shift;
return $self->{ontology_cache};
}
sub ontology_adaptor {
sub biotype_adaptor {
my $self = shift;
return $self->{ontology_adaptor};
return $self->{biotype_adaptor};
}
sub meta_adaptor {
......@@ -494,58 +490,25 @@ sub protein_features {
=cut
sub so_term {
my $self = shift;
my $object = shift;
my $so_term;
my ($type, $biotype) = ($self->type($object), $self->biotype($object));
if (!defined $biotype) {
# warn "Could not find biotype for SO term mapping\n";
return;
}
eval {
if ($type eq 'gene') {
$so_term = $self->biotype_mapper->gene_biotype_to_name($biotype);
} elsif ($type eq 'transcript') {
$so_term = $self->biotype_mapper->transcript_biotype_to_name($biotype);
} else {
$so_term = $self->_ontology_id($biotype);
}
};
# TODO: better exception handling, e.g. look up ontology_cache?!
if ($@) {
if (! exists $self->{ontology_cache}->{$biotype}) {
warn sprintf "Failed to map biotype %s to SO term\n", $biotype;
$self->{ontology_cache}->{$biotype} = undef;
}
}
return $so_term;
}
# SO terms often required for dumping RDF
sub _ontology_id {
my ($self, $term) = @_;
my $ontology_cache = $self->ontology_cache;
return $self->{$ontology_cache->{$term}} if $term and exists $self->{$ontology_cache->{$term}};
my ($typeterm) = @{ $self->ontology_adaptor->fetch_all_by_name( $term, 'SO' ) };
unless ($typeterm) {
if($term) {
warn "Can't find SO term for biotype '$term'";
$self->{$ontology_cache->{$term}} = undef;
}
return;
}
my $id = $typeterm->accession;
$self->{$ontology_cache->{$term}} = $id;
return $id;
my ($self, $object) = @_;
my $type = $self->type($object);
my $biotype = $self->biotype($object);
# Only type gene and transcript supported
return unless ($type eq 'gene' || $type eq 'transcript');
# look if term is cached
my $so_acc = $so_cache->{$type}{$biotype};
# if so return it
return $so_acc if defined $so_acc;
# else retrieve the so term using the biotype adaptor, and cache it
$so_acc = $self->biotype_adaptor->fetch_by_name_object_type($biotype, $type)->so_acc;
$so_cache->{$type}{$biotype} = $so_acc;
return $so_acc;
}
1;
......@@ -24,14 +24,9 @@ use Test::Exception;
use JSON;
use Bio::EnsEMBL::Test::MultiTestDB;
use Bio::EnsEMBL::Utils::SequenceOntologyMapper;
use_ok 'Bio::EnsEMBL::IO::Translator::BulkFetcherFeature';
my $omulti = Bio::EnsEMBL::Test::MultiTestDB->new('ontology', "$Bin/..");
my $ontology_adaptor =
$omulti->get_DBAdaptor('ontology')->get_OntologyTermAdaptor();
my $multi = Bio::EnsEMBL::Test::MultiTestDB->new(undef, "$Bin/..");
my $adaptor = $multi->get_DBAdaptor('core');
my $meta_adaptor = $adaptor->get_MetaContainer();
......@@ -41,11 +36,11 @@ my ($version, $production_name) =
$meta_adaptor->list_value_by_key('species.production_name')->[0]
);
my $translator =
Bio::EnsEMBL::IO::Translator::BulkFetcherFeature->new(version => $version,
my $translator = Bio::EnsEMBL::IO::Translator::BulkFetcherFeature->new(
version => $version,
xref_mapping_file => "$Bin/xref_LOD_mapping.json",
biotype_mapper => Bio::EnsEMBL::Utils::SequenceOntologyMapper->new($omulti->get_DBAdaptor('ontology')->get_OntologyTermAdaptor()),
adaptor => $adaptor);
adaptor => $adaptor
);
ok($translator->version == $version, 'version');
ok($translator->production_name eq $production_name, 'production name');
......@@ -58,8 +53,7 @@ my $gene = from_json(slurp_file("$Bin/gene.json"));
#
# compare gene
#
my %gene_attrs =
(
my %gene_attrs = (
type => 'gene',
id => 'ENSG00000127720',
name => 'METTL25',
......@@ -74,13 +68,16 @@ my %gene_attrs =
taxon_id => 9606,
provenance => 'ANNOTATED',
so_term => 'SO:0001217'
);
);
foreach my $attr (keys %gene_attrs) {
is($translator->$attr($gene), $gene_attrs{$attr}, "gene $attr");
}
cmp_deeply($translator->synonyms($gene), [ 'C12orf26', 'FLJ22789' ], 'gene synonyms');
cmp_deeply($translator->xrefs($gene)->[5],
{ display_id => 'METTL25',
cmp_deeply(
$translator->xrefs($gene)->[5],
{display_id => 'METTL25',
primary_id => 84190,
info_type => 'DEPENDENT',
info_text => '',
......@@ -96,8 +93,7 @@ is($translator->uri($gene), "http://rdf.ebi.ac.uk/resource/ensembl/ENSG000001277
my $transcripts = $translator->transcripts($gene);
is(scalar @{$transcripts}, 11, 'number of transcripts');
my $transcript = $transcripts->[0];
my %transcript_attrs =
(
my %transcript_attrs = (
id => 'ENST00000248306',
type => 'transcript',
name => 'METTL25-201',
......@@ -112,10 +108,12 @@ my %transcript_attrs =
taxon_id => 9606,
provenance => 'INFERRED_FROM_TRANSCRIPT',
so_term => 'SO:0000234'
);
);
foreach my $attr (keys %transcript_attrs) {
is($translator->$attr($transcript), $transcript_attrs{$attr}, "transcript $attr");
}
cmp_deeply($translator->synonyms($transcript), [], 'transcript synonyms');
is(scalar @{$translator->xrefs($transcript)}, 19, 'number of transcript xrefs');
cmp_deeply($translator->xrefs($transcript)->[13],
......@@ -132,8 +130,7 @@ is($translator->uri($transcript), "http://rdf.ebi.ac.uk/resource/ensembl.transcr
# compare exon
is(scalar @{$translator->exons($transcript)}, 12, 'number of transcript exons');
my $exon = $translator->exons($transcript)->[3];
my %exon_attrs =
(
my %exon_attrs = (
end => 82476718,
seq_region_name => '12',
coord_system_name => 'chromosome',
......@@ -144,10 +141,12 @@ my %exon_attrs =
rank => 10,
start => 82476644,
so_term => undef
);
);
foreach my $attr (keys %exon_attrs) {
is($translator->$attr($exon), $exon_attrs{$attr}, "exon $attr");
}
is($translator->uri($exon), "http://rdf.ebi.ac.uk/resource/ensembl.exon/ENSE00003483236", 'exon URI');
# compare translation, its xrefs and protein features
......
64 protein_coding gene core,otherfeatures,rnaseq,vega,presite \N \N coding SO:0001217
65 protein_coding transcript core,otherfeatures,rnaseq,vega,presite \N \N coding SO:0000234
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment