Commit 86375fd8 authored by Tiago Grego
Browse files

SequenceOntologyMapper no longer required and functional cache

parent fa2ad761
...@@ -41,37 +41,40 @@ use Bio::EnsEMBL::Utils::RDF qw/feature_uri/; ...@@ -41,37 +41,40 @@ use Bio::EnsEMBL::Utils::RDF qw/feature_uri/;
use Bio::EnsEMBL::Utils::RDF::Mapper; use Bio::EnsEMBL::Utils::RDF::Mapper;
my %field_callbacks = ( my %field_callbacks = (
version => 'version', version => 'version',
production_name => 'production_name', production_name => 'production_name',
id_org_short => 'id_org_short', id_org_short => 'id_org_short',
lod_uri => 'lod_uri', lod_uri => 'lod_uri',
type => 'type', type => 'type',
id => 'id', id => 'id',
name => 'name', name => 'name',
description => 'description', description => 'description',
seq_region_name => 'seq_region_name', seq_region_name => 'seq_region_name',
cs_name => 'coord_system_name', cs_name => 'coord_system_name',
cs_version => 'coord_system_version', cs_version => 'coord_system_version',
start => 'start', start => 'start',
end => 'end', end => 'end',
strand => 'strand', strand => 'strand',
biotype => 'biotype', biotype => 'biotype',
rank => 'rank', rank => 'rank',
taxon_id => 'taxon_id', taxon_id => 'taxon_id',
uri => 'uri', uri => 'uri',
synonyms => 'synonyms', synonyms => 'synonyms',
provenance => 'provenance', provenance => 'provenance',
homologues => 'homologues', homologues => 'homologues',
xrefs => 'xrefs', xrefs => 'xrefs',
dbname => 'dbname', dbname => 'dbname',
homologues => 'homologues', homologues => 'homologues',
transcripts => 'transcripts', transcripts => 'transcripts',
exons => 'exons', exons => 'exons',
translations => 'translations', translations => 'translations',
protein_features => 'protein_features', protein_features => 'protein_features',
so_term => 'so_term' so_term => 'so_term'
); );
# caching of biotype to SO terms to improve speed
my $so_cache = {};
=head2 new =head2 new
Returntype : Bio::EnsEMBL::IO::Translator::BulkFetcherFeature Returntype : Bio::EnsEMBL::IO::Translator::BulkFetcherFeature
...@@ -81,7 +84,7 @@ my %field_callbacks = ( ...@@ -81,7 +84,7 @@ my %field_callbacks = (
sub new { sub new {
my ($class, %args) = @_; my ($class, %args) = @_;
my @required_args = qw/version xref_mapping_file biotype_mapper adaptor/; my @required_args = qw/version xref_mapping_file adaptor/;
my @missing_args; my @missing_args;
map { push @missing_args, $_ unless exists $args{$_} } @required_args; map { push @missing_args, $_ unless exists $args{$_} } @required_args;
confess "Missing arguments required by Bio::EnsEMBL::IO::Translator::BulkFetcherFeature" . join(',', @missing_args) confess "Missing arguments required by Bio::EnsEMBL::IO::Translator::BulkFetcherFeature" . join(',', @missing_args)
...@@ -89,9 +92,6 @@ sub new { ...@@ -89,9 +92,6 @@ sub new {
# this connects Ensembl to Identifiers.org amongst other things # this connects Ensembl to Identifiers.org amongst other things
my $xref_mapping = Bio::EnsEMBL::Utils::RDF::Mapper->new($args{xref_mapping_file}); my $xref_mapping = Bio::EnsEMBL::Utils::RDF::Mapper->new($args{xref_mapping_file});
croak "Bio::EnsEMBL::IO::Translator::Feature requires a sequence ontology mapper"
unless $args{biotype_mapper}->isa('Bio::EnsEMBL::Utils::SequenceOntologyMapper');
croak "Bio::EnsEMBL::IO::Translator::BulkFetcherFeature requires a DBAdaptor" croak "Bio::EnsEMBL::IO::Translator::BulkFetcherFeature requires a DBAdaptor"
unless $args{adaptor} and $args{adaptor}->isa('Bio::EnsEMBL::DBSQL::DBAdaptor'); unless $args{adaptor} and $args{adaptor}->isa('Bio::EnsEMBL::DBSQL::DBAdaptor');
...@@ -103,10 +103,11 @@ sub new { ...@@ -103,10 +103,11 @@ sub new {
$args{transcript_adaptor} = $args{adaptor}->get_TranscriptAdaptor(); $args{transcript_adaptor} = $args{adaptor}->get_TranscriptAdaptor();
croak "Unable to get a transcript adaptor" croak "Unable to get a transcript adaptor"
unless $args{transcript_adaptor}->isa('Bio::EnsEMBL::DBSQL::TranscriptAdaptor'); unless $args{transcript_adaptor}->isa('Bio::EnsEMBL::DBSQL::TranscriptAdaptor');
$args{biotype_adaptor} = $args{adaptor}->get_BiotypeAdaptor();
delete $args{adaptor}; delete $args{adaptor};
$args{ontology_cache} = {};
$args{mapping} = $xref_mapping; $args{mapping} = $xref_mapping;
my $self = $class->SUPER::new(\%args); my $self = $class->SUPER::new(\%args);
...@@ -133,14 +134,9 @@ sub production_name { ...@@ -133,14 +134,9 @@ sub production_name {
return $self->{production_name}; return $self->{production_name};
} }
sub ontology_cache { sub biotype_adaptor {
my $self = shift; my $self = shift;
return $self->{ontology_cache}; return $self->{biotype_adaptor};
}
sub ontology_adaptor {
my $self = shift;
return $self->{ontology_adaptor};
} }
sub meta_adaptor { sub meta_adaptor {
...@@ -494,58 +490,25 @@ sub protein_features { ...@@ -494,58 +490,25 @@ sub protein_features {
=cut =cut
sub so_term { sub so_term {
my $self = shift; my ($self, $object) = @_;
my $object = shift;
my $type = $self->type($object);
my $so_term; my $biotype = $self->biotype($object);
my ($type, $biotype) = ($self->type($object), $self->biotype($object));
# Only type gene and transcript supported
if (!defined $biotype) { return unless ($type eq 'gene' || $type eq 'transcript');
# warn "Could not find biotype for SO term mapping\n";
return; # look if term is cached
} my $so_acc = $so_cache->{$type}{$biotype};
eval { # if so return it
if ($type eq 'gene') { return $so_acc if defined $so_acc;
$so_term = $self->biotype_mapper->gene_biotype_to_name($biotype);
} elsif ($type eq 'transcript') { # else retrieve the so term using the biotype adaptor, and cache it
$so_term = $self->biotype_mapper->transcript_biotype_to_name($biotype); $so_acc = $self->biotype_adaptor->fetch_by_name_object_type($biotype, $type)->so_acc;
} else { $so_cache->{$type}{$biotype} = $so_acc;
$so_term = $self->_ontology_id($biotype);
} return $so_acc;
};
# TODO: better exception handling, e.g. look up ontology_cache?!
if ($@) {
if (! exists $self->{ontology_cache}->{$biotype}) {
warn sprintf "Failed to map biotype %s to SO term\n", $biotype;
$self->{ontology_cache}->{$biotype} = undef;
}
}
return $so_term;
}
# SO terms often required for dumping RDF
sub _ontology_id {
my ($self, $term) = @_;
my $ontology_cache = $self->ontology_cache;
return $self->{$ontology_cache->{$term}} if $term and exists $self->{$ontology_cache->{$term}};
my ($typeterm) = @{ $self->ontology_adaptor->fetch_all_by_name( $term, 'SO' ) };
unless ($typeterm) {
if($term) {
warn "Can't find SO term for biotype '$term'";
$self->{$ontology_cache->{$term}} = undef;
}
return;
}
my $id = $typeterm->accession;
$self->{$ontology_cache->{$term}} = $id;
return $id;
} }
1; 1;
...@@ -24,14 +24,9 @@ use Test::Exception; ...@@ -24,14 +24,9 @@ use Test::Exception;
use JSON; use JSON;
use Bio::EnsEMBL::Test::MultiTestDB; use Bio::EnsEMBL::Test::MultiTestDB;
use Bio::EnsEMBL::Utils::SequenceOntologyMapper;
use_ok 'Bio::EnsEMBL::IO::Translator::BulkFetcherFeature'; use_ok 'Bio::EnsEMBL::IO::Translator::BulkFetcherFeature';
my $omulti = Bio::EnsEMBL::Test::MultiTestDB->new('ontology', "$Bin/..");
my $ontology_adaptor =
$omulti->get_DBAdaptor('ontology')->get_OntologyTermAdaptor();
my $multi = Bio::EnsEMBL::Test::MultiTestDB->new(undef, "$Bin/.."); my $multi = Bio::EnsEMBL::Test::MultiTestDB->new(undef, "$Bin/..");
my $adaptor = $multi->get_DBAdaptor('core'); my $adaptor = $multi->get_DBAdaptor('core');
my $meta_adaptor = $adaptor->get_MetaContainer(); my $meta_adaptor = $adaptor->get_MetaContainer();
...@@ -41,11 +36,11 @@ my ($version, $production_name) = ...@@ -41,11 +36,11 @@ my ($version, $production_name) =
$meta_adaptor->list_value_by_key('species.production_name')->[0] $meta_adaptor->list_value_by_key('species.production_name')->[0]
); );
my $translator = my $translator = Bio::EnsEMBL::IO::Translator::BulkFetcherFeature->new(
Bio::EnsEMBL::IO::Translator::BulkFetcherFeature->new(version => $version, version => $version,
xref_mapping_file => "$Bin/xref_LOD_mapping.json", xref_mapping_file => "$Bin/xref_LOD_mapping.json",
biotype_mapper => Bio::EnsEMBL::Utils::SequenceOntologyMapper->new($omulti->get_DBAdaptor('ontology')->get_OntologyTermAdaptor()), adaptor => $adaptor
adaptor => $adaptor); );
ok($translator->version == $version, 'version'); ok($translator->version == $version, 'version');
ok($translator->production_name eq $production_name, 'production name'); ok($translator->production_name eq $production_name, 'production name');
...@@ -58,8 +53,7 @@ my $gene = from_json(slurp_file("$Bin/gene.json")); ...@@ -58,8 +53,7 @@ my $gene = from_json(slurp_file("$Bin/gene.json"));
# #
# compare gene # compare gene
# #
my %gene_attrs = my %gene_attrs = (
(
type => 'gene', type => 'gene',
id => 'ENSG00000127720', id => 'ENSG00000127720',
name => 'METTL25', name => 'METTL25',
...@@ -74,30 +68,32 @@ my %gene_attrs = ...@@ -74,30 +68,32 @@ my %gene_attrs =
taxon_id => 9606, taxon_id => 9606,
provenance => 'ANNOTATED', provenance => 'ANNOTATED',
so_term => 'SO:0001217' so_term => 'SO:0001217'
); );
foreach my $attr (keys %gene_attrs) { foreach my $attr (keys %gene_attrs) {
is($translator->$attr($gene), $gene_attrs{$attr}, "gene $attr"); is($translator->$attr($gene), $gene_attrs{$attr}, "gene $attr");
} }
cmp_deeply($translator->synonyms($gene), [ 'C12orf26', 'FLJ22789' ], 'gene synonyms'); cmp_deeply($translator->synonyms($gene), [ 'C12orf26', 'FLJ22789' ], 'gene synonyms');
cmp_deeply($translator->xrefs($gene)->[5], cmp_deeply(
{ display_id => 'METTL25', $translator->xrefs($gene)->[5],
primary_id => 84190, {display_id => 'METTL25',
info_type => 'DEPENDENT', primary_id => 84190,
info_text => '', info_type => 'DEPENDENT',
description => 'methyltransferase like 25', info_text => '',
dbname => 'WikiGene' }, 'gene xref'); description => 'methyltransferase like 25',
dbname => 'WikiGene' }, 'gene xref');
cmp_deeply($translator->homologues($gene)->[1], cmp_deeply($translator->homologues($gene)->[1],
{ stable_id => 'ENSGGOG00000023239', { stable_id => 'ENSGGOG00000023239',
genome => 'gorilla_gorilla', genome => 'gorilla_gorilla',
description => 'ortholog_one2one' }, 'homolog'); description => 'ortholog_one2one' }, 'homolog');
is($translator->uri($gene), "http://rdf.ebi.ac.uk/resource/ensembl/ENSG00000127720", 'gene URI'); is($translator->uri($gene), "http://rdf.ebi.ac.uk/resource/ensembl/ENSG00000127720", 'gene URI');
# compare transcript # compare transcript
my $transcripts = $translator->transcripts($gene); my $transcripts = $translator->transcripts($gene);
is(scalar @{$transcripts}, 11, 'number of transcripts'); is(scalar @{$transcripts}, 11, 'number of transcripts');
my $transcript = $transcripts->[0]; my $transcript = $transcripts->[0];
my %transcript_attrs = my %transcript_attrs = (
(
id => 'ENST00000248306', id => 'ENST00000248306',
type => 'transcript', type => 'transcript',
name => 'METTL25-201', name => 'METTL25-201',
...@@ -112,28 +108,29 @@ my %transcript_attrs = ...@@ -112,28 +108,29 @@ my %transcript_attrs =
taxon_id => 9606, taxon_id => 9606,
provenance => 'INFERRED_FROM_TRANSCRIPT', provenance => 'INFERRED_FROM_TRANSCRIPT',
so_term => 'SO:0000234' so_term => 'SO:0000234'
); );
foreach my $attr (keys %transcript_attrs) { foreach my $attr (keys %transcript_attrs) {
is($translator->$attr($transcript), $transcript_attrs{$attr}, "transcript $attr"); is($translator->$attr($transcript), $transcript_attrs{$attr}, "transcript $attr");
} }
cmp_deeply($translator->synonyms($transcript), [], 'transcript synonyms'); cmp_deeply($translator->synonyms($transcript), [], 'transcript synonyms');
is(scalar @{$translator->xrefs($transcript)}, 19, 'number of transcript xrefs'); is(scalar @{$translator->xrefs($transcript)}, 19, 'number of transcript xrefs');
cmp_deeply($translator->xrefs($transcript)->[13], cmp_deeply($translator->xrefs($transcript)->[13],
{ {
'display_id' => 'NM_001319675.1', 'display_id' => 'NM_001319675.1',
'primary_id' => 'NM_001319675', 'primary_id' => 'NM_001319675',
'info_type' => 'DIRECT', 'info_type' => 'DIRECT',
'info_text' => 'Generated via otherfeatures', 'info_text' => 'Generated via otherfeatures',
'description' => '', 'description' => '',
'dbname' => 'RefSeq_mRNA' 'dbname' => 'RefSeq_mRNA'
}, 'transcript xref'); }, 'transcript xref');
is($translator->uri($transcript), "http://rdf.ebi.ac.uk/resource/ensembl.transcript/ENST00000248306", 'transcript URI'); is($translator->uri($transcript), "http://rdf.ebi.ac.uk/resource/ensembl.transcript/ENST00000248306", 'transcript URI');
# compare exon # compare exon
is(scalar @{$translator->exons($transcript)}, 12, 'number of transcript exons'); is(scalar @{$translator->exons($transcript)}, 12, 'number of transcript exons');
my $exon = $translator->exons($transcript)->[3]; my $exon = $translator->exons($transcript)->[3];
my %exon_attrs = my %exon_attrs = (
(
end => 82476718, end => 82476718,
seq_region_name => '12', seq_region_name => '12',
coord_system_name => 'chromosome', coord_system_name => 'chromosome',
...@@ -144,10 +141,12 @@ my %exon_attrs = ...@@ -144,10 +141,12 @@ my %exon_attrs =
rank => 10, rank => 10,
start => 82476644, start => 82476644,
so_term => undef so_term => undef
); );
foreach my $attr (keys %exon_attrs) { foreach my $attr (keys %exon_attrs) {
is($translator->$attr($exon), $exon_attrs{$attr}, "exon $attr"); is($translator->$attr($exon), $exon_attrs{$attr}, "exon $attr");
} }
is($translator->uri($exon), "http://rdf.ebi.ac.uk/resource/ensembl.exon/ENSE00003483236", 'exon URI'); is($translator->uri($exon), "http://rdf.ebi.ac.uk/resource/ensembl.exon/ENSE00003483236", 'exon URI');
# compare translation, its xrefs and protein features # compare translation, its xrefs and protein features
...@@ -157,27 +156,27 @@ is($translator->id($translation), 'ENSP00000248306', 'translation id'); ...@@ -157,27 +156,27 @@ is($translator->id($translation), 'ENSP00000248306', 'translation id');
is($translator->type($translation), 'translation', 'translation type'); is($translator->type($translation), 'translation', 'translation type');
is(scalar @{$translator->xrefs($translation)}, 18, 'number of translation xrefs'); is(scalar @{$translator->xrefs($translation)}, 18, 'number of translation xrefs');
cmp_deeply($translator->xrefs($translation)->[17], cmp_deeply($translator->xrefs($translation)->[17],
{ {
display_id => 'Q8N6Q8', display_id => 'Q8N6Q8',
primary_id => 'Q8N6Q8', primary_id => 'Q8N6Q8',
info_type => 'DIRECT', info_type => 'DIRECT',
info_text => 'Generated via direct', info_text => 'Generated via direct',
description => 'Methyltransferase-like protein 25 ', description => 'Methyltransferase-like protein 25 ',
dbname => 'Uniprot/SWISSPROT' dbname => 'Uniprot/SWISSPROT'
}, 'translation xref'); }, 'translation xref');
is($translator->uri($translation), "http://rdf.ebi.ac.uk/resource/ensembl.protein/ENSP00000248306", 'translation URI'); is($translator->uri($translation), "http://rdf.ebi.ac.uk/resource/ensembl.protein/ENSP00000248306", 'translation URI');
my $protein_features = $translator->protein_features($translation); my $protein_features = $translator->protein_features($translation);
is(scalar @{$protein_features}, 7, 'number of protein features'); is(scalar @{$protein_features}, 7, 'number of protein features');
cmp_deeply($protein_features->[4], cmp_deeply($protein_features->[4],
{ {
name => 'PTHR12496:SF0', name => 'PTHR12496:SF0',
translation_id => 'ENSP00000248306', translation_id => 'ENSP00000248306',
description => undef, description => undef,
interpro_ac => undef, interpro_ac => undef,
dbname => 'PANTHER', dbname => 'PANTHER',
end => 265, end => 265,
start => 1 start => 1
}, 'protein feature attributes'); }, 'protein feature attributes');
sub slurp_file { sub slurp_file {
my $file = shift; my $file = shift;
......
64 protein_coding gene core,otherfeatures,rnaseq,vega,presite \N \N coding SO:0001217
65 protein_coding transcript core,otherfeatures,rnaseq,vega,presite \N \N coding SO:0000234
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment