Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Sign in
Toggle navigation
Open sidebar
ensembl-gh-mirror
ensembl-io
Commits
86375fd8
Commit
86375fd8
authored
Oct 11, 2018
by
Tiago Grego
Browse files
SequenceOntologyMapper no longer required and functional cache
parent
fa2ad761
Changes
3
Show whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
112 additions
and
148 deletions
+112
-148
modules/Bio/EnsEMBL/IO/Translator/BulkFetcherFeature.pm
modules/Bio/EnsEMBL/IO/Translator/BulkFetcherFeature.pm
+57
-94
modules/t/rdf/bulkfetcherfeature_translator.t
modules/t/rdf/bulkfetcherfeature_translator.t
+53
-54
modules/t/test-genome-DBs/homo_sapiens/core/biotype.txt
modules/t/test-genome-DBs/homo_sapiens/core/biotype.txt
+2
-0
No files found.
modules/Bio/EnsEMBL/IO/Translator/BulkFetcherFeature.pm
View file @
86375fd8
...
...
@@ -72,6 +72,9 @@ my %field_callbacks = (
so_term
=>
'
so_term
'
);
# caching of biotype to SO terms to improve speed
my
$so_cache
=
{};
=head2 new
Returntype : Bio::EnsEMBL::IO::Translator::BulkFetcherFeature
...
...
@@ -81,7 +84,7 @@ my %field_callbacks = (
sub
new
{
my
(
$class
,
%args
)
=
@_
;
my
@required_args
=
qw/version xref_mapping_file
biotype_mapper
adaptor/
;
my
@required_args
=
qw/version xref_mapping_file adaptor/
;
my
@missing_args
;
map
{
push
@missing_args
,
$_
unless
exists
$args
{
$_
}
}
@required_args
;
confess
"
Missing arguments required by Bio::EnsEMBL::IO::Translator::BulkFetcherFeature
"
.
join
('
,
',
@missing_args
)
...
...
@@ -90,9 +93,6 @@ sub new {
# this connects Ensembl to Identifiers.org amongst other things
my
$xref_mapping
=
Bio::EnsEMBL::Utils::RDF::
Mapper
->
new
(
$args
{
xref_mapping_file
});
croak
"
Bio::EnsEMBL::IO::Translator::Feature requires a sequence ontology mapper
"
unless
$args
{
biotype_mapper
}
->
isa
('
Bio::EnsEMBL::Utils::SequenceOntologyMapper
');
croak
"
Bio::EnsEMBL::IO::Translator::BulkFetcherFeature requires a DBAdaptor
"
unless
$args
{
adaptor
}
and
$args
{
adaptor
}
->
isa
('
Bio::EnsEMBL::DBSQL::DBAdaptor
');
$args
{
meta_adaptor
}
=
$args
{
adaptor
}
->
get_MetaContainer
();
...
...
@@ -104,9 +104,10 @@ sub new {
croak
"
Unable to get a transcript adaptor
"
unless
$args
{
transcript_adaptor
}
->
isa
('
Bio::EnsEMBL::DBSQL::TranscriptAdaptor
');
$args
{
biotype_adaptor
}
=
$args
{
adaptor
}
->
get_BiotypeAdaptor
();
delete
$args
{
adaptor
};
$args
{
ontology_cache
}
=
{};
$args
{
mapping
}
=
$xref_mapping
;
my
$self
=
$class
->
SUPER::
new
(
\
%args
);
...
...
@@ -133,14 +134,9 @@ sub production_name {
return
$self
->
{
production_name
};
}
sub
ontology_cache
{
my
$self
=
shift
;
return
$self
->
{
ontology_cache
};
}
sub
ontology_adaptor
{
sub
biotype_adaptor
{
my
$self
=
shift
;
return
$self
->
{
ontology
_adaptor
};
return
$self
->
{
biotype
_adaptor
};
}
sub
meta_adaptor
{
...
...
@@ -494,58 +490,25 @@ sub protein_features {
=cut
sub
so_term
{
my
$self
=
shift
;
my
$object
=
shift
;
my
$so_term
;
my
(
$type
,
$biotype
)
=
(
$self
->
type
(
$object
),
$self
->
biotype
(
$object
));
if
(
!
defined
$biotype
)
{
# warn "Could not find biotype for SO term mapping\n";
return
;
}
eval
{
if
(
$type
eq
'
gene
')
{
$so_term
=
$self
->
biotype_mapper
->
gene_biotype_to_name
(
$biotype
);
}
elsif
(
$type
eq
'
transcript
')
{
$so_term
=
$self
->
biotype_mapper
->
transcript_biotype_to_name
(
$biotype
);
}
else
{
$so_term
=
$self
->
_ontology_id
(
$biotype
);
}
};
# TODO: better exception handling, e.g. look up ontology_cache?!
if
(
$@
)
{
if
(
!
exists
$self
->
{
ontology_cache
}
->
{
$biotype
})
{
warn
sprintf
"
Failed to map biotype %s to SO term
\n
",
$biotype
;
$self
->
{
ontology_cache
}
->
{
$biotype
}
=
undef
;
}
}
return
$so_term
;
}
# SO terms often required for dumping RDF
sub
_ontology_id
{
my
(
$self
,
$term
)
=
@_
;
my
$ontology_cache
=
$self
->
ontology_cache
;
return
$self
->
{
$ontology_cache
->
{
$term
}}
if
$term
and
exists
$self
->
{
$ontology_cache
->
{
$term
}};
my
(
$typeterm
)
=
@
{
$self
->
ontology_adaptor
->
fetch_all_by_name
(
$term
,
'
SO
'
)
};
unless
(
$typeterm
)
{
if
(
$term
)
{
warn
"
Can't find SO term for biotype '
$term
'
";
$self
->
{
$ontology_cache
->
{
$term
}}
=
undef
;
}
return
;
}
my
$id
=
$typeterm
->
accession
;
$self
->
{
$ontology_cache
->
{
$term
}}
=
$id
;
return
$id
;
my
(
$self
,
$object
)
=
@_
;
my
$type
=
$self
->
type
(
$object
);
my
$biotype
=
$self
->
biotype
(
$object
);
# Only type gene and transcript supported
return
unless
(
$type
eq
'
gene
'
||
$type
eq
'
transcript
');
# look if term is cached
my
$so_acc
=
$so_cache
->
{
$type
}{
$biotype
};
# if so return it
return
$so_acc
if
defined
$so_acc
;
# else retrieve the so term using the biotype adaptor, and cache it
$so_acc
=
$self
->
biotype_adaptor
->
fetch_by_name_object_type
(
$biotype
,
$type
)
->
so_acc
;
$so_cache
->
{
$type
}{
$biotype
}
=
$so_acc
;
return
$so_acc
;
}
1
;
modules/t/rdf/bulkfetcherfeature_translator.t
View file @
86375fd8
...
...
@@ -24,14 +24,9 @@ use Test::Exception;
use
JSON
;
use
Bio::EnsEMBL::Test::
MultiTestDB
;
use
Bio::EnsEMBL::Utils::
SequenceOntologyMapper
;
use_ok
'
Bio::EnsEMBL::IO::Translator::BulkFetcherFeature
';
my
$omulti
=
Bio::EnsEMBL::Test::
MultiTestDB
->
new
('
ontology
',
"
$Bin
/..
");
my
$ontology_adaptor
=
$omulti
->
get_DBAdaptor
('
ontology
')
->
get_OntologyTermAdaptor
();
my
$multi
=
Bio::EnsEMBL::Test::
MultiTestDB
->
new
(
undef
,
"
$Bin
/..
");
my
$adaptor
=
$multi
->
get_DBAdaptor
('
core
');
my
$meta_adaptor
=
$adaptor
->
get_MetaContainer
();
...
...
@@ -41,11 +36,11 @@ my ($version, $production_name) =
$meta_adaptor
->
list_value_by_key
('
species.production_name
')
->
[
0
]
);
my
$translator
=
Bio::EnsEMBL::IO::Translator::
BulkFetcherFeature
->
new
(
version
=>
$version
,
my
$translator
=
Bio::EnsEMBL::IO::Translator::
BulkFetcherFeature
->
new
(
version
=>
$version
,
xref_mapping_file
=>
"
$Bin
/xref_LOD_mapping.json
",
biotype_mapper
=>
Bio::EnsEMBL::Utils::
SequenceOntologyMapper
->
new
(
$omulti
->
get_DBAdaptor
('
ontology
')
->
get_OntologyTermA
daptor
()),
adaptor
=>
$adaptor
);
adaptor
=>
$a
daptor
);
ok
(
$translator
->
version
==
$version
,
'
version
');
ok
(
$translator
->
production_name
eq
$production_name
,
'
production name
');
...
...
@@ -58,8 +53,7 @@ my $gene = from_json(slurp_file("$Bin/gene.json"));
#
# compare gene
#
my
%gene_attrs
=
(
my
%gene_attrs
=
(
type
=>
'
gene
',
id
=>
'
ENSG00000127720
',
name
=>
'
METTL25
',
...
...
@@ -74,13 +68,16 @@ my %gene_attrs =
taxon_id
=>
9606
,
provenance
=>
'
ANNOTATED
',
so_term
=>
'
SO:0001217
'
);
);
foreach
my
$attr
(
keys
%gene_attrs
)
{
is
(
$translator
->
$attr
(
$gene
),
$gene_attrs
{
$attr
},
"
gene
$attr
");
}
cmp_deeply
(
$translator
->
synonyms
(
$gene
),
[
'
C12orf26
',
'
FLJ22789
'
],
'
gene synonyms
');
cmp_deeply
(
$translator
->
xrefs
(
$gene
)
->
[
5
],
{
display_id
=>
'
METTL25
',
cmp_deeply
(
$translator
->
xrefs
(
$gene
)
->
[
5
],
{
display_id
=>
'
METTL25
',
primary_id
=>
84190
,
info_type
=>
'
DEPENDENT
',
info_text
=>
'',
...
...
@@ -96,8 +93,7 @@ is($translator->uri($gene), "http://rdf.ebi.ac.uk/resource/ensembl/ENSG000001277
my
$transcripts
=
$translator
->
transcripts
(
$gene
);
is
(
scalar
@
{
$transcripts
},
11
,
'
number of transcripts
');
my
$transcript
=
$transcripts
->
[
0
];
my
%transcript_attrs
=
(
my
%transcript_attrs
=
(
id
=>
'
ENST00000248306
',
type
=>
'
transcript
',
name
=>
'
METTL25-201
',
...
...
@@ -112,10 +108,12 @@ my %transcript_attrs =
taxon_id
=>
9606
,
provenance
=>
'
INFERRED_FROM_TRANSCRIPT
',
so_term
=>
'
SO:0000234
'
);
);
foreach
my
$attr
(
keys
%transcript_attrs
)
{
is
(
$translator
->
$attr
(
$transcript
),
$transcript_attrs
{
$attr
},
"
transcript
$attr
");
}
cmp_deeply
(
$translator
->
synonyms
(
$transcript
),
[]
,
'
transcript synonyms
');
is
(
scalar
@
{
$translator
->
xrefs
(
$transcript
)},
19
,
'
number of transcript xrefs
');
cmp_deeply
(
$translator
->
xrefs
(
$transcript
)
->
[
13
],
...
...
@@ -132,8 +130,7 @@ is($translator->uri($transcript), "http://rdf.ebi.ac.uk/resource/ensembl.transcr
# compare exon
is
(
scalar
@
{
$translator
->
exons
(
$transcript
)},
12
,
'
number of transcript exons
');
my
$exon
=
$translator
->
exons
(
$transcript
)
->
[
3
];
my
%exon_attrs
=
(
my
%exon_attrs
=
(
end
=>
82476718
,
seq_region_name
=>
'
12
',
coord_system_name
=>
'
chromosome
',
...
...
@@ -144,10 +141,12 @@ my %exon_attrs =
rank
=>
10
,
start
=>
82476644
,
so_term
=>
undef
);
);
foreach
my
$attr
(
keys
%exon_attrs
)
{
is
(
$translator
->
$attr
(
$exon
),
$exon_attrs
{
$attr
},
"
exon
$attr
");
}
is
(
$translator
->
uri
(
$exon
),
"
http://rdf.ebi.ac.uk/resource/ensembl.exon/ENSE00003483236
",
'
exon URI
');
# compare translation, its xrefs and protein features
...
...
modules/t/test-genome-DBs/homo_sapiens/core/biotype.txt
0 → 100644
View file @
86375fd8
64 protein_coding gene core,otherfeatures,rnaseq,vega,presite \N \N coding SO:0001217
65 protein_coding transcript core,otherfeatures,rnaseq,vega,presite \N \N coding SO:0000234
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment