Skip to content
Snippets Groups Projects
Commit 7bc92775 authored by Alessandro Vullo's avatar Alessandro Vullo
Browse files

Modified the interface to match the SO part in BiotypeMapper.

Introduced two methods to translate to SO accessions and names.
parent 84a09471
No related branches found
No related tags found
No related merge requests found
......@@ -25,15 +25,15 @@ use Bio::EnsEMBL::Utils::SequenceOntologyMapper
my $ontology_adaptor = $registry->get_adaptor( 'Multi', 'Ontology', 'OntologyTerm' );
my $mapper = SequenceOntologyMapper->new($ontology_adaptor);
print $mapper->translate($feature);
print $mapper->to_accession($feature), "\n";
print $mapper->to_name($feature), "\n";
=head1 DESCRIPTION
Basic mapper from Ensembl feature objects to Sequence Ontology
Basic mapper from Ensembl feature or related objects to Sequence Ontology
(http://www.sequenceontology.org) terms.
There is no reverse mapping, as there does not seem to be any use
for it at the moment (it would require creating empty feature objects).
The interface allows mapping to SO accessions and names.
=cut
......@@ -45,6 +45,86 @@ use warnings;
use Bio::EnsEMBL::Utils::Cache;
use Bio::EnsEMBL::Utils::Exception;
# Gene biotype => Sequence Ontology accession.
# Mapper instances expose this table as $self->{gene_to_acc}; to_accession()
# looks up the biotype of Bio::EnsEMBL::Gene features here before trying the
# more generic mappings. The trailing comment on each entry is the SO term
# name the accession is meant to stand for.
my %gene_so_mapping =
(
'protein_coding' => 'SO:0001217', # protein_coding_gene
'pseudogene' => 'SO:0000336', # pseudogene
'processed_transcript' => 'SO:0001503', # processed_transcript
'lincRNA' => 'SO:0001641', # lincRNA_gene
'polymorphic_pseudogene'=> 'SO:0000336', # pseudogene
'Mt_tRNA' => 'SO:0000088', # mt_gene
'IG_D_gene' => 'SO:0000510', # D_gene
'snoRNA' => 'SO:0001267', # snoRNA_gene
'misc_RNA' => 'SO:0000356', # RNA
'miRNA' => 'SO:0001265', # miRNA_gene
'rRNA' => 'SO:0001637', # rRNA_gene
'snRNA' => 'SO:0001268', # snRNA_gene
'snRNA_pseudogene' => 'SO:0000336', # pseudogene
'tRNA_pseudogene' => 'SO:0000778', # pseudogenic_tRNA
'rRNA_pseudogene' => 'SO:0000777', # pseudogenic_rRNA
'TR_J_gene' => 'SO:0000470', # J_gene
'TR_V_gene' => 'SO:0000466', # V_gene
'TR_C_gene' => 'SO:0000478', # C_gene
'ncRNA' => 'SO:0001263', # ncRNA_gene
'tRNA' => 'SO:0001272', # tRNA_gene
'retrotransposed' => 'SO:0000569', # retrotransposed
## heavily abbreviated
);
# Transcript biotype => Sequence Ontology accession.
# Mapper instances expose this table as $self->{tran_to_acc}; to_accession()
# looks up the biotype of Bio::EnsEMBL::Transcript features here before trying
# the more generic mappings. Each biotype must appear only once: a repeated
# key in a Perl list assignment silently keeps the last value.
my %transcript_so_mapping =
(
  'processed_transcript'               => 'SO:0001503', # processed_transcript
  'nonsense_mediated_decay'            => 'SO:0001621', # NMD_transcript_variant
  'retained_intron'                    => 'SO:0000681', # aberrant_processed_transcript
  'transcribed_unprocessed_pseudogene' => 'SO:0000516', # pseudogenic_transcript
  'processed_pseudogene'               => 'SO:0000043', # processed_pseudogene
  'unprocessed_pseudogene'             => 'SO:0000336', # pseudogene
  'unitary_pseudogene'                 => 'SO:0000336', # pseudogene
  'pseudogene'                         => 'SO:0000336', # pseudogene
  'transcribed_processed_pseudogene'   => 'SO:0000043', # processed_pseudogene
  'retrotransposed'                    => 'SO:0000569', # retrotransposed
  'ncrna_host'                         => 'SO:0000483', # presumably nc_primary_transcript - TODO confirm against SO
  'polymorphic_pseudogene'             => 'SO:0000336', # pseudogene
  'lincRNA'                            => 'SO:0001463', # presumably lincRNA - TODO confirm against SO
  '3prime_overlapping_ncrna'           => 'SO:0000483', # same accession as ncrna_host
  'TR_V_gene'                          => 'SO:0000466', # V_gene
  'TR_V_pseudogene'                    => 'SO:0000336', # pseudogene
  'TR_J_gene'                          => 'SO:0000470', # J_gene
  'IG_C_gene'                          => 'SO:0000478', # C_gene
  'IG_C_pseudogene'                    => 'SO:0000336', # pseudogene
  'TR_C_gene'                          => 'SO:0000478', # C_gene
  'IG_J_pseudogene'                    => 'SO:0000336', # pseudogene
  'miRNA'                              => 'SO:0000276', # miRNA
  'miRNA_pseudogene'                   => 'SO:0000336', # pseudogene
  'disrupted_domain'                   => 'SO:0000681', # aberrant_processed_transcript
  'rRNA'                               => 'SO:0000252', # rRNA
  'rRNA_pseudogene'                    => 'SO:0000777', # pseudogenic_rRNA
  'scRNA_pseudogene'                   => 'SO:0000336', # pseudogene
  'snoRNA'                             => 'SO:0000275', # snoRNA
  'snoRNA_pseudogene'                  => 'SO:0000336', # pseudogene
  'snRNA'                              => 'SO:0000274', # snRNA
  'snRNA_pseudogene'                   => 'SO:0000336', # pseudogene
);
# Feature class name => Sequence Ontology accession.
# Mapper instances expose this table as $self->{feat_to_acc}; it is keyed by
# the exact string returned by ref($feature), so subclasses are not matched
# unless they are listed explicitly. The trailing comment on each entry is the
# SO term name the accession is meant to stand for.
my %feature_so_mapping =
(
'Bio::EnsEMBL::Feature' => 'SO:0000001', # region
'Bio::EnsEMBL::Gene' => 'SO:0000704', # gene
'Bio::EnsEMBL::Transcript' => 'SO:0000673', # transcript
'Bio::EnsEMBL::Exon' => 'SO:0000147', # exon
'Bio::EnsEMBL::Slice' => 'SO:0000001', # region
'Bio::EnsEMBL::SimpleFeature' => 'SO:0001411', # biological_region
'Bio::EnsEMBL::MiscFeature' => 'SO:0001411', # biological_region
'Bio::EnsEMBL::RepeatFeature' => 'SO:0000657', # repeat region
'Bio::EnsEMBL::Variation::VariationFeature' => 'SO:0001060', # sequence variant
'Bio::EnsEMBL::Variation::StructuralVariationFeature' => 'SO:0001537', # structural variant
'Bio::EnsEMBL::Compara::ConstrainedElement' => 'SO:0001009', # DNA_constraint_sequence ???? (choice marked as uncertain by the original author)
'Bio::EnsEMBL::Funcgen::RegulatoryFeature' => 'SO:0005836', # regulatory_region
);
=head1 METHODS
=head2 new
......@@ -63,21 +143,9 @@ sub new {
my $self =
{
ontology_adaptor => $oa,
feat_to_acc =>
{
'Bio::EnsEMBL::Feature' => 'SO:0000001', # region
'Bio::EnsEMBL::Gene' => 'SO:0000704', # gene
'Bio::EnsEMBL::Transcript' => 'SO:0000673', # transcript
'Bio::EnsEMBL::Exon' => 'SO:0000147', # exon
'Bio::EnsEMBL::Slice' => 'SO:0000001', # region
'Bio::EnsEMBL::SimpleFeature' => 'SO:0001411', # biological_region
'Bio::EnsEMBL::MiscFeature' => 'SO:0001411', # biological_region
'Bio::EnsEMBL::RepeatFeature' => 'SO:0000657', # repeat region
'Bio::EnsEMBL::Variation::VariationFeature' => 'SO:0001060', # sequence variant
'Bio::EnsEMBL::Variation::StructuralVariationFeature' => 'SO:0001537', # structural variant
'Bio::EnsEMBL::Compara::ConstrainedElement' => 'SO:0001009', # DNA_constraint_sequence ????
'Bio::EnsEMBL::Funcgen::RegulatoryFeature' => 'SO:0005836', # regulatory_region
}
feat_to_acc => \%feature_so_mapping,
gene_to_acc => \%gene_so_mapping,
tran_to_acc => \%transcript_so_mapping
};
$self->{ontology_adaptor}->isa('Bio::EnsEMBL::DBSQL::OntologyTermAdaptor') or
......@@ -90,59 +158,83 @@ sub new {
return $self;
}
=head2 translate
=head2 to_accession
Arg [0] : Instance of Bio::EnsEMBL::Feature, subclass or
related Storable
Description: Translates a Feature type into an SO term.
Returntype : String; the SO term
Exceptions : if argument is not an instance of Bio::EnsEMBL::Feature
or if cannot translate
Description: translates a Feature type into an SO term accession
Returntype : String; the SO accession
Exceptions : if cannot map to SO term
=cut
# translate($feature)
#
# Legacy entry point, kept with its original logic so existing callers keep
# working. Maps a feature to an SO term *name*:
#   1. look up ref($feature) in the feature-class table;
#   2. otherwise ask the feature itself via SO_term(), if it has one;
#   3. resolve the accession to a name through the ontology adaptor;
#   4. with no accession at all, fall back to class_SO_term() for variation
#      features.
# Throws if no name can be determined.
sub translate {
  my $self    = shift;
  my $feature = shift;

  my $so_accession;
  my $so_name;
  my $ref = ref($feature);

  my $mapping = $self->{feat_to_acc};
  if (exists $mapping->{$ref}) {
    $so_accession = $mapping->{$ref};
  } elsif ($feature->can('SO_term')) {
    $so_accession = $feature->SO_term();
  }

  if ($so_accession) {
    $so_name = $self->_fetch_SO_name_by_accession($so_accession);
  } else {
    #
    # WARNING
    #
    # This is inherited from BiotypeMapper, but there doesn't seem to be
    # a class_SO_term method in class BaseVariationFeature or its ancestors
    #
    $so_name = $feature->class_SO_term()
      if $feature->isa('Bio::EnsEMBL::Variation::BaseVariationFeature');
  }

  throw(sprintf("%s: mapping to sequence ontology term not found", $ref))
    unless $so_name;

  return $so_name;
}

# to_accession($feature)
#
# Maps a feature to an SO accession. Sources are tried from most to least
# specific and the first hit wins:
#   1. the biotype table for genes / transcripts;
#   2. the feature-class table, keyed by ref($feature);
#   3. the feature's own SO_term() method, if it has one.
# Throws if none of them yields an accession.
sub to_accession {
  my $self    = shift;
  my $feature = shift;

  my $so_accession;
  my $ref = ref($feature);

  my ($gene_to_acc, $tran_to_acc, $feat_to_acc) =
    ($self->{gene_to_acc}, $self->{tran_to_acc}, $self->{feat_to_acc});

  if ($feature->isa('Bio::EnsEMBL::Gene') and
      exists $gene_to_acc->{$feature->biotype}) {
    $so_accession = $gene_to_acc->{$feature->biotype};
  } elsif ($feature->isa('Bio::EnsEMBL::Transcript') and
           exists $tran_to_acc->{$feature->biotype}) {
    $so_accession = $tran_to_acc->{$feature->biotype};
  }

  # Only fall back when the biotype lookup found nothing. Previously the
  # SO_term() branch was the plain 'else' of the class-table test, so it also
  # ran after a successful biotype lookup and overwrote the more specific
  # accession for any feature implementing SO_term().
  if (not $so_accession) {
    if (exists $feat_to_acc->{$ref}) {
      $so_accession = $feat_to_acc->{$ref};
    } elsif ($feature->can('SO_term')) {
      $so_accession = $feature->SO_term();
    }
  }

  throw(sprintf("%s: mapping to sequence ontology accession not found", $ref))
    unless $so_accession;

  return $so_accession;
}
=head2 to_name
Arg [0] : Instance of Bio::EnsEMBL::Feature, subclass or
related Storable
Description: translates a Feature type into an SO term name
Returntype : String; the SO term name
Exceptions : if cannot map to an SO term
=cut
# Resolve a feature to its SO term name. The accession is obtained through
# to_accession(); if that call raises, the only remaining source is the
# class_SO_term() of variation features. Throws when no name can be found.
sub to_name {
  my ($self, $feature) = @_;

  my $accession     = eval { $self->to_accession($feature) };
  my $lookup_failed = $@;

  my $name;
  if (not $lookup_failed) {
    $name = $self->_fetch_SO_name_by_accession($accession);
  }
  elsif ($feature->isa('Bio::EnsEMBL::Variation::BaseVariationFeature')) {
    $name = $feature->class_SO_term();
  }

  throw(sprintf("%s: mapping to sequence ontology name not found", ref($feature)))
    unless $name;

  return $name;
}
=head1 PRIVATE METHODS
=head2 _fetch_SO_name_by_accession
Arg [0] : Sequence Ontology accession
Arg [0] : String; Sequence Ontology accession
Description: Returns the name linked to the given accession. These are
internally cached for speed.
Returntype : String; the name of the given accession
......@@ -159,6 +251,7 @@ sub _fetch_SO_name_by_accession {
$so_name = $so_term->name();
$self->{cache}->{$so_accession} = $so_name;
}
return $so_name;
}
......
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment