Commit 5cc8e4fc authored by Andy Yates's avatar Andy Yates
Browse files

[ENSCORESW-262]. Customised the Exon summary as hash, gene and transcript...

[ENSCORESW-262]. Customised the Exon summary_as_hash; the Gene and Transcript summaries have also been improved. The BiotypeMapper can now ask objects for their SO term and knows that Variation objects are already annotated with one. The BiotypeMapper also caches SO term lookups, which gives a massive speed increase for large result sets.
parent bbc30a4e
......@@ -1575,6 +1575,25 @@ sub load {
$self->get_all_supporting_features();
}
=head2 summary_as_hash
Example : $exon_summary = $exon->summary_as_hash();
Description : Extends Feature::summary_as_hash
Retrieves a summary of this Exon.
Returns : hashref of descriptive strings
Status : Intended for internal use
=cut
sub summary_as_hash {
  my ($self) = @_;

  # Begin with whatever the parent class reports, then decorate it in place
  # with the Exon-specific entries.
  my $summary = $self->SUPER::summary_as_hash();

  # Gather the exon attributes first (scalar context, same call order as the
  # stores below).
  my $is_constitutive = $self->is_constitutive();
  my $start_phase     = $self->phase();
  my $end_phase       = $self->end_phase();

  $summary->{'constitutive'}      = $is_constitutive;
  $summary->{'ensembl_phase'}     = $start_phase;
  $summary->{'ensembl_end_phase'} = $end_phase;

  # The exon's own phase values are exposed under the ensembl_* keys above;
  # the plain 'phase' entry is always reported as 0.
  $summary->{'phase'} = 0;

  return $summary;
}
=head1 DEPRECATED METHODS
=cut
......
......@@ -1486,6 +1486,7 @@ sub summary_as_hash {
$summary_ref->{'description'} = $self->description;
$summary_ref->{'biotype'} = $self->biotype;
$summary_ref->{'external_name'} = $self->external_name;
$summary_ref->{'logic_name'} = $self->analysis->logic_name();
return $summary_ref;
}
......
......@@ -2843,8 +2843,9 @@ sub summary_as_hash {
my $summary_ref = $self->SUPER::summary_as_hash;
$summary_ref->{'description'} = $self->description;
$summary_ref->{'biotype'} = $self->biotype;
$summary_ref->{'logic_name'} = $self->analysis->logic_name();
my $parent_gene = $self->get_Gene();
$summary_ref->{'Parent'} = $parent_gene->display_id;
$summary_ref->{'Parent'} = $parent_gene->stable_id;
return $summary_ref;
}
......
......@@ -50,6 +50,7 @@ use strict;
use warnings;
use Bio::EnsEMBL::Utils::Exception;
use Bio::EnsEMBL::Utils::Cache;
my %gene_so_mapping = (
'protein_coding' => 'SO:0001217', # protein_coding_gene
......@@ -116,11 +117,12 @@ my %transcript_so_mapping = (
# Sequence Ontology accession for each supported feature class, keyed by the
# class name as returned by ref() on the feature.
# Every class is listed exactly once: Perl silently keeps only the last value
# for a repeated key, so a repeated entry would hide a stale accession rather
# than raise an error.
my %feature_so_mapping = (
  'Bio::EnsEMBL::Gene'                                  => 'SO:0000704', # gene
  'Bio::EnsEMBL::Transcript'                            => 'SO:0000673', # transcript
  'Bio::EnsEMBL::Exon'                                  => 'SO:0000147', # exon
  'Bio::EnsEMBL::Slice'                                 => 'SO:0000001', # region
  'Bio::EnsEMBL::Variation::VariationFeature'           => 'SO:0001060', # sequence variant
  'Bio::EnsEMBL::Variation::StructuralVariationFeature' => 'SO:0001537', # structural variant
  'Bio::EnsEMBL::Compara::ConstrainedElement'           => 'SO:0001009', # DNA_constraint_sequence ????
  'Bio::EnsEMBL::Funcgen::RegulatoryFeature'            => 'SO:0005836', # regulatory_region
);
my %grouping_of_biotypes = (
......@@ -169,6 +171,9 @@ sub new {
my $self = {
ontology_adaptor => shift,
};
tie my %cache, 'Bio::EnsEMBL::Utils::Cache', 100;
$self->{cache} = \%cache;
bless $self, $class;
return $self;
......@@ -188,25 +193,54 @@ sub translate_feature_to_SO_term {
my $self = shift;
my $feature = shift;
my $so_accession;
my $so_term;
if (ref($feature) eq "Bio::EnsEMBL::Gene" and exists $gene_so_mapping{$feature->biotype}) {
my $so_name;
my $ref = ref($feature);
if ($feature->isa('Bio::EnsEMBL::Gene') && exists $gene_so_mapping{$feature->biotype}) {
$so_accession = $gene_so_mapping{$feature->biotype};
}
elsif (ref($feature) eq "Bio::EnsEMBL::Transcription" and exists $transcript_so_mapping{$feature->biotype}) {
elsif ($feature->isa('Bio::EnsEMBL::Transcript') && exists $transcript_so_mapping{$feature->biotype}) {
$so_accession = $transcript_so_mapping{$feature->biotype};
}
else {
$so_accession = $feature_so_mapping{ref($feature)};
elsif ($feature->isa('Bio::EnsEMBL::Variation::BaseVariationFeature')) {
$so_name = $feature->class_SO_term();
}
if (defined($so_accession)) {
$so_term = $self->{'ontology_adaptor'}->fetch_by_accession($so_accession);
if (! $so_accession && ! $so_name && exists $feature_so_mapping{$ref}) {
$so_accession = $feature_so_mapping{$ref};
}
else {
throw ("Ontology mapping not found for ".ref($feature));
return "????????";
if($feature->can('SO_term')) {
$so_accession = $feature->SO_term();
}
}
if ($so_accession) {
$so_name = $self->fetch_SO_name_by_accession($so_accession);
}
throw ("Ontology mapping not found for ".ref($feature)) unless $so_name;
return $so_name;
}
=head2 fetch_SO_name_by_accession
Arg [1] : String; Sequence Ontology accession
Description: Returns the name linked to the given accession. Names are
cached internally for speed.
Returntype : String; the name of the term with the given accession
=cut
return $so_term->name;
# Resolve a Sequence Ontology accession to its term name.
#
# Arguments : $so_accession - the SO accession string to look up.
# Returns   : the term name, or undef when the ontology adaptor yields no
#             term for the accession (callers are expected to treat a false
#             result as "no mapping").
# Caching   : successful lookups are stored in $self->{cache}, keyed by
#             accession, so the adaptor is asked at most once per accession
#             while the entry stays in the cache. Failed lookups are not
#             cached.
sub fetch_SO_name_by_accession {
  my ($self, $so_accession) = @_;

  my $so_name = $self->{cache}->{$so_accession};
  if (!$so_name) {
    my $so_term = $self->{'ontology_adaptor'}->fetch_by_accession($so_accession);

    # Guard against the adaptor finding nothing: calling ->name() on undef
    # would die with an opaque "Can't call method" error instead of letting
    # the caller report the missing mapping.
    if (defined $so_term) {
      $so_name = $so_term->name();
      $self->{cache}->{$so_accession} = $so_name;
    }
  }

  return $so_name;
}
......
......@@ -24,8 +24,7 @@ use Bio::EnsEMBL::Utils::IO::GFFSerializer;
use Bio::EnsEMBL::Utils::BiotypeMapper;
my $ontology_adaptor = $registry->get_adaptor( 'Multi', 'Ontology', 'OntologyTerm' );
my $biotype_mapper = new BiotypeMapper($ontology_adaptor);
my $serializer = new GFFSerializer($biotype_mapper,$output_fh);
my $serializer = Bio::EnsEMBL::Utils::IO::GFFSerializer->new($ontology_adaptor,$output_fh);
my $variation_feature_adaptor = $registry->get_adaptor( $config{'species'}, 'variation', 'variationfeature' );
$serializer->print_metadata("Variation Features:");
......@@ -145,8 +144,13 @@ sub print_feature {
$row .= ".\t";
}
# Column 8 - reading frame, necessary only for Exons
$row .= ".\t";
# Column 8 - reading phase, necessary only for Exons
if (exists($summary{'phase'})) {
$row .= $summary{'phase'}."\t";
}
else {
$row .= ".\t";
}
# Column 9 - the 'other' section for all GFF and GVF compliant attributes
# We include Stable ID and biotype where possible to supplement the information in the other columns
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment