Skip to content
Snippets Groups Projects
Commit a7720235 authored by Glenn Proctor
Browse files

Use the translations of the _canonical_ transcripts of the source and target...

Use the translations of the _canonical_ transcripts of the source and target genes as the source of GO terms to project, rather than simply the longest translation in each, which was a bit arbitrary and didn't always work.

Also make the print option for GO term projection a bit more verbose - aids debugging.
parent 536bd1ee
No related branches found
No related tags found
No related merge requests found
...@@ -346,12 +346,11 @@ sub project_go_terms { ...@@ -346,12 +346,11 @@ sub project_go_terms {
my ($to_ga, $to_dbea, $ma, $from_gene, $to_gene) = @_; my ($to_ga, $to_dbea, $ma, $from_gene, $to_gene) = @_;
# GO xrefs are linked to translations, not genes # GO xrefs are linked to translations, not genes
# For historical reasons we only project GO terms between the longest translations of each gene # Project GO terms between the translations of the canonical transcripts of each gene
# TODO - consider projecting *all* GO terms from *all* source translations to one translation of target? my $from_translation = get_canonical_translation($from_gene);
# TODO - getting the translation's length seem to involve lots of database accesses - some way to do my $to_translation = get_canonical_translation($to_gene);
# this quicker? Via SQL?
my $from_translation = get_longest_translation($from_gene); return if (!$from_translation || !$to_translation);
my $to_translation = get_longest_translation($to_gene);
my $from_latin_species = ucfirst(Bio::EnsEMBL::Registry->get_alias($from_species)); my $from_latin_species = ucfirst(Bio::EnsEMBL::Registry->get_alias($from_species));
...@@ -384,7 +383,7 @@ sub project_go_terms { ...@@ -384,7 +383,7 @@ sub project_go_terms {
$to_translation->add_DBEntry($dbEntry); $to_translation->add_DBEntry($dbEntry);
print $to_translation->stable_id() . " --> " . $dbEntry->display_id() . "\n" if ($print); print $from_gene->stable_id() . " " . $from_translation->stable_id() . " " . $dbEntry->display_id() . " --> " . $to_gene->stable_id() . " " . $to_translation->stable_id() . "\n" if ($print);
$to_dbea->store($dbEntry, $to_translation->dbID(), 'Translation', 1) if (!$print); $to_dbea->store($dbEntry, $to_translation->dbID(), 'Translation', 1) if (!$print);
...@@ -735,26 +734,20 @@ sub homology_type_allowed { ...@@ -735,26 +734,20 @@ sub homology_type_allowed {
} }
# ---------------------------------------------------------------------- # ----------------------------------------------------------------------
# Get the translation associated with the gene's canonical transcript
sub get_longest_translation { sub get_canonical_translation {
my $gene = shift; my $gene = shift;
my $longest_translation; my $canonical_transcript = $gene->canonical_transcript();
my $max_length = -1;
foreach my $transcript (@{$gene->get_all_Transcripts()}) {
my $translation = $transcript->translation();
if ($translation && $translation->length() > $max_length) {
$longest_translation = $translation;
}
if (!$canonical_transcript) {
warn("Can't get canonical transcript for " . $gene->stable_id() . ", skipping this homology");
return undef;
} }
warn("Can't find longest translation for " . $gene->stable_id()) if (!$longest_translation); return $canonical_transcript->translation();;
return $longest_translation;
} }
......
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment