From 84352fcfb01df6d6017231f6e76424328e4b07c4 Mon Sep 17 00:00:00 2001
From: Magali Ruffier <mr6@ebi.ac.uk>
Date: Wed, 5 Sep 2018 08:57:52 +0100
Subject: [PATCH] ENSCORESW-2553: retrieve correct accession for peptides

---
 .../xref_mapping/XrefParser/RefSeqCoordinateParser.pm | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/misc-scripts/xref_mapping/XrefParser/RefSeqCoordinateParser.pm b/misc-scripts/xref_mapping/XrefParser/RefSeqCoordinateParser.pm
index 03376c42f6..8e88e0b2a5 100644
--- a/misc-scripts/xref_mapping/XrefParser/RefSeqCoordinateParser.pm
+++ b/misc-scripts/xref_mapping/XrefParser/RefSeqCoordinateParser.pm
@@ -240,7 +240,7 @@ sub run_script {
 
 # Create a range registry for all the exons of the refseq transcript
       foreach my $transcript_of (sort { $a->start() <=> $b->start() } @$transcripts_of) {
-	my $id;
+	my ($id, $tl_id);
 	# We're moving to RefSeq accessions being stored as xrefs rather than
 	# stable ids. But we also need to maintain backwards compatbility.
 	# If it's the new kind, where there's a display_xref use that,
@@ -405,12 +405,17 @@ sub run_script {
 # Also store refseq protein as direct xref for ensembl translation, if translation exists
           if (defined $tl && defined $tl_of) {
             if ($tl_of->seq eq $tl->seq) {
-              ($acc, $version) = split(/\./, $tl_of->stable_id());
+              $tl_id = $tl_of->stable_id();
+              my @xrefs = grep {$_->{dbname} eq 'GenBank'} @{$tl_of->get_all_DBEntries};
+              if(scalar @xrefs == 1) {
+                $tl_id = $xrefs[0]->primary_id();
+              }
+              ($acc, $version) = split(/\./, $tl_id);
               $source_id = $peptide_source_id;
               $source_id = $pred_peptide_source_id if $acc =~ /^XP_/;
               my $tl_xref_id = $self->add_xref({ acc => $acc,
                                               version => $version,
-                                              label => $acc,
+                                              label => $tl_id,
                                               desc => undef,
                                               source_id => $source_id,
                                               species_id => $species_id,
-- 
GitLab