From 84352fcfb01df6d6017231f6e76424328e4b07c4 Mon Sep 17 00:00:00 2001 From: Magali Ruffier <mr6@ebi.ac.uk> Date: Wed, 5 Sep 2018 08:57:52 +0100 Subject: [PATCH] ENSCORESW-2553: retrieve correct accession for peptides --- .../xref_mapping/XrefParser/RefSeqCoordinateParser.pm | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/misc-scripts/xref_mapping/XrefParser/RefSeqCoordinateParser.pm b/misc-scripts/xref_mapping/XrefParser/RefSeqCoordinateParser.pm index 03376c42f6..8e88e0b2a5 100644 --- a/misc-scripts/xref_mapping/XrefParser/RefSeqCoordinateParser.pm +++ b/misc-scripts/xref_mapping/XrefParser/RefSeqCoordinateParser.pm @@ -240,7 +240,7 @@ sub run_script { # Create a range registry for all the exons of the refseq transcript foreach my $transcript_of (sort { $a->start() <=> $b->start() } @$transcripts_of) { - my $id; + my ($id, $tl_id); # We're moving to RefSeq accessions being stored as xrefs rather than # stable ids. But we also need to maintain backwards compatbility. # If it's the new kind, where there's a display_xref use that, @@ -405,12 +405,17 @@ sub run_script { # Also store refseq protein as direct xref for ensembl translation, if translation exists if (defined $tl && defined $tl_of) { if ($tl_of->seq eq $tl->seq) { - ($acc, $version) = split(/\./, $tl_of->stable_id()); + $tl_id = $tl_of->stable_id(); + my @xrefs = grep {$_->{dbname} eq 'GenBank'} @{$tl_of->get_all_DBEntries}; + if(scalar @xrefs == 1) { + $tl_id = $xrefs[0]->primary_id(); + } + ($acc, $version) = split(/\./, $tl_id); $source_id = $peptide_source_id; $source_id = $pred_peptide_source_id if $acc =~ /^XP_/; my $tl_xref_id = $self->add_xref({ acc => $acc, version => $version, - label => $acc, + label => $tl_id, desc => undef, source_id => $source_id, species_id => $species_id, -- GitLab