diff --git a/misc-scripts/xref_mapping/XrefParser/RefSeqCoordinateParser.pm b/misc-scripts/xref_mapping/XrefParser/RefSeqCoordinateParser.pm index 03376c42f67ec4bceed679780fc1c5c5473b6e39..8e88e0b2a514bc15c97db20340065f898d73eefb 100644 --- a/misc-scripts/xref_mapping/XrefParser/RefSeqCoordinateParser.pm +++ b/misc-scripts/xref_mapping/XrefParser/RefSeqCoordinateParser.pm @@ -240,7 +240,7 @@ sub run_script { # Create a range registry for all the exons of the refseq transcript foreach my $transcript_of (sort { $a->start() <=> $b->start() } @$transcripts_of) { - my $id; + my ($id, $tl_id); # We're moving to RefSeq accessions being stored as xrefs rather than # stable ids. But we also need to maintain backwards compatbility. # If it's the new kind, where there's a display_xref use that, @@ -405,12 +405,17 @@ sub run_script { # Also store refseq protein as direct xref for ensembl translation, if translation exists if (defined $tl && defined $tl_of) { if ($tl_of->seq eq $tl->seq) { - ($acc, $version) = split(/\./, $tl_of->stable_id()); + $tl_id = $tl_of->stable_id(); + my @xrefs = grep {$_->{dbname} eq 'GenBank'} @{$tl_of->get_all_DBEntries}; + if(scalar @xrefs == 1) { + $tl_id = $xrefs[0]->primary_id(); + } + ($acc, $version) = split(/\./, $tl_id); $source_id = $peptide_source_id; $source_id = $pred_peptide_source_id if $acc =~ /^XP_/; my $tl_xref_id = $self->add_xref({ acc => $acc, version => $version, - label => $acc, + label => $tl_id, desc => undef, source_id => $source_id, species_id => $species_id,