From 1f2a64f873c857ec596365d942f24890165b50cf Mon Sep 17 00:00:00 2001
From: Glenn Proctor <gp1@sanger.ac.uk>
Date: Wed, 21 Sep 2005 13:49:08 +0000
Subject: [PATCH] Updated for new file format

>name|description

Now also loads xref descriptions
---
 .../xref_mapping/XrefParser/AnophelesSymbolParser.pm         | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/misc-scripts/xref_mapping/XrefParser/AnophelesSymbolParser.pm b/misc-scripts/xref_mapping/XrefParser/AnophelesSymbolParser.pm
index a78fd8d089..0fec6fd2ee 100644
--- a/misc-scripts/xref_mapping/XrefParser/AnophelesSymbolParser.pm
+++ b/misc-scripts/xref_mapping/XrefParser/AnophelesSymbolParser.pm
@@ -10,7 +10,7 @@ use vars qw(@ISA);
 
 # AnophelesSymbol database dump for anopheles - FASTA format
 #
-# >ANXB10B,ANXB10B,ANXB10B,protein,Annexin B10B
+# >ANXB10B|Annexin B10B
 # MSWYYTPHPTVVPAEDFDASADANALRKAMKGFGTDEQAIIDILCARSNGQRQEIAEAFKRELGRDLIDDLKSELGGKFEDVILGLMLRPEAYLCKQLHKAMDGIGTDEKSLIEII
 # CPQTNDQIRAIVDCYEEMYSRPLAEHLCSETSGSFRRLLTMIIVGSRDPQGTVDPELAVEQAKQLYDAGEGKLGTDEEVFYKILAHASFDQLEIVFEEYKSLSGRTIEQALKAELS
 # GELYDALSAIVECVQMAPHFFAKRLHKAMDGVGTDDATLIRIIVSRSEIDLQNIKDEFEQMYNKTLVSAVRSETSGDYKRALCALIGNA
@@ -34,7 +34,7 @@ sub run {
     my ($header, $sequence) = $_ =~ /^>?(.+?)\n([^>]*)/s or warn("Can't parse FASTA entry: $_\n");
 
     # deconstruct header - just use first part
-    my ($accession, @rest) = split /,/, $header;
+    my ($accession, $description) = split /\|/, $header;
 
     # make sequence into one long string
     $sequence =~ s/\n//g;
@@ -42,6 +42,7 @@ sub run {
     # build the xref object and store it
     $xref->{ACCESSION}     = $accession;
     $xref->{LABEL}         = $accession;
+    $xref->{DESCRIPTION}   = $description;
     $xref->{SEQUENCE}      = $sequence;
     $xref->{SOURCE_ID}     = $source_id;
     $xref->{SPECIES_ID}    = $species_id;
-- 
GitLab