From cd590f405bbd3c671c8b266ff2556ce22b568439 Mon Sep 17 00:00:00 2001
From: Glenn Proctor <gp1@sanger.ac.uk>
Date: Mon, 26 Jul 2004 14:40:49 +0000
Subject: [PATCH] Tweak to regexp to stop parser missing the first line of
 protein sequence.

---
 misc-scripts/xref_mapping/RefSeqGPFFParser.pm | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/misc-scripts/xref_mapping/RefSeqGPFFParser.pm b/misc-scripts/xref_mapping/RefSeqGPFFParser.pm
index 3a341857bc..d257b5d924 100644
--- a/misc-scripts/xref_mapping/RefSeqGPFFParser.pm
+++ b/misc-scripts/xref_mapping/RefSeqGPFFParser.pm
@@ -91,7 +91,7 @@ sub create_xrefs {
       my @seq_lines = split /\n/, $seq;
       my $parsed_seq = "";
       foreach my $x (@seq_lines) {
-        my ($seq_only) = $x =~ /\s+\d+\s+(.*)/;
+        my ($seq_only) = $x =~ /\s*\d+\s+(.*)/;
         $parsed_seq .= $seq_only;
       }
       $parsed_seq =~ s/\/\///g;   # remove trailing end-of-record character
-- 
GitLab