From 11903754177ea5ffa840e01570b3b0b391691f60 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Andreas=20Kusalananda=20K=C3=A4h=C3=A4ri?=
 <ak4@sanger.ac.uk>
Date: Mon, 12 Mar 2007 11:34:48 +0000
Subject: [PATCH] Add a parser for Vega fasta files.

---
 .../xref_mapping/XrefParser/VegaParser.pm     | 77 +++++++++++++++++++
 1 file changed, 77 insertions(+)
 create mode 100644 misc-scripts/xref_mapping/XrefParser/VegaParser.pm

diff --git a/misc-scripts/xref_mapping/XrefParser/VegaParser.pm b/misc-scripts/xref_mapping/XrefParser/VegaParser.pm
new file mode 100644
index 0000000000..e27e896791
--- /dev/null
+++ b/misc-scripts/xref_mapping/XrefParser/VegaParser.pm
@@ -0,0 +1,77 @@
+# $Id$
+
+package XrefParser::VegaParser;
+
+use warnings;
+use strict;
+
+use base qw( XrefParser::BaseParser );
+
+# Parses the Vega Fasta file format:
+#
+# >OTTMUSP00000002157 pep:known chromosome:VEGA:1:60690904:60717905:1 Gene:OTTMUSG00000002254 Transcript:OTTMUST00000004499
+# MTLRLLFLALNFFSVQVTENKILVKQSPLLVVDSNEVSLSCRYSYNLLAKEFRASLYKGV
+# NSDVEVCVGNGNFTYQPQFRSNAEFNCDGDFDNETVTFRLWNLHVNHTDIYFCKIEFMYP
+# PPYLDNERSNGTIIHIKEKHLCHTQSSPKLFWALVVVAGVLFCYGLLVTVALCVIWTNSR
+# RNRLLQSDYMNMTPRRPGLTRKPYQPYAPARDFAAYRP
+
+sub run
+{
+    my $self = shift;
+    my ( $source_id, $species_id, $file_name ) = @_;
+
+    my $file_io = $self->get_filehandle($file_name);
+
+    if ( !defined $file_io ) {
+        return 1;    # Failed.
+    }
+
+    my @xrefs;
+    while ( defined( my $line = $file_io->getline() ) ) {
+        chomp $line;
+
+        if ( substr( $line, 0, 1 ) eq '>' ) {
+            # New sequence header.
+            my (
+                $vega_protein_id, $vega_type,
+                $vega_position,   $vega_gene_id,
+                $vega_transcript_id
+            ) = split / /, $line;
+
+            substr( $vega_protein_id, 0, 1, '' ); # Remove initial '>',
+            substr( $vega_gene_id,    0, 5, '' ); # initial 'Gene:', and
+            substr( $vega_transcript_id, 0, 11, '' );  #  'Transcript:'.
+
+            my ( $vega_alphabet, $vega_status ) =
+              ( $vega_type =~ /(.*):(.*)/ );
+
+            my %xref = (
+                'ACCESSION' => $vega_transcript_id,
+                'LABEL'     => $vega_transcript_id,
+                'DESCRIPTION' =>
+                  sprintf( "%s %s", $vega_type, $vega_position ),
+                'SEQUENCE'   => '',
+                'SOURCE_ID'  => $source_id,
+                'SPECIES_ID' => $species_id,
+                'SEQUENCE_TYPE' =>
+                  ( $vega_alphabet eq 'pep' ? 'peptide' : 'dna' ),
+                'STATUS' => $vega_status,
+            );
+
+            push @xrefs, \%xref;
+
+        } else {
+            $xrefs[-1]->{'SEQUENCE'} .= $line;
+        }
+    }
+
+    print scalar(@xrefs) . " Vega Fasta Xrefs successfully parsed\n";
+
+    $self->upload_xref_object_graphs( \@xrefs );
+
+    print "Done\n";
+
+    return 0;    # Successful.
+}
+
+1;
-- 
GitLab