From e58730a7dc8b66b268af922c5f9b7e744cc31f7a Mon Sep 17 00:00:00 2001
From: Karyn Megy <kmegy@sanger.ac.uk>
Date: Fri, 11 Dec 2009 13:00:08 +0000
Subject: [PATCH] New Xrefs parser for Aedes aegypti (CAP annotation)

---
 .../xref_mapping/XrefParser/AedesCAPParser.pm | 82 +++++++++++++++++++
 1 file changed, 82 insertions(+)
 create mode 100644 misc-scripts/xref_mapping/XrefParser/AedesCAPParser.pm

diff --git a/misc-scripts/xref_mapping/XrefParser/AedesCAPParser.pm b/misc-scripts/xref_mapping/XrefParser/AedesCAPParser.pm
new file mode 100644
index 0000000000..33e72cefbb
--- /dev/null
+++ b/misc-scripts/xref_mapping/XrefParser/AedesCAPParser.pm
@@ -0,0 +1,82 @@
+package XrefParser::AedesCAPParser;
+
+use strict;
+use File::Basename;
+
+use base qw( XrefParser::BaseParser );
+
+# Aedes CAP database dump - FASTA format
+# >accession|symbol|description|...   ('|'-separated header, then sequence lines)
+# 
+#
+#
+
+
+# Example entry (taken from the Anopheles dump):
+# >ANXB10B|Annexin B10B
+# MSWYYTPHPTVVPAEDFDASADANALRKAMKGFGTDEQAIIDILCARSNGQRQEIAEAFKRELGRDLIDDLKSELGGKFEDVILGLMLRPEAYLCKQLHKAMDGIGTDEKSLIEII
+# CPQTNDQIRAIVDCYEEMYSRPLAEHLCSETSGSFRRLLTMIIVGSRDPQGTVDPELAVEQAKQLYDAGEGKLGTDEEVFYKILAHASFDQLEIVFEEYKSLSGRTIEQALKAELS
+# GELYDALSAIVECVQMAPHFFAKRLHKAMDGVGTDDATLIRIIVSRSEIDLQNIKDEFEQMYNKTLVSAVRSETSGDYKRALCALIGNA
+
+# Parse an Aedes CAP FASTA dump and upload one peptide xref per entry.
+# Arguments (after the optional invocant): source id, species id, array ref
+# of file names (only the first is read), release file (unused), verbose flag.
+# Returns 0 on success, 1 when the input file cannot be opened.
+sub run {
+  # 'my $x = ... if COND' has undefined behaviour (perlsyn), so declare first.
+  my $self;
+  $self = shift if ( defined( caller(1) ) );
+
+  my $source_id    = shift;
+  my $species_id   = shift;
+  my $files        = shift;
+  my $release_file = shift;    # not used by this parser
+  my $verbose      = shift;
+
+  my $file = $files->[0];
+  my @xrefs;
+
+  # read one FASTA record per getline(): records are separated by "\n>"
+  local $/ = "\n>";
+
+  my $file_io = $self->get_filehandle($file);
+  if ( !defined $file_io ) {
+      print STDERR "Could not open $file\n";
+      return 1;
+  }
+
+  while ( defined( my $entry = $file_io->getline() ) ) {
+    my ($header, $sequence) = $entry =~ /^>?(.+?)\n([^>]*)/s;
+    if ( !defined $header ) {
+      # skip unparsable records rather than storing an xref with no accession
+      warn("Can't parse FASTA entry: $entry\n");
+      next;
+    }
+
+    # deconstruct header - only the first three '|'-separated fields are used
+    my ($accession, $symbol, $description) = split /\|/, $header;
+    if ( !defined $symbol || $symbol eq "" ) { $symbol = $accession; }
+
+    # make sequence into one long string
+    $sequence =~ s/\n//g;
+
+    # build the xref object and store it
+    push @xrefs, {
+      ACCESSION     => $accession,
+      LABEL         => $symbol,
+      DESCRIPTION   => $description,
+      SEQUENCE      => $sequence,
+      SOURCE_ID     => $source_id,
+      SPECIES_ID    => $species_id,
+      SEQUENCE_TYPE => 'peptide',
+      STATUS        => 'manual annotation',
+    };
+  }
+
+  $file_io->close();
+  XrefParser::BaseParser->upload_xref_object_graphs(\@xrefs);
+  print scalar(@xrefs) . " Aedes CAP xrefs succesfully parsed\n" if($verbose);
+  return 0;
+}
+
+1;
-- 
GitLab