# Provenance: this module was introduced by the patch
#   "New Xrefs parser for Aedes aegypti (CAP annotation)"
#   Karyn Megy <kmegy@sanger.ac.uk>, Fri, 11 Dec 2009 13:00:08 +0000
#   commit e58730a7dc8b66b268af922c5f9b7e744cc31f7a
# as misc-scripts/xref_mapping/XrefParser/AedesCAPParser.pm.

package XrefParser::AedesCAPParser;

use strict;
use warnings;
use File::Basename;

use base qw( XrefParser::BaseParser );

# Aedes CAP database dump - FASTA format
# >...
#
# The header line is split on '|'; the first three fields are used as
# accession, symbol and description.  Any further fields are ignored.
#
# Anopheles one:
# >ANXB10B|Annexin B10B
# MSWYYTPHPTVVPAEDFDASADANALRKAMKGFGTDEQAIIDILCARSNGQRQEIAEAFKRELGRDLIDDLKSELGGKFEDVILGLMLRPEAYLCKQLHKAMDGIGTDEKSLIEII
# CPQTNDQIRAIVDCYEEMYSRPLAEHLCSETSGSFRRLLTMIIVGSRDPQGTVDPELAVEQAKQLYDAGEGKLGTDEEVFYKILAHASFDQLEIVFEEYKSLSGRTIEQALKAELS
# GELYDALSAIVECVQMAPHFFAKRLHKAMDGVGTDDATLIRIIVSRSEIDLQNIKDEFEQMYNKTLVSAVRSETSGDYKRALCALIGNA

# run - parse an Aedes CAP FASTA dump and upload one xref per record.
#
# Arguments (positional, after the optional invocant):
#   $source_id    - stored as SOURCE_ID on every xref
#   $species_id   - stored as SPECIES_ID on every xref
#   $files        - array reference; only its first element is read
#   $release_file - accepted for interface compatibility, not used here
#   $verbose      - when true, a summary line is printed to STDOUT
#
# Returns 0 on completion and 1 when the input file could not be opened.
# Records that cannot be parsed are reported with warn() and skipped.
sub run {

    # The invocant is only shifted off when caller(1) is defined (same
    # heuristic as before).  The declaration is kept separate from the
    # conditional assignment because "my $x = ... if COND" has undefined
    # behaviour according to perlsyn.
    my $self;
    $self = shift if defined caller(1);

    # NOTE(review): falling back to the class name assumes get_filehandle
    # also works as a class method - confirm against XrefParser::BaseParser.
    $self = __PACKAGE__ unless defined $self;

    my ( $source_id, $species_id, $files, $release_file, $verbose ) = @_;

    my $file = $files->[0];

    my @xrefs;

    {
        # Read one FASTA record at a time.  The separator is restored when
        # this block is left, so the upload below runs with the default $/.
        local $/ = "\n>";

        my $file_io = $self->get_filehandle($file);

        if ( !defined $file_io ) {
            print STDERR "Could not open $file\n";
            return 1;
        }

      RECORD:
        while ( defined( my $record = $file_io->getline() ) ) {

            # skip header - this check used to sit before the loop, where it
            # tested the caller's $_ and "next" had no loop of its own
            next RECORD if $record =~ /^File:/;

            my ( $header, $sequence ) = $record =~ /^>?(.+?)\n([^>]*)/s;
            if ( !defined $header ) {
                warn("Can't parse FASTA entry: $record\n");

                # do not store an xref without accession and sequence
                next RECORD;
            }

            # deconstruct header - just use first part
            my ( $accession, $symbol, $description ) = split /\|/, $header;

            # fall back to the accession when the symbol is missing or empty
            $symbol = $accession
              unless defined $symbol && length $symbol;

            # make sequence into one long string
            $sequence =~ s/\n//g;

            # build the xref object and store it
            push @xrefs,
              {
                ACCESSION     => $accession,
                LABEL         => $symbol,
                DESCRIPTION   => $description,
                SEQUENCE      => $sequence,
                SOURCE_ID     => $source_id,
                SPECIES_ID    => $species_id,
                SEQUENCE_TYPE => 'peptide',
                STATUS        => 'manual annotation',
              };
        }

        $file_io->close();
    }

    # NOTE(review): the return value of the upload is not checked here, so
    # the summary below is printed regardless - confirm whether
    # upload_xref_object_graphs signals failure through its return value.
    XrefParser::BaseParser->upload_xref_object_graphs( \@xrefs );

    print scalar(@xrefs) . " Aedes CAP xrefs succesfully parsed\n"
      if ($verbose);

    return 0;
}

1;