From 54966aa4cb100cac0726b353d583d05c4b382696 Mon Sep 17 00:00:00 2001 From: Karyn Megy <kmegy@sanger.ac.uk> Date: Wed, 5 Jul 2006 16:46:44 +0000 Subject: [PATCH] New parser for AedesGenBank xrefs --- .../XrefParser/AedesGenBankParser.pm | 95 +++++++++++++++++++ 1 file changed, 95 insertions(+) create mode 100644 misc-scripts/xref_mapping/XrefParser/AedesGenBankParser.pm diff --git a/misc-scripts/xref_mapping/XrefParser/AedesGenBankParser.pm b/misc-scripts/xref_mapping/XrefParser/AedesGenBankParser.pm new file mode 100644 index 0000000000..5769b8da38 --- /dev/null +++ b/misc-scripts/xref_mapping/XrefParser/AedesGenBankParser.pm @@ -0,0 +1,95 @@ +package XrefParser::AedesGenBankParser; + +use strict; +use File::Basename; + +use XrefParser::BaseParser; + +use vars qw(@ISA); +@ISA = qw(XrefParser::BaseParser); + +#Aedes GenBank protein - because not yet in UniProt +#>EAT48991.1 +#MGKSKAHRIKGLTGPKMSLGDQITEGRVSKKPKAPKIRLRAEEEEFVDSRTTKKILQQAR +#KQQAELNLLDDSFGPSLAESAAAASVGKRRHRLGDAASSDESDEEYREEADVDGQDFFDD +#IKINEEDERALEMFQNKDGVKTRTLADLIMDKITEKQTEIQTQFSDTGSLKMEEIDPRVR + +sub run { + + my ($self, $file, $source_id, $species_id) = @_; + my $cpt = 0 ; + + next if (/^File:/); # skip header + + my @xrefs; + + local $/ = "\n>"; + + if(!open(FILE,"<".$file)){ + print "Could not open $file\n"; + return 1; + } + while (<FILE>) { + + my $xref; + + my ($header, $sequence) = $_ =~ /^>?(.+?)\n([^>]*)/s or warn("Can't parse FASTA entry: $_\n"); + #print "My header is -$header-\n" ; + #print "My sequence is -$sequence-\n" ; + + if ($header eq "") { + $header = "Aedes_GenBank".$cpt ; + print STDERR "One sequence with a random name ... \n" ; + $cpt++ ; + } + + # deconstruct header - just use first part + #my ($accession, $description) = split /\|/, $header; #if description + my $accession = $header; #if no description + + + + # make sequence into one long string + $sequence =~ s/\n//g; + + # build the xref object and store it + #print "ACCESSION & LABEL are $accession\n" ; + #print "SEQUENCE is $sequence\n" ; + #print "SOURCE_ID is $source_id\n" ; + #print "SPECIES_ID is $species_id\n" ; + #print "SEQUENCE_TYPE is peptide!\n"; + #print "STATUS is experimental!\n" ; + + $xref->{ACCESSION} = $accession; + $xref->{LABEL} = $accession; + #$xref->{DESCRIPTION} = $description; + $xref->{SEQUENCE} = $sequence; + $xref->{SOURCE_ID} = $source_id; + $xref->{SPECIES_ID} = $species_id; + $xref->{SEQUENCE_TYPE} = 'peptide'; + $xref->{STATUS} = 'experimental'; + + push @xrefs, $xref; + + } + + close (FILE); + + print scalar(@xrefs) . " AedesGenBank xrefs succesfully parsed\n"; + + XrefParser::BaseParser->upload_xref_object_graphs(\@xrefs); + + print "Done\n"; + return 0; +} + + +sub new { + + my $self = {}; + bless $self, "XrefParser::AedesGenBankParser"; + return $self; + +} + +1; -- GitLab