From ed29fc7abb76f2306dbb29ac0994aaaf1a836fc9 Mon Sep 17 00:00:00 2001
From: Karyn Megy <kmegy@sanger.ac.uk>
Date: Fri, 17 Dec 2010 17:17:21 +0000
Subject: [PATCH] VectorBase Ribo.Prot.Gen DB parser

---
 .../XrefParser/VBribosomalParser.pm           | 81 +++++++++++++++++++
 1 file changed, 81 insertions(+)
 create mode 100644 misc-scripts/xref_mapping/XrefParser/VBribosomalParser.pm

diff --git a/misc-scripts/xref_mapping/XrefParser/VBribosomalParser.pm b/misc-scripts/xref_mapping/XrefParser/VBribosomalParser.pm
new file mode 100644
index 0000000000..19331b83e3
--- /dev/null
+++ b/misc-scripts/xref_mapping/XrefParser/VBribosomalParser.pm
@@ -0,0 +1,81 @@
+package XrefParser::VBribosomalParser;
+
+use strict;
+use POSIX qw(strftime);
+use File::Basename;
+use base qw( XrefParser::BaseParser );
+
+# Parse the external description file
+#
+# RPG_DB ID     stable_id       feature Name    Description
+# MOS10001	AGAP011777	gene	RpsA	40S ribosomal protein SA
+# MOS10002	AGAP003768	gene	Rps2	40S ribosomal protein S2
+# MOS10003	AGAP001910	gene	Rps3	40S ribosomal protein S3
+# ...
+
+# Stand-alone entry point: only runs when this file is executed directly.
+if (!defined(caller())) {
+  # The input file is mandatory; source_id and species_id are optional.
+  if (scalar(@ARGV) < 1 or scalar(@ARGV) > 3) {
+    print STDERR "\nUsage: VBribosomalParser.pm file [<source_id> [<species_id>]]\n\n";
+    exit(1);
+  }
+  my ($file, $source_id, $species_id) = @ARGV;
+  run($source_id, $species_id, [$file]);  # run() takes the files as an array ref
+}
+
+# Parse a VectorBase ribosomal protein gene file: store one xref per gene and
+# link it to the gene stable_id with a direct xref.
+# Arguments (after the invocant): source_id, species_id, array ref of file names
+# (only the first is read), release file (unused), verbose flag.
+# Returns 0 on success, 1 when no readable input file is available.
+sub run {
+  # "my $x = ... if COND" is undefined behaviour (perlsyn): decide explicitly,
+  # and fall back to the class name when run() is called as a plain function.
+  my $self = defined(caller(1)) ? shift : __PACKAGE__;
+  my ($source_id, $species_id, $files, $release_file, $verbose) = @_;
+
+  my $file = (ref($files) eq 'ARRAY') ? $files->[0] : undef;
+  if (!defined($file)) {
+    print STDERR "ERROR: No input file given\n";
+    return 1;
+  }
+
+  if(!defined($source_id)){
+    $source_id = XrefParser::BaseParser->get_source_id_for_filename($file);
+  }
+  if(!defined($species_id)){
+    $species_id = XrefParser::BaseParser->get_species_id_for_filename($file);
+  }
+  print "source_id = $source_id, species= $species_id, file = $file\n" if($verbose);
+
+  my $file_io = $self->get_filehandle($file);
+  if ( !defined $file_io ) {
+    print STDERR "ERROR: Could not open file $file\n";
+    return 1;
+  }
+
+  my $added = 0;  # direct xrefs attached to genes
+  my $count = 0;  # xrefs newly created
+  while ( my $line = $file_io->getline() ) {
+    next if ($line =~ /^#/);  # skip comment lines
+    chomp $line;
+    # Columns: RPG DB id, gene stable_id, feature type (unused), name, description.
+    my ($DB_id, $gene_id, $rien, $name, $description) = split("\t",$line);
+    # The gene stable_id is the accession, so a row without one stores nothing.
+    next if (!defined($gene_id) or $gene_id eq "" or $gene_id eq "-");
+    my $descr_full = "$DB_id - $description ($name)";  # no trailing newline in the stored text
+
+    my $xref_id = $self->get_xref($gene_id,$source_id, $species_id);
+    if(!defined($xref_id)){
+      $xref_id = $self->add_xref($gene_id,"", $gene_id, $descr_full, $source_id, $species_id, "DIRECT");
+      $count++;
+    }
+    $self->add_direct_xref($xref_id, $gene_id, "Gene", "");
+    $added++;
+  }
+  $file_io->close();  # only after the whole file has been read, not after the first line
+  print "Added $count xrefs and $added Direct xrefs to genes for VBribosomal\n" if($verbose);
+  return 0;
+}
+1; 
-- 
GitLab