From 5f610bb0ef69e965c1135c1578f8354dd96f6df4 Mon Sep 17 00:00:00 2001
From: Karyn Megy <kmegy@sanger.ac.uk>
Date: Tue, 5 Oct 2010 14:46:54 +0000
Subject: [PATCH] Parse ImmunoDB data

---
 .../xref_mapping/XrefParser/ImmunoDBParser.pm | 79 +++++++++++++++++++
 1 file changed, 79 insertions(+)
 create mode 100644 misc-scripts/xref_mapping/XrefParser/ImmunoDBParser.pm

diff --git a/misc-scripts/xref_mapping/XrefParser/ImmunoDBParser.pm b/misc-scripts/xref_mapping/XrefParser/ImmunoDBParser.pm
new file mode 100644
index 0000000000..8f647862b9
--- /dev/null
+++ b/misc-scripts/xref_mapping/XrefParser/ImmunoDBParser.pm
@@ -0,0 +1,79 @@
+package XrefParser::ImmunoDBParser;
+
+use strict;
+use POSIX qw(strftime);
+use File::Basename; 
+use base qw( XrefParser::BaseParser );
+
+# --------------------------------------------------------------------------------
+# Parse command line and run if being run directly
+
+if (!defined(caller())) {
+
+  if (scalar(@ARGV) != 1) {
+    print STDERR "\nUsage: ImmunoDBParser.pm file <source_id> <species_id>\n\n";
+    exit(1);
+  }
+
+  run($ARGV[0]);
+
+}
+
+
+sub run {
+  my $self = shift if (defined(caller(1)));
+
+  my $source_id = shift;
+  my $species_id = shift;
+  my $files       = shift;
+  my $release_file   = shift;
+  my $verbose       = shift;
+
+  my $file = @{$files}[0];
+
+  print "source_id = $source_id, species = $species_id, file = $file\n" if($verbose);
+
+  if(!defined($source_id)){
+    $source_id = XrefParser::BaseParser->get_source_id_for_filename($file);
+  }
+  if(!defined($species_id)){
+    $species_id = XrefParser::BaseParser->get_species_id_for_filename($file);
+  }
+
+  my $added = 0;
+  my $count = 0;
+
+  my $file_io = $self->get_filehandle($file);
+
+  if ( !defined $file_io ) {
+    print STDERR "ERROR: Could not open file $file\n";
+    return 1;
+  }
+
+  while ( my $line = $file_io->getline() ) {
+    chomp $line;
+
+    my ($SPECIES,$gene_id, $acc, $family, $subfamily, $description) = split(",",$line);
+
+    my $full_description = $description."($family)" ;
+    if ($subfamily ne $family) { $full_description .= ", subfamily $subfamily" ;}
+    #$subfamily ~= /1-3-beta-D/1,3-beta-D/ ;
+
+    my $xref_id = $self->get_xref($acc,$source_id, $species_id);
+
+    if(!defined($xref_id)){
+      $xref_id = $self->add_xref($acc,"", $acc, $full_description, $source_id, $species_id, "DIRECT");
+      $count++;
+    }
+    if(defined($gene_id) and $gene_id ne "-"){
+      $self->add_direct_xref($xref_id, $gene_id, "Gene", "") ;
+      $added++;
+    }	
+  }
+
+  $file_io->close();
+
+  print "Added $count xrefs and $added Direct xrefs to genes for ImmunoDB\n" if($verbose);
+  return 0;
+}
+1;
-- 
GitLab