From 8abf16e483b8f09637bbf247bac54129f8fc54c5 Mon Sep 17 00:00:00 2001
From: Arnaud Kerhornou <arnaud@ebi.ac.uk>
Date: Wed, 3 Oct 2012 14:26:33 +0000
Subject: [PATCH] PArser to import Phytozome xrefs for Glycine max

---
 .../XrefParser/PhytozomeGmaxParser.pm         | 92 +++++++++++++++++++
 1 file changed, 92 insertions(+)
 create mode 100644 misc-scripts/xref_mapping/XrefParser/PhytozomeGmaxParser.pm

diff --git a/misc-scripts/xref_mapping/XrefParser/PhytozomeGmaxParser.pm b/misc-scripts/xref_mapping/XrefParser/PhytozomeGmaxParser.pm
new file mode 100644
index 0000000000..e2a59886ea
--- /dev/null
+++ b/misc-scripts/xref_mapping/XrefParser/PhytozomeGmaxParser.pm
@@ -0,0 +1,92 @@
+package XrefParser::PhytozomeGmaxParser;
+
+use strict;
+use warnings;
+use Carp;
+use POSIX qw(strftime);
+use File::Basename;
+
+use base qw( XrefParser::BaseParser );
+
+# --------------------------------------------------------------------------------
+# Parse command line and run if being run directly
+
+sub run {
+
+ my ($self, $ref_arg) = @_;
+  my $source_id    = $ref_arg->{source_id};
+  my $species_id   = $ref_arg->{species_id};
+  my $files        = $ref_arg->{files};
+  my $verbose      = $ref_arg->{verbose};
+
+  if((!defined $source_id) or (!defined $species_id) or (!defined $files) ){
+    croak "Need to pass source_id, species_id and  files as pairs";
+  }
+  $verbose |=0;
+
+  my $file = @{$files}[0];
+
+  my $gene_source_id = $self->get_source_id_for_source_name("PHYTOZOME_GMAX_GENE");
+
+  my $gmax_io = $self->get_filehandle($file);
+
+  if ( !defined $gmax_io ) {
+    print STDERR "ERROR: Could not open $file\n";
+    return 1;    # 1 is an error
+  }
+
+  my $xref_count =0;
+  my $syn_count =0;
+
+  while ( $_ = $gmax_io->getline() ) {
+
+    chomp;
+
+    if ($_ =~ /^([^\t]+)\t[^\t]*\t[^\t]*\t[^\t]*\t[^\t]*\t[^\t]*\t[^\t]*\t[^\t]*\t([^\t]*)/) {
+	my ($gmax_id, $desc) = ($1,$2);
+	
+	if ($verbose) {
+	    #print STDERR "gmax_id, $gmax_id\n";
+	    #print STDERR "desc: $desc\n";
+	}
+	
+	my $locus_name = $gmax_id;
+	my $gene_xref_id = undef;
+	
+	if ((defined $desc) && ($desc ne "")) {
+	    $gene_xref_id = $self->add_xref({ acc        => $gmax_id,
+						 label      => $locus_name,
+						 desc       => $desc,
+						 source_id  => $gene_source_id,
+						 species_id => $species_id,
+						 info_type  => "DIRECT"} );
+	}
+	else {
+	    
+	    # no description given
+	    
+	    $gene_xref_id = $self->add_xref({ acc        => $gmax_id,
+						 label      => $locus_name,
+						 source_id  => $gene_source_id,
+						 species_id => $species_id,
+						 info_type  => "DIRECT"} );
+	}
+	$self->add_direct_xref($gene_xref_id, $gmax_id, "Gene", "DIRECT");
+	
+	$xref_count++;
+	
+    }
+    else {
+	if ($verbose) {
+	    print STDERR "failed to parse line, $_\n\n";
+	}
+    }
+  }
+ 
+ $gmax_io->close();
+ 
+ print $xref_count." Phytozome_GMAX Xrefs added\n" if($verbose);
+ return 0; #successful
+}
+
+1;
-- 
GitLab