From 1d06c3ed1d5bfc45b2a568481f6def43e4a3b6ee Mon Sep 17 00:00:00 2001
From: Ian Longden <ianl@sanger.ac.uk>
Date: Tue, 19 Dec 2006 15:18:55 +0000
Subject: [PATCH] SGD stuff for yeast added

---
 .../xref_mapping/XrefParser/SGDParser.pm      | 77 +++++++++++++++++++
 .../xref_mapping/sql/populate_metadata.sql    | 13 +++-
 2 files changed, 86 insertions(+), 4 deletions(-)
 create mode 100644 misc-scripts/xref_mapping/XrefParser/SGDParser.pm

diff --git a/misc-scripts/xref_mapping/XrefParser/SGDParser.pm b/misc-scripts/xref_mapping/XrefParser/SGDParser.pm
new file mode 100644
index 0000000000..d6beb15afd
--- /dev/null
+++ b/misc-scripts/xref_mapping/XrefParser/SGDParser.pm
@@ -0,0 +1,77 @@
+package XrefParser::SGDParser;
+
+use strict;
+use POSIX qw(strftime);
+use File::Basename;
+
+use XrefParser::BaseParser;
+
+use vars qw(@ISA);
+@ISA = qw(XrefParser::BaseParser);
+
+
+# --------------------------------------------------------------------------------
+# Parse command line and run if being run directly
+
+# Script mode: caller() is undefined only when this file is executed directly.
+if (!defined(caller())) {
+  # The usage line advertises a file plus source and species ids, so accept
+  # one to three arguments and hand all of them on to run().
+  if (scalar(@ARGV) < 1 || scalar(@ARGV) > 3) {
+    print "\nUsage: SGDParser.pm file <source_id> <species_id>\n\n";
+    exit(1);
+  }
+  exit(run(@ARGV)); # run() returns 0 on success and 1 on error
+}
+
+# Parse an SGD gene registry file: add one xref per gene plus its synonyms.
+# Args: file, [source_id], [species_id] (ids default from the file name); returns 0 on success, 1 on error.
+sub run {
+  # Take an invocant only if the first argument really is this class or an
+  # object of it; "my $self = shift if caller(1)" guessed from call depth and
+  # conditional "my" is undefined behaviour.  Plain function calls get a new parser.
+  my $self = UNIVERSAL::isa($_[0], __PACKAGE__) ? shift : __PACKAGE__->new();
+  my ($file, $source_id, $species_id) = @_;
+
+  if(!defined($source_id)){
+    $source_id = XrefParser::BaseParser->get_source_id_for_filename($file);
+  }
+  if(!defined($species_id)){
+    $species_id = XrefParser::BaseParser->get_species_id_for_filename($file);
+  }
+
+  # Three-argument open on a lexical handle keeps the mode out of the file name.
+  my $sgd_fh;
+  if(!open($sgd_fh, "<", $file)){
+    print  "ERROR: Could not open $file\n";
+    return 1; # 1 is an error
+  }
+  my ($xref_count, $syn_count) = (0, 0);
+  while (my $line = <$sgd_fh>) {
+    chomp $line;
+    # Columns: locus, aliases, description, gene product, phenotype, ORF, SGD id.
+    my ($locus_name, $alias_name, $desc, undef, undef, undef, $sgd_id) = split(/\t/, $line);
+    next if (!defined($sgd_id) || $sgd_id eq ""); # blank or truncated line: no accession to store
+    $self->add_xref($sgd_id,"",$locus_name,$desc,$source_id,$species_id);
+    $xref_count++;
+    foreach my $synonym (split(/\|/, defined($alias_name) ? $alias_name : "")){
+      $self->add_to_syn($sgd_id, $source_id, $synonym);
+      $syn_count++;
+    }
+  }
+  close($sgd_fh);
+  print $xref_count." SGD Xrefs added with $syn_count synonyms\n";
+  return 0; #successful
+}
+
+
+
+# Constructor: returns an empty hash-based parser object.
+sub new {
+  # Bless into the invoking class (or the class of an invoking object) so that
+  # subclasses get their own type; fall back to this package for a bare call.
+  my $invocant = shift;
+  return bless {}, (ref($invocant) || $invocant || __PACKAGE__);
+}
+ 
+1;
diff --git a/misc-scripts/xref_mapping/sql/populate_metadata.sql b/misc-scripts/xref_mapping/sql/populate_metadata.sql
index 8acb51841c..8a912cd1ea 100644
--- a/misc-scripts/xref_mapping/sql/populate_metadata.sql
+++ b/misc-scripts/xref_mapping/sql/populate_metadata.sql
@@ -51,6 +51,8 @@ INSERT INTO species (species_id, taxonomy_id, name, aliases) VALUES (8090, 8090,
 INSERT INTO source VALUES (1020, 'MIM', 1, 'Y', 10, 1, "");
 INSERT INTO source VALUES (2000, 'CCDS', 1, 'Y', 10, 1, "");
 INSERT INTO source VALUES (1110, 'EntrezGene', 1, 'Y', 10, 1, "");
+INSERT INTO source VALUES (1250, 'SGD', 1, 'Y',10, 1, "");
+
 
 
 INSERT INTO source VALUES (1,  "Uniprot/SWISSPROT", 1, 'Y',20,1, "" );
@@ -96,9 +98,6 @@ INSERT INTO source VALUES (1200, 'RGD', 1, 'Y',30, 1, "");
 INSERT INTO source VALUES (1300, 'Interpro', 1, 'Y', 30, 1, "");
 INSERT INTO source VALUES (1400, 'ZFIN_ID', 1, 'Y', 30, 1, "");
 
-INSERT INTO source VALUES (1250, 'SGD', 1, 'N',30, 1, "");
-
-
 #INSERT INTO source VALUES (2400, 'WormBase', 1, 'Y',50, 1, "");
 INSERT INTO source VALUES (2400, 'wormpep_id', 1, 'Y', 50, 1, "");
 INSERT INTO source VALUES (2410, 'wormbase_gene', 1, 'N',50, 1, "");
@@ -169,6 +168,7 @@ INSERT INTO source VALUES (5010, 'Illumina', 1, 'Y', 50, 1, "");
 # Codelink
 INSERT INTO source VALUES (5020, 'Codelink', 1, 'Y', 50, 1, "");
 
+
 ################################################################################
 # Files to fetch data from
 
@@ -1284,10 +1284,15 @@ INSERT INTO source_url (source_id, species_id, url, file_modified_date, upload_d
 # -----------------------------------------------------------------------------------
 #### Yeast
 
-##      EmtrezGene
+##      EntrezGene
 INSERT INTO source_url (source_id, species_id, url, file_modified_date, upload_date, parser)\
 VALUES (1110, 4932, 'ftp://ftp.ncbi.nlm.nih.gov/gene/DATA/gene_info.gz', now(), now(), "EntrezGeneParser");
 
+INSERT INTO source_url (source_id, species_id, url, file_modified_date, upload_date, parser)\
+VALUES (1250, 4932, 'ftp://genome-ftp.stanford.edu/pub/yeast/gene_registry/registry.genenames.tab', now(), now(), "SGDParser");
+
+
+
 ## Uniprot
 INSERT INTO source_url (source_id, species_id, url, file_modified_date, upload_date, parser) VALUES\
  (1, 4932, 'ftp://ftp.ebi.ac.uk/pub/databases/uniprot/knowledgebase/uniprot_sprot.dat.gz', now(), now(), "UniProtParser");
-- 
GitLab