From 1d06c3ed1d5bfc45b2a568481f6def43e4a3b6ee Mon Sep 17 00:00:00 2001
From: Ian Longden <ianl@sanger.ac.uk>
Date: Tue, 19 Dec 2006 15:18:55 +0000
Subject: [PATCH] SGD stuff for yeast added

---
 .../xref_mapping/XrefParser/SGDParser.pm      | 118 +++++++++++++++++++
 .../xref_mapping/sql/populate_metadata.sql    |  13 +++-
 2 files changed, 127 insertions(+), 4 deletions(-)
 create mode 100644 misc-scripts/xref_mapping/XrefParser/SGDParser.pm

diff --git a/misc-scripts/xref_mapping/XrefParser/SGDParser.pm b/misc-scripts/xref_mapping/XrefParser/SGDParser.pm
new file mode 100644
index 0000000000..d6beb15afd
--- /dev/null
+++ b/misc-scripts/xref_mapping/XrefParser/SGDParser.pm
@@ -0,0 +1,118 @@
+package XrefParser::SGDParser;
+
+# Parser for the SGD gene registry file.  Each tab-separated record becomes
+# one xref keyed by its SGD id, and every '|'-separated alias of the record
+# becomes a synonym of that xref.
+
+use strict;
+use warnings;
+use POSIX qw(strftime);
+use File::Basename;
+
+use XrefParser::BaseParser;
+
+use vars qw(@ISA);
+@ISA = qw(XrefParser::BaseParser);
+
+
+# --------------------------------------------------------------------------------
+# Parse command line and run if being run directly
+
+if (!defined(caller())) {
+
+  if (scalar(@ARGV) < 1 || scalar(@ARGV) > 3) {
+    print "\nUsage: SGDParser.pm file [<source_id> [<species_id>]]\n\n";
+    exit(1);
+  }
+
+  # Hand the optional ids on to run() and report its status to the shell.
+  exit(run(@ARGV));
+
+}
+
+# --------------------------------------------------------------------------------
+# run([$self,] $file [, $source_id [, $species_id]])
+#
+# Reads the tab-separated $file and stores one xref per record plus one
+# synonym per alias.  Works as a method and as a plain function.
+#
+#   $file       - path of the file to parse
+#   $source_id  - optional; looked up from the file name when undefined
+#   $species_id - optional; looked up from the file name when undefined
+#
+# Returns 0 on success and 1 if the file could not be opened.
+
+sub run {
+
+  # Shift off the invocant only when one was really passed; the block above
+  # calls run() as a plain function, which then needs its own parser object.
+  my $self = (@_ && UNIVERSAL::isa($_[0], __PACKAGE__))
+    ? shift
+    : __PACKAGE__->new();
+
+  my ($file, $source_id, $species_id) = @_;
+
+  if (!defined($source_id)) {
+    $source_id = XrefParser::BaseParser->get_source_id_for_filename($file);
+  }
+  if (!defined($species_id)) {
+    $species_id = XrefParser::BaseParser->get_species_id_for_filename($file);
+  }
+
+  # Three-argument open with a lexical handle, so that the file name can
+  # never be read as an open mode and the handle stays private to run().
+  my $sgd_fh;
+  if (!open($sgd_fh, "<", $file)) {
+    print "ERROR: Could not open $file: $!\n";
+    return 1; # 1 is an error
+  }
+
+  my $xref_count = 0;
+  my $syn_count  = 0;
+
+  while (my $line = <$sgd_fh>) {
+    chomp $line;
+
+    # Columns: 0 locus name, 1 aliases, 2 description, 3 gene product,
+    # 4 phenotype, 5 ORF name, 6 SGD id.  Only four of them are stored.
+    my ($locus_name, $alias_name, $desc, $sgd_id) =
+      (split(/\t/, $line))[0, 1, 2, 6];
+
+    # Blank or truncated lines carry no SGD id to key the xref on.
+    next if (!defined($sgd_id) || $sgd_id eq "");
+
+    $self->add_xref($sgd_id, "", $locus_name, $desc, $source_id, $species_id);
+    $xref_count++;
+
+    my @syn = defined($alias_name) ? split(/\|/, $alias_name) : ();
+    foreach my $synonym (@syn) {
+      $self->add_to_syn($sgd_id, $source_id, $synonym);
+      $syn_count++;
+    }
+  }
+
+  close($sgd_fh);
+
+  print $xref_count." SGD Xrefs added with $syn_count synonyms\n";
+  return 0; #successful
+}
+
+
+# --------------------------------------------------------------------------------
+# new()
+#
+# Returns a new, empty parser object blessed into the invoking class, so
+# that a subclass gets objects of its own class.
+
+sub new {
+
+  my $class = shift;
+  $class = ref($class) || $class || __PACKAGE__;
+
+  my $self = {};
+  bless $self, $class;
+  return $self;
+
+}
+
+1;
diff --git a/misc-scripts/xref_mapping/sql/populate_metadata.sql b/misc-scripts/xref_mapping/sql/populate_metadata.sql
index 8acb51841c..8a912cd1ea 100644
--- a/misc-scripts/xref_mapping/sql/populate_metadata.sql
+++ b/misc-scripts/xref_mapping/sql/populate_metadata.sql
@@ -51,6 +51,8 @@ INSERT INTO species (species_id, taxonomy_id, name, aliases) VALUES (8090, 8090,
 INSERT INTO source VALUES (1020, 'MIM', 1, 'Y', 10, 1, "");
 INSERT INTO source VALUES (2000, 'CCDS', 1, 'Y', 10, 1, "");
 INSERT INTO source VALUES (1110, 'EntrezGene', 1, 'Y', 10, 1, "");
+INSERT INTO source VALUES (1250, 'SGD', 1, 'Y',10, 1, "");
+
 
 INSERT INTO source VALUES (1, "Uniprot/SWISSPROT", 1, 'Y',20,1, "" );
 
@@ -96,9 +98,6 @@ INSERT INTO source VALUES (1200, 'RGD', 1, 'Y',30, 1, "");
 INSERT INTO source VALUES (1300, 'Interpro', 1, 'Y', 30, 1, "");
 INSERT INTO source VALUES (1400, 'ZFIN_ID', 1, 'Y', 30, 1, "");
 
-INSERT INTO source VALUES (1250, 'SGD', 1, 'N',30, 1, "");
-
-
 #INSERT INTO source VALUES (2400, 'WormBase', 1, 'Y',50, 1, "");
 INSERT INTO source VALUES (2400, 'wormpep_id', 1, 'Y', 50, 1, "");
 INSERT INTO source VALUES (2410, 'wormbase_gene', 1, 'N',50, 1, "");
@@ -169,6 +168,7 @@ INSERT INTO source VALUES (5010, 'Illumina', 1, 'Y', 50, 1, "");
 
 # Codelink
 INSERT INTO source VALUES (5020, 'Codelink', 1, 'Y', 50, 1, "");
 
+
 ################################################################################
 # Files to fetch data from
@@ -1284,10 +1284,15 @@ INSERT INTO source_url (source_id, species_id, url, file_modified_date, upload_d
 # -----------------------------------------------------------------------------------
 #### Yeast
 
-## EmtrezGene
+## EntrezGene
 INSERT INTO source_url (source_id, species_id, url, file_modified_date, upload_date, parser)\
 VALUES (1110, 4932, 'ftp://ftp.ncbi.nlm.nih.gov/gene/DATA/gene_info.gz', now(), now(), "EntrezGeneParser");
 
+INSERT INTO source_url (source_id, species_id, url, file_modified_date, upload_date, parser)\
+VALUES (1250, 4932, 'ftp://genome-ftp.stanford.edu/pub/yeast/gene_registry/registry.genenames.tab', now(), now(), "SGDParser");
+
+
+
 ## Uniprot
 INSERT INTO source_url (source_id, species_id, url, file_modified_date, upload_date, parser) VALUES\
 (1, 4932, 'ftp://ftp.ebi.ac.uk/pub/databases/uniprot/knowledgebase/uniprot_sprot.dat.gz', now(), now(), "UniProtParser");
-- 
GitLab