Skip to content
Snippets Groups Projects
Commit 1d06c3ed authored by Ian Longden's avatar Ian Longden
Browse files

SGD stuff for yeast added

parent c1325273
No related branches found
No related tags found
No related merge requests found
package XrefParser::SGDParser;
use strict;
use POSIX qw(strftime);
use File::Basename;
use XrefParser::BaseParser;
use vars qw(@ISA);
@ISA = qw(XrefParser::BaseParser);
# --------------------------------------------------------------------------------
# Parse command line and run if being run directly.
# caller() is undefined only when this file is executed as a script rather
# than loaded with use/require. The file name is mandatory; source_id and
# species_id may optionally follow it, as the usage message advertises.
if (!defined(caller())) {
    my $arg_count = scalar(@ARGV);
    if ($arg_count < 1 || $arg_count > 3) {
        print "\nUsage: SGDParser.pm file <source_id> <species_id>\n\n";
        exit(1);
    }
    # run() returns 0 on success and 1 on error; hand that to the shell so a
    # failed parse is visible to whatever invoked the script.
    exit(run(@ARGV));
}
# Parse an SGD gene-registry file and load each record as an xref.
#
# Every input line is split on tabs into seven columns, in this order:
#   locus name, aliases ('|'-separated), description, gene product,
#   phenotype, ORF name, SGD id.
# One xref is added per record (accession = SGD id, label = locus name) and
# one synonym per alias.
#
# Arguments (after the invocant, when there is one):
#   $file       - path of the file to parse
#   $source_id  - optional; looked up from the file name when omitted
#   $species_id - optional; looked up from the file name when omitted
#
# Returns 0 on success, 1 if the file could not be opened.
sub run {
    # An invocant is present when we were called from inside another sub (the
    # historical heuristic) or when the first argument is a reference, which a
    # file name can never be. The declaration is kept separate from the
    # conditional shift because "my ... if ..." has undefined behaviour.
    my $self;
    $self = shift if (defined(caller(1)) || ref($_[0]));

    # Invoked as a plain function (e.g. straight from the command line):
    # build a parser so the method calls below have something to act on.
    $self = XrefParser::SGDParser->new() if (!defined($self));

    my ($file, $source_id, $species_id) = @_;

    if (!defined($source_id)) {
        $source_id = XrefParser::BaseParser->get_source_id_for_filename($file);
    }
    if (!defined($species_id)) {
        $species_id = XrefParser::BaseParser->get_species_id_for_filename($file);
    }

    # Three-argument open with a lexical handle: the file name can never be
    # misread as an open mode, and the handle is scoped to this call.
    my $sgd_fh;
    if (!open($sgd_fh, '<', $file)) {
        print "ERROR: Could not open $file: $!\n";
        return 1;    # 1 is an error
    }

    my $xref_count = 0;
    my $syn_count  = 0;

    while (my $line = <$sgd_fh>) {
        chomp $line;

        my ($locus_name, $alias_name, $desc, $gene_prod, $phenotype, $orf_name, $sgd_id)
            = split(/\t/, $line);

        # Blank or truncated lines carry no SGD id, so there is nothing to key
        # an xref on; skip them instead of storing an undefined accession.
        next if (!defined($sgd_id) || $sgd_id eq '');

        $self->add_xref($sgd_id, "", $locus_name, $desc, $source_id, $species_id);
        $xref_count++;

        # The alias column may be empty; an empty string splits to no aliases.
        my @synonyms = split(/\|/, defined($alias_name) ? $alias_name : '');
        foreach my $synonym (@synonyms) {
            $self->add_to_syn($sgd_id, $source_id, $synonym);
            $syn_count++;
        }
    }
    close($sgd_fh);

    print $xref_count." SGD Xrefs added with $syn_count synonyms\n";
    return 0;    # successful
}
# Constructor: returns a new, empty parser object.
#
# The object is blessed into the class it was invoked on (or the class of the
# object it was invoked on), so subclasses inherit a working constructor.
# Called with no invocant at all, it falls back to XrefParser::SGDParser.
sub new {
    my $invocant = shift;
    my $class    = ref($invocant) || $invocant || "XrefParser::SGDParser";

    my $self = {};
    bless $self, $class;
    return $self;
}
1;
......@@ -51,6 +51,8 @@ INSERT INTO species (species_id, taxonomy_id, name, aliases) VALUES (8090, 8090,
INSERT INTO source VALUES (1020, 'MIM', 1, 'Y', 10, 1, "");
INSERT INTO source VALUES (2000, 'CCDS', 1, 'Y', 10, 1, "");
INSERT INTO source VALUES (1110, 'EntrezGene', 1, 'Y', 10, 1, "");
INSERT INTO source VALUES (1250, 'SGD', 1, 'Y',10, 1, "");
INSERT INTO source VALUES (1, "Uniprot/SWISSPROT", 1, 'Y',20,1, "" );
......@@ -96,9 +98,6 @@ INSERT INTO source VALUES (1200, 'RGD', 1, 'Y',30, 1, "");
INSERT INTO source VALUES (1300, 'Interpro', 1, 'Y', 30, 1, "");
INSERT INTO source VALUES (1400, 'ZFIN_ID', 1, 'Y', 30, 1, "");
INSERT INTO source VALUES (1250, 'SGD', 1, 'N',30, 1, "");
#INSERT INTO source VALUES (2400, 'WormBase', 1, 'Y',50, 1, "");
INSERT INTO source VALUES (2400, 'wormpep_id', 1, 'Y', 50, 1, "");
INSERT INTO source VALUES (2410, 'wormbase_gene', 1, 'N',50, 1, "");
......@@ -169,6 +168,7 @@ INSERT INTO source VALUES (5010, 'Illumina', 1, 'Y', 50, 1, "");
# Codelink
INSERT INTO source VALUES (5020, 'Codelink', 1, 'Y', 50, 1, "");
################################################################################
# Files to fetch data from
......@@ -1284,10 +1284,15 @@ INSERT INTO source_url (source_id, species_id, url, file_modified_date, upload_d
# -----------------------------------------------------------------------------------
#### Yeast
## EmtrezGene
## EntrezGene
INSERT INTO source_url (source_id, species_id, url, file_modified_date, upload_date, parser)\
VALUES (1110, 4932, 'ftp://ftp.ncbi.nlm.nih.gov/gene/DATA/gene_info.gz', now(), now(), "EntrezGeneParser");
INSERT INTO source_url (source_id, species_id, url, file_modified_date, upload_date, parser)\
VALUES (1250, 4932, 'ftp://genome-ftp.stanford.edu/pub/yeast/gene_registry/registry.genenames.tab', now(), now(), "SGDParser");
## Uniprot
INSERT INTO source_url (source_id, species_id, url, file_modified_date, upload_date, parser) VALUES\
(1, 4932, 'ftp://ftp.ebi.ac.uk/pub/databases/uniprot/knowledgebase/uniprot_sprot.dat.gz', now(), now(), "UniProtParser");
......
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment