Skip to content
Snippets Groups Projects
Commit 94ce4a0b authored by Glenn Proctor's avatar Glenn Proctor
Browse files

Moved here from parent dir

parent 3a060cc3
No related branches found
No related tags found
No related merge requests found
# Populate the appropriate tables in an xref metadata database
################################################################################
# SPECIES
# One row per supported species. species_id is not supplied here; the species
# table declares it auto_increment, so ids follow the order of these statements
# when the table starts out empty.
# aliases is a comma-separated list of alternative names for the species.
INSERT INTO species (taxonomy_id, name, aliases) VALUES (9606, 'homo_sapiens', 'human,hsapiens,homosapiens');
INSERT INTO species (taxonomy_id, name, aliases) VALUES (10090, 'mus_musculus', 'mouse,mmusculus,musmusculus');
# 'rnovegicus' is a historical misspelling; kept alongside the corrected alias
# so existing lookups keep working.
INSERT INTO species (taxonomy_id, name, aliases) VALUES (10116, 'rattus_norvegicus', 'rat,rnorvegicus,rnovegicus,rattusnorvegicus');
INSERT INTO species (taxonomy_id, name, aliases) VALUES (31033, 'fugu_rubripes', 'pufferfish,fugu,frubripes,fugurubripes');
INSERT INTO species (taxonomy_id, name, aliases) VALUES (7165, 'anopheles_gambiae', 'mosquito,anopheles,agambiae,anophelesgambiae');
# 'dmelongaster' is a historical misspelling; kept alongside the corrected alias.
INSERT INTO species (taxonomy_id, name, aliases) VALUES (7227, 'drosophila_melanogaster', 'drosophila,dmelanogaster,dmelongaster,drosophilamelanogaster');
INSERT INTO species (taxonomy_id, name, aliases) VALUES (6239, 'caenorhabditis_elegans', 'elegans,celegans,caenorhabditiselegans');
INSERT INTO species (taxonomy_id, name, aliases) VALUES (6238, 'caenorhabditis_briggsae', 'briggsae,cbriggsae,caenorhabditisbriggsae');
INSERT INTO species (taxonomy_id, name, aliases) VALUES (7955, 'danio_rerio', 'zebrafish,danio,drerio,daniorerio');
INSERT INTO species (taxonomy_id, name, aliases) VALUES (9598, 'pan_troglodytes', 'chimp,chimpanzee,ptroglodytes,pantroglodytes');
INSERT INTO species (taxonomy_id, name, aliases) VALUES (9031, 'gallus_gallus', 'chicken,chick,ggallus,gallusgallus');
INSERT INTO species (taxonomy_id, name, aliases) VALUES (99883, 'tetraodon_nigroviridis', 'tetraodon,tnigroviridis,tetraodonnigroviridis');
INSERT INTO species (taxonomy_id, name, aliases) VALUES (9913, 'bos_taurus', 'cow,btaurus,bostaurus');
# The name was previously misspelled 'canis_familaris'; the old spelling is
# retained as an alias.
# NOTE(review): confirm no caller matches the old spelling against name only.
INSERT INTO species (taxonomy_id, name, aliases) VALUES (9615, 'canis_familiaris', 'dog,doggy,cfamiliaris,canisfamiliaris,canis_familaris');
################################################################################
# SOURCES - types of data we can read
# Columns are named explicitly so these statements do not depend on the
# physical column order of the source table. `release` is backtick-quoted
# because it is a reserved word in newer MySQL versions.

# "High level" sources that we will also download from (via source_url)
INSERT INTO source (source_id, name, `release`, download, ordered) VALUES (1, 'UniProtSwissProt', '1', 'Y', 1);
INSERT INTO source (source_id, name, `release`, download, ordered) VALUES (2, 'UniProtSPTrEMBL', '1', 'Y', 1);
INSERT INTO source (source_id, name, `release`, download, ordered) VALUES (3, 'RefSeq', '1', 'Y', 1);

# Other sources - used to create dependent xrefs, but not to upload from
INSERT INTO source (source_id, name, `release`, download, ordered) VALUES (1010, 'EMBL', '1', 'N', 2);
INSERT INTO source (source_id, name, `release`, download, ordered) VALUES (1020, 'MIM', '1', 'N', 2);
INSERT INTO source (source_id, name, `release`, download, ordered) VALUES (1030, 'PDB', '1', 'N', 2);
INSERT INTO source (source_id, name, `release`, download, ordered) VALUES (1040, 'protein_id', '1', 'N', 2);
INSERT INTO source (source_id, name, `release`, download, ordered) VALUES (1050, 'PUBMED', '1', 'N', 2);
INSERT INTO source (source_id, name, `release`, download, ordered) VALUES (1060, 'MEDLINE', '1', 'N', 2);
INSERT INTO source (source_id, name, `release`, download, ordered) VALUES (1100, 'LocusLink', '1', 'N', 2);

# Sources with ordered = 2 that are nevertheless flagged download = 'Y'
# (each of them has at least one source_url row in this script).
INSERT INTO source (source_id, name, `release`, download, ordered) VALUES (1070, 'GO', '1', 'Y', 2);
INSERT INTO source (source_id, name, `release`, download, ordered) VALUES (1080, 'MarkerSymbol', '1', 'Y', 2);
INSERT INTO source (source_id, name, `release`, download, ordered) VALUES (1090, 'HUGO', '1', 'Y', 2);
################################################################################
# Files to fetch data from
# --------------------------------------------------------------------------------
# UniProt (SwissProt & SPTrEMBL)
# Note currently no UniProt data for fugu, anopheles, c.briggsae or chicken.
### HUMAN
# species_id 1 is hard-coded: it assumes homo_sapiens was the first row inserted
# into an empty species table (species_id is auto_increment).
# checksum starts empty and both dates are set to the time this script runs.
# String literals use single quotes throughout so the statements also parse
# when the ANSI_QUOTES SQL mode is enabled.

## UniProt (source 1 = UniProtSwissProt)
INSERT INTO source_url (source_id, species_id, url, checksum, file_modified_date, upload_date, parser)
VALUES (1, 1, 'ftp://ftp.ebi.ac.uk/pub/databases/SPproteomes/swissprot_files/proteomes/9606.SPC', '', now(), now(), 'UniProtParser');

## RefSeq (source 3) - protein records, GPFF file
INSERT INTO source_url (source_id, species_id, url, checksum, file_modified_date, upload_date, parser)
VALUES (3, 1, 'ftp://ftp.ncbi.nih.gov/refseq/H_sapiens/mRNA_Prot/human.protein.gpff.gz', '', now(), now(), 'RefSeqGPFFParser');

## RefSeq (source 3) - RNA records, FASTA file
INSERT INTO source_url (source_id, species_id, url, checksum, file_modified_date, upload_date, parser)
VALUES (3, 1, 'ftp://ftp.ncbi.nih.gov/refseq/H_sapiens/mRNA_Prot/human.rna.fna.gz', '', now(), now(), 'RefSeqParser');

## GO (source 1070)
INSERT INTO source_url (source_id, species_id, url, checksum, file_modified_date, upload_date, parser)
VALUES (1070, 1, 'ftp://ftp.ebi.ac.uk/pub/databases/GO/goa/HUMAN/gene_association.goa_human.gz', '', now(), now(), 'GOParser');

## HUGO (source 1090) - note the url column holds two space-separated URLs
INSERT INTO source_url (source_id, species_id, url, checksum, file_modified_date, upload_date, parser)
VALUES (1090, 1, 'http://www.gene.ucl.ac.uk/public-files/nomen/ens4.txt http://www.gene.ucl.ac.uk/public-files/nomen/ens1.txt', '', now(), now(), 'HUGOParser');
### MOUSE
# species_id 2 is hard-coded: it assumes mus_musculus was the second row
# inserted into an empty species table (species_id is auto_increment).
# checksum starts empty and both dates are set to the time this script runs.
# String literals use single quotes throughout so the statements also parse
# when the ANSI_QUOTES SQL mode is enabled.

## UniProt (source 1 = UniProtSwissProt)
INSERT INTO source_url (source_id, species_id, url, checksum, file_modified_date, upload_date, parser)
VALUES (1, 2, 'ftp://ftp.ebi.ac.uk/pub/databases/SPproteomes/swissprot_files/proteomes/10090.SPC', '', now(), now(), 'UniProtParser');

## RefSeq (source 3) - protein records, GPFF file
INSERT INTO source_url (source_id, species_id, url, checksum, file_modified_date, upload_date, parser)
VALUES (3, 2, 'ftp://ftp.ncbi.nih.gov/refseq/M_musculus/mRNA_Prot/mouse.protein.gpff.gz', '', now(), now(), 'RefSeqGPFFParser');

## RefSeq (source 3) - RNA records, FASTA file
INSERT INTO source_url (source_id, species_id, url, checksum, file_modified_date, upload_date, parser)
VALUES (3, 2, 'ftp://ftp.ncbi.nih.gov/refseq/M_musculus/mRNA_Prot/mouse.rna.fna.gz', '', now(), now(), 'RefSeqParser');

## MGD (MGI -- MarkerSymbol, source 1080)
INSERT INTO source_url (source_id, species_id, url, checksum, file_modified_date, upload_date, parser)
VALUES (1080, 2, 'ftp://ftp.informatics.jax.org/pub/reports/MRK_SwissProt_TrEMBL.rpt', '', now(), now(), 'MGDParser');

## GO (source 1070)
INSERT INTO source_url (source_id, species_id, url, checksum, file_modified_date, upload_date, parser)
VALUES (1070, 2, 'ftp://ftp.ebi.ac.uk/pub/databases/GO/goa/MOUSE/gene_association.goa_mouse.gz', '', now(), now(), 'GOParser');
################################################################################
# Schema for internal-external database mappings (xrefs)
################################################################################
#
# xref: general external annotation.
# One row per accession; an accession may appear only once per source_id
# (enforced by the unique index below).
CREATE TABLE xref (
  xref_id      INT UNSIGNED NOT NULL AUTO_INCREMENT,
  accession    VARCHAR(255) NOT NULL,
  label        VARCHAR(255),
  description  VARCHAR(255),
  source_id    INT UNSIGNED NOT NULL,   # presumably source.source_id - no FK is declared
  species_id   INT UNSIGNED NOT NULL,   # presumably species.species_id - no FK is declared

  PRIMARY KEY (xref_id),
  # NOTE(review): the index name is spelled "acession_idx" (sic). It is kept
  # unchanged here because renaming it alters the schema surface.
  UNIQUE KEY acession_idx (accession, source_id)
);
################################################################################
# primary_xref: sequence data attached to an xref, at most one row per xref_id.
CREATE TABLE primary_xref (
  xref_id        INT UNSIGNED NOT NULL,   # presumably xref.xref_id - no FK is declared
  sequence       MEDIUMTEXT,
  sequence_type  ENUM('dna', 'peptide'),
  status         ENUM('experimental', 'predicted'),
  source_id      INT UNSIGNED NOT NULL,

  PRIMARY KEY (xref_id)
);
################################################################################
# dependent_xref: links a master xref to a dependent xref.
# No primary key is declared; both id columns are indexed (non-unique).
CREATE TABLE dependent_xref (
  master_xref_id      INT UNSIGNED NOT NULL,
  dependent_xref_id   INT UNSIGNED NOT NULL,
  linkage_annotation  VARCHAR(255),
  source_id           INT UNSIGNED NOT NULL,

  INDEX master_idx (master_xref_id),
  INDEX dependent_idx (dependent_xref_id)
);
################################################################################
# synonym: pairs an xref_id with a synonym_xref_id.
# Only xref_id is indexed (non-unique); no primary key is declared.
CREATE TABLE synonym (
  xref_id          INT UNSIGNED NOT NULL,
  synonym_xref_id  INT UNSIGNED NOT NULL,
  source_id        INT UNSIGNED NOT NULL,

  INDEX xref_idx (xref_id)
);
################################################################################
# source: the kinds of external data that can be read.
# The population script in this file inserts rows with explicit source_id
# values, so auto_increment only applies to rows added without one.
CREATE TABLE source (
  source_id  INT UNSIGNED NOT NULL AUTO_INCREMENT,
  name       VARCHAR(255) NOT NULL,
  # RELEASE is a reserved word in newer MySQL versions; the column name must be
  # backtick-quoted or the statement fails to parse there. The column name
  # itself is unchanged.
  `release`  VARCHAR(255),
  download   ENUM('Y', 'N') DEFAULT 'Y',
  ordered    INT UNSIGNED NOT NULL,

  PRIMARY KEY (source_id),
  KEY name_idx (name)
);
################################################################################
# source_url: where to fetch the data for a (source, species) pair and which
# parser is named for it. checksum and the two dates are nullable bookkeeping
# columns.
CREATE TABLE source_url (
  source_url_id       INT UNSIGNED NOT NULL AUTO_INCREMENT,
  source_id           INT UNSIGNED NOT NULL,
  species_id          INT UNSIGNED NOT NULL,
  url                 VARCHAR(255),
  checksum            VARCHAR(255),
  file_modified_date  DATETIME,
  upload_date         DATETIME,
  parser              VARCHAR(255),

  PRIMARY KEY (source_url_id),
  INDEX source_idx (source_id)
);
################################################################################
# direct_xref: ties a general_xref_id to an Ensembl stable id of a given type
# (gene, transcript or translation). No primary key is declared.
CREATE TABLE direct_xref (
  general_xref_id    INT UNSIGNED NOT NULL,
  ensembl_stable_id  VARCHAR(255),
  type               ENUM('gene', 'transcript', 'translation'),
  linkage_xref       VARCHAR(255),

  INDEX primary_idx (general_xref_id),
  INDEX ensembl_idx (ensembl_stable_id)
);
################################################################################
# species: one row per species, with its taxonomy id, a name and an optional
# aliases string. taxonomy_id and name are indexed but not declared unique.
CREATE TABLE species (
  species_id   INT UNSIGNED NOT NULL AUTO_INCREMENT,
  taxonomy_id  INT UNSIGNED NOT NULL,
  name         VARCHAR(255) NOT NULL,
  aliases      VARCHAR(255),

  PRIMARY KEY (species_id),
  INDEX taxonomy_idx (taxonomy_id),
  INDEX name_idx (name)
);
################################################################################
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment