-
Glenn Proctor authored
Renamed external_db.display_label to db_display_name to distinguish it from similarly-named column in xref.
41512af6
Code owners
Assign users and groups as approvers for specific file changes. Learn more.
table.sql 40.82 KiB
# revisited schema naming issues
# Author: Arne Stabenau
# Date: 12.11.2001
#
# Glenn Proctor July 2003 - adapted for new schema structure
#
# Note that more information about each table can be found in
# ensembl/docs/schema_description/
# Conventions:
# - use lower case and underscores
# - internal ids are integers named tablename_id
# - same name is given in foreign key relations
CREATE TABLE affy_feature (
affy_feature_id INT NOT NULL auto_increment,
seq_region_id INT UNSIGNED NOT NULL,
seq_region_start INT NOT NULL,
seq_region_end INT NOT NULL,
seq_region_strand TINYINT NOT NULL,
mismatches TINYINT,
affy_probe_id INT NOT NULL,
analysis_id INT NOT NULL,
PRIMARY KEY (affy_feature_id),
KEY seq_region_idx( seq_region_id, seq_region_start ),
KEY probe_idx( affy_probe_id )
) COLLATE=latin1_swedish_ci;
CREATE TABLE affy_probe (
affy_probe_id INT NOT NULL auto_increment,
affy_array_id INT NOT NULL,
probeset VARCHAR(40),
name VARCHAR(20),
PRIMARY KEY ( affy_probe_id, affy_array_id ),
KEY probeset_idx( probeset ),
KEY array_idx( affy_array_id )
) COLLATE=latin1_swedish_ci;
CREATE TABLE affy_array (
affy_array_id INT NOT NULL auto_increment,
parent_array_id INT,
probe_setsize TINYINT NOT NULL,
name VARCHAR(40) NOT NULL,
PRIMARY KEY( affy_array_id )
) COLLATE=latin1_swedish_ci;
CREATE TABLE alt_allele (
alt_allele_id INT NOT NULL auto_increment,
gene_id INT NOT NULL,
UNIQUE gene_idx( gene_id ),
UNIQUE allele_idx( alt_allele_id, gene_id )
) COLLATE=latin1_swedish_ci;
################################################################################
#
# Table structure for table 'analysis'
#
# semantics:
# analysis_id - internal id
# created - date to distinguish newer and older versions off the
# same analysis. Not well maintained so far.
# logic_name string to identify the analysis. Used mainly inside pipeline.
# db, db_version, db_file
# - db should be a database name, db version the version of that db
# db_file the file system location of that database,
# probably wiser to generate from just db and configurations
# program, program_version,program_file
# - The binary used to create a feature. Similar semantic to above
# module, module_version
# - Perl module names (RunnableDBS usually) executing this analysis.
# parameters a paramter string which is processed by the perl module
# gff_source, gff_feature
# - how to make a gff dump from features with this analysis
CREATE TABLE analysis (
analysis_id int(10) unsigned NOT NULL auto_increment,
created datetime DEFAULT '0000-00-00 00:00:00' NOT NULL,
logic_name varchar(40) not null,
db varchar(120),
db_version varchar(40),
db_file varchar(120),
program varchar(80),
program_version varchar(40),
program_file varchar(80),
parameters varchar(255),
module varchar(80),
module_version varchar(40),
gff_source varchar(40),
gff_feature varchar(40),
PRIMARY KEY (analysis_id),
KEY logic_name_idx( logic_name ),
UNIQUE(logic_name)
) COLLATE=latin1_swedish_ci;
CREATE TABLE analysis_description (
analysis_id int(10) unsigned NOT NULL,
description text,
display_label varchar(255),
KEY analysis_idx( analysis_id )
) COLLATE=latin1_swedish_ci;
################################################################################
#
# Table structure for table 'dna'
#
# This table stores DNA sequence.
CREATE TABLE dna (
seq_region_id int unsigned NOT NULL,
sequence mediumtext NOT NULL,
PRIMARY KEY (seq_region_id)
) MAX_ROWS = 750000 AVG_ROW_LENGTH = 19000;
################################################################################
#
# Table structure for table 'dnac'
#
# Contains equivalent data to dna table, but 4 letters of DNA code are represented
# by a single binary character, based on 2 bit encoding
# do not need to worry about ambiguity of length, since this is stored in contig.length
# n_line column contains start-end pairs of coordinates in the string that are really Ns
CREATE TABLE dnac (
seq_region_id int unsigned NOT NULL,
sequence mediumblob NOT NULL,
n_line text,
PRIMARY KEY (seq_region_id)
) MAX_ROWS = 750000 AVG_ROW_LENGTH = 19000;
################################################################################
#
# Table structure for table 'exon'
#
# Note seq_region_start always less that seq_region_end, i.e.
# when the exon is on the other strand the seq_region_start
# is specifying the 3prime end of the exon.
CREATE TABLE exon (
exon_id int unsigned NOT NULL auto_increment,
seq_region_id int(10) unsigned NOT NULL, # foreign key, seq_region:seq_region_id
seq_region_start int(10) unsigned NOT NULL, # start of exon within seq_region
seq_region_end int(10) unsigned NOT NULL, # end of exon within specified seq_region
seq_region_strand tinyint(2) NOT NULL, # 1 or -1 depending on the strand of the exon
phase tinyint(2) NOT NULL,
end_phase tinyint(2) NOT NULL,
PRIMARY KEY (exon_id),
KEY seq_region_idx (seq_region_id, seq_region_start )
) COLLATE=latin1_swedish_ci;
################################################################################
#
# Table structure for table 'exon_stable_id'
#
CREATE TABLE exon_stable_id (
exon_id int unsigned not null, # foreign key exon:exon_id
stable_id VARCHAR(128) not null,
version int(10),
created_date DATETIME NOT NULL,
modified_date DATETIME NOT NULL,
PRIMARY KEY( exon_id ),
UNIQUE( stable_id, version )
) COLLATE=latin1_swedish_ci;
################################################################################
#
# Table structure for table 'exon_transcript'
#
# Note that the rank column indicates the 5prime to 3prime position of the
# exon within the transcript ie rank of 1 means the exon is the 5prime most
# within this transcript
CREATE TABLE exon_transcript (
exon_id INT unsigned NOT NULL, # foreign key exon:exon_id
transcript_id INT unsigned NOT NULL, # foregin key transcript:transcript_id
rank int(10) NOT NULL,
PRIMARY KEY (exon_id,transcript_id,rank),
KEY transcript (transcript_id),
KEY exon ( exon_id )
) COLLATE=latin1_swedish_ci;
################################################################################
#
# Table structure for table 'simple_feature'
#
CREATE TABLE simple_feature (
simple_feature_id int unsigned not null auto_increment,
seq_region_id int(10) unsigned NOT NULL,
seq_region_start int(10) unsigned NOT NULL,
seq_region_end int(10) unsigned NOT NULL,
seq_region_strand tinyint(1) NOT NULL,
display_label varchar(40) NOT NULL,
analysis_id int(10) unsigned NOT NULL,
score double,
PRIMARY KEY ( simple_feature_id ),
KEY seq_region_idx (seq_region_id, seq_region_start ),
KEY analysis_idx( analysis_id ),
KEY hit_idx( display_label )
) MAX_ROWS=100000000 AVG_ROW_LENGTH=80;
################################################################################
#
# Table structure for table 'protein_align_feature'
#
CREATE TABLE protein_align_feature (
protein_align_feature_id int unsigned not null auto_increment,
seq_region_id int(10) unsigned NOT NULL,
seq_region_start int(10) unsigned NOT NULL,
seq_region_end int(10) unsigned NOT NULL,
seq_region_strand tinyint(1) DEFAULT '1' NOT NULL,
hit_start int(10) NOT NULL,
hit_end int(10) NOT NULL,
hit_name varchar(40) NOT NULL,
analysis_id int(10) unsigned NOT NULL,
score double,
evalue double,
perc_ident float,
cigar_line text,
PRIMARY KEY ( protein_align_feature_id ),
KEY seq_region_idx( seq_region_id, analysis_id, seq_region_start, score ),
KEY seq_region_idx_2( seq_region_id, seq_region_start),
KEY hit_idx( hit_name ),
KEY analysis_idx( analysis_id )
) MAX_ROWS=100000000 AVG_ROW_LENGTH=80;
################################################################################
#
# Table structure for table 'dna_align_feature'
#
CREATE TABLE dna_align_feature (
dna_align_feature_id int unsigned not null auto_increment,
seq_region_id int(10) unsigned NOT NULL,
seq_region_start int(10) unsigned NOT NULL,
seq_region_end int(10) unsigned NOT NULL,
seq_region_strand tinyint(1) NOT NULL,
hit_start int NOT NULL,
hit_end int NOT NULL,
hit_strand tinyint(1) NOT NULL,
hit_name varchar(40) NOT NULL,
analysis_id int(10) unsigned NOT NULL,
score double,
evalue double,
perc_ident float,
cigar_line text,
PRIMARY KEY ( dna_align_feature_id ),
KEY seq_region_idx( seq_region_id, analysis_id, seq_region_start, score ),
KEY seq_region_idx_2( seq_region_id, seq_region_start),
KEY hit_idx( hit_name ),
KEY analysis_idx( analysis_id )
) MAX_ROWS=100000000 AVG_ROW_LENGTH=80;
################################################################################
#
# Table structure for table 'repeat_consensus'
#
CREATE TABLE repeat_consensus (
repeat_consensus_id int unsigned NOT NULL auto_increment,
repeat_name varchar(255) NOT NULL,
repeat_class varchar(100) NOT NULL, # eg: SINE, LINE, DNA Transposon,
# Retroviral LTR, Satellite,Tandem
repeat_type varchar(40) NOT NULL,
repeat_consensus text,
PRIMARY KEY( repeat_consensus_id ),
KEY name (repeat_name),
KEY class (repeat_class),
KEY consensus(repeat_consensus(10)),
KEY type( repeat_type )
) COLLATE=latin1_swedish_ci;
################################################################################
#
# Table structure for table 'repeat_feature'
#
CREATE TABLE repeat_feature (
repeat_feature_id int unsigned NOT NULL auto_increment,
seq_region_id int(10) unsigned NOT NULL,
seq_region_start int(10) unsigned NOT NULL,
seq_region_end int(10) unsigned NOT NULL,
seq_region_strand tinyint(1) DEFAULT '1' NOT NULL,
repeat_start int(10) NOT NULL,
repeat_end int(10) NOT NULL,
repeat_consensus_id int(10) unsigned NOT NULL,
analysis_id int(10) unsigned NOT NULL,
score double,
PRIMARY KEY ( repeat_feature_id ),
KEY seq_region_idx( seq_region_id, seq_region_start ),
KEY repeat_idx( repeat_consensus_id ),
KEY analysis_idx( analysis_id )
) MAX_ROWS=100000000 AVG_ROW_LENGTH=80;
################################################################################
#
# Table structure for table 'gene'
#
CREATE TABLE gene (
gene_id int unsigned NOT NULL auto_increment,
biotype VARCHAR(40) NOT NULL,
analysis_id int,
seq_region_id int(10) unsigned NOT NULL,
seq_region_start int(10) unsigned NOT NULL,
seq_region_end int(10) unsigned NOT NULL,
seq_region_strand tinyint(2) NOT NULL,
display_xref_id int unsigned,
source VARCHAR(20) NOT NULL,
confidence enum( 'KNOWN', 'NOVEL', 'PUTATIVE', 'PREDICTED' ),
description text,
PRIMARY KEY (gene_id),
KEY seq_region_idx( seq_region_id, seq_region_start ),
KEY xref_id_index ( display_xref_id ),
KEY analysis_idx( analysis_id )
) COLLATE=latin1_swedish_ci;
################################################################################
#
# Table structure for table 'gene_stable_id'
#
CREATE TABLE gene_stable_id (
gene_id int unsigned not null, # foreign key gene:gene_id
stable_id VARCHAR(128) not null,
version int(10),
created_date DATETIME NOT NULL,
modified_date DATETIME NOT NULL,
PRIMARY KEY( gene_id ),
UNIQUE( stable_id, version )
) COLLATE=latin1_swedish_ci;
################################################################################
#
# Table structure for table 'supporting_feature'
#
CREATE TABLE supporting_feature (
exon_id int(11) DEFAULT '0' NOT NULL,
feature_type enum('dna_align_feature','protein_align_feature'),
feature_id int(11) DEFAULT '0' NOT NULL,
UNIQUE all_idx (exon_id,feature_type,feature_id),
KEY feature_idx (feature_type,feature_id)
) MAX_ROWS=100000000 AVG_ROW_LENGTH=80;
################################################################################
#
# Table structure for table 'transcript_supporting_feature'
#
CREATE TABLE transcript_supporting_feature (
transcript_id int(11) DEFAULT '0' NOT NULL,
feature_type enum('dna_align_feature','protein_align_feature'),
feature_id int(11) DEFAULT '0' NOT NULL,
UNIQUE all_idx (transcript_id,feature_type,feature_id),
KEY feature_idx (feature_type,feature_id)
) MAX_ROWS=100000000 AVG_ROW_LENGTH=80;
################################################################################
#
# Table structure for table 'transcript'
#
CREATE TABLE transcript (
transcript_id INT UNSIGNED NOT NULL auto_increment,
gene_id INT UNSIGNED NOT NULL, # foreign key gene:gene_id
seq_region_id int(10) unsigned NOT NULL,
seq_region_start int(10) unsigned NOT NULL,
seq_region_end int(10) unsigned NOT NULL,
seq_region_strand tinyint(2) NOT NULL,
display_xref_id int unsigned,
biotype VARCHAR(40) NOT NULL,
confidence enum( 'KNOWN', 'NOVEL', 'PUTATIVE', 'PREDICTED' ),
description text,
PRIMARY KEY (transcript_id),
KEY seq_region_idx( seq_region_id, seq_region_start ),
KEY gene_index (gene_id),
KEY xref_id_index ( display_xref_id )
) COLLATE=latin1_swedish_ci;
################################################################################
#
# Table structure for table 'transcript_stable_id'
#
CREATE TABLE transcript_stable_id (
transcript_id int unsigned not null, # foreign key transcript:transcript_id
stable_id VARCHAR(128) not null,
version int(10),
created_date DATETIME NOT NULL,
modified_date DATETIME NOT NULL,
PRIMARY KEY( transcript_id ),
UNIQUE( stable_id, version )
) COLLATE=latin1_swedish_ci;
################################################################################
#
# Table structure for table 'translation'
#
# The seq_start and seq_end are 1-based offsets into the *relative* coordinate
# system of start_exon_id and end_exon_id. i.e. if the translation starts at the
# first base of the exon, seq_start would be 1
CREATE TABLE translation (
translation_id INT UNSIGNED NOT NULL auto_increment,
transcript_id INT UNSIGNED NOT NULL,
seq_start INT(10) NOT NULL, # relative to exon start
start_exon_id INT UNSIGNED NOT NULL, # foreign key exon:exon_id
seq_end INT(10) NOT NULL, # relative to exon start
end_exon_id INT UNSIGNED NOT NULL, # foreign key exon:exon_id
PRIMARY KEY (translation_id),
KEY (transcript_id)
) COLLATE=latin1_swedish_ci;
################################################################################
#
# Table structure for table 'translation_stable_id'
#
CREATE TABLE translation_stable_id (
translation_id INT unsigned NOT NULL, # foreign key translation:translation_id
stable_id VARCHAR(128) NOT NULL,
version INT(10),
created_date DATETIME NOT NULL,
modified_date DATETIME NOT NULL,
PRIMARY KEY( translation_id ),
UNIQUE( stable_id, version )
) COLLATE=latin1_swedish_ci;
################################################################################
#
# Table structure for table 'assembly'
#
# This is a denormalised golden path.
# The data in this table defines the "static golden path", i.e. the
# best effort draft full genome sequence as determined by the UCSC or NCBI
# (depending which assembly you are using)
#
# Each row represents a component, e.g. a contig, (comp_seq_region_id,
# FK from seq_region table) at least part of which is present in the golden path.
# The part of the component that is in the path is delimited by fields cmp_start
# and cmp_end (start < end), and the absolute position within the golden path
# chromosome (or other appropriate assembled structure) (asm_seq_region_id) is
# given by asm_start and asm_end.
#
CREATE TABLE assembly (
asm_seq_region_id int unsigned NOT NULL,
cmp_seq_region_id int(10) unsigned NOT NULL,
asm_start int(10) NOT NULL,
asm_end int(10) NOT NULL,
cmp_start int(10) NOT NULL,
cmp_end int(10) NOT NULL,
ori tinyint NOT NULL,
KEY(cmp_seq_region_id),
KEY(asm_seq_region_id, asm_start)
) COLLATE=latin1_swedish_ci;
################################################################################
#
# Table structure for table 'protein_feature'
#
CREATE TABLE protein_feature (
protein_feature_id int(10) unsigned NOT NULL auto_increment,
translation_id int NOT NULL,
seq_start int(10) NOT NULL,
seq_end int(10) NOT NULL,
hit_start int(10) NOT NULL,
hit_end int(10) NOT NULL,
hit_id varchar(40) NOT NULL,
analysis_id int(10) unsigned NOT NULL,
score double NOT NULL,
evalue double,
perc_ident float,
PRIMARY KEY (protein_feature_id),
KEY (translation_id),
KEY hid_index ( hit_id ),
KEY analysis_idx( analysis_id )
) COLLATE=latin1_swedish_ci;
################################################################################
#
# Table structure for table 'interpro'
#
CREATE TABLE interpro (
interpro_ac varchar(40) NOT NULL,
id varchar(40) NOT NULL,
UNIQUE (interpro_ac, id),
KEY (id)
) COLLATE=latin1_swedish_ci;
################################################################################
#
# Table structure for table 'karyotype'
#
CREATE TABLE karyotype (
karyotype_id int unsigned NOT NULL auto_increment,
seq_region_id int unsigned NOT NULL,
seq_region_start int(10) NOT NULL,
seq_region_end int(10) NOT NULL,
band varchar(40) NOT NULL,
stain varchar(40) NOT NULL,
PRIMARY KEY (karyotype_id),
KEY region_band_idx (seq_region_id,band)
) COLLATE=latin1_swedish_ci;
################################################################################
#
# Table structure for table 'object_xref'
#
CREATE TABLE object_xref (
object_xref_id INT not null auto_increment,
ensembl_id int unsigned not null,
ensembl_object_type ENUM( 'RawContig', 'Transcript', 'Gene', 'Translation' ) not null,
xref_id INT unsigned not null,
UNIQUE ( ensembl_object_type, ensembl_id, xref_id ),
KEY oxref_idx( object_xref_id, xref_id, ensembl_object_type, ensembl_id ),
KEY xref_idx(xref_id, ensembl_object_type)
) COLLATE=latin1_swedish_ci;
################################################################################
#
# Table structure for table 'identity_xref'
#
CREATE TABLE identity_xref(
object_xref_id INT unsigned not null ,
query_identity int(5),
target_identity int(5),
hit_start int,
hit_end int,
translation_start int,
translation_end int,
cigar_line text,
score double,
evalue double,
analysis_id int,
PRIMARY KEY (object_xref_id),
KEY analysis_idx( analysis_id )
) COLLATE=latin1_swedish_ci;
################################################################################
#
# Table structure for table 'go_xref'
#
CREATE TABLE go_xref (
object_xref_id int(10) unsigned DEFAULT '0' NOT NULL,
linkage_type enum('IC', 'IDA', 'IEA', 'IEP', 'IGI', 'IMP',
'IPI', 'ISS', 'NAS', 'ND', 'TAS', 'NR') NOT NULL,
KEY (object_xref_id),
UNIQUE(object_xref_id, linkage_type)
) COLLATE=latin1_swedish_ci;
################################################################################
#
# Table structure for table 'xref'
#
CREATE TABLE xref (
xref_id INT unsigned not null auto_increment,
external_db_id int not null,
dbprimary_acc VARCHAR(40) not null,
display_label VARCHAR(40) not null,
version VARCHAR(10) DEFAULT '' NOT NULL,
description VARCHAR(255),
PRIMARY KEY( xref_id ),
UNIQUE KEY id_index( dbprimary_acc, external_db_id ),
KEY display_index ( display_label )
) COLLATE=latin1_swedish_ci;
################################################################################
#
# Table structure for table 'external_synonym'
#
CREATE TABLE external_synonym (
xref_id INT unsigned not null,
synonym VARCHAR(40) not null,
PRIMARY KEY( xref_id, synonym ),
KEY name_index( synonym )
) COLLATE=latin1_swedish_ci;
################################################################################
#
# Table structure for table 'external_db'
#
CREATE TABLE external_db (
external_db_id INT not null,
db_name VARCHAR(27) NOT NULL,
release VARCHAR(40) NOT NULL,
status ENUM ('KNOWNXREF','KNOWN','XREF','PRED','ORTH', 'PSEUDO') not null,
dbprimary_acc_linkable BOOLEAN DEFAULT 1 NOT NULL,
display_label_linkable BOOLEAN DEFAULT 0 NOT NULL,
priority INT NOT NULL,
db_display_name VARCHAR(255),
PRIMARY KEY( external_db_id )
) COLLATE=latin1_swedish_ci;
CREATE TABLE prediction_exon (
prediction_exon_id int unsigned not null auto_increment,
prediction_transcript_id int unsigned not null,
exon_rank smallint unsigned not null,
seq_region_id int unsigned not null,
seq_region_start int unsigned not null,
seq_region_end int unsigned not null,
seq_region_strand tinyint not null,
start_phase tinyint not null,
score double,
p_value double,
PRIMARY KEY( prediction_exon_id ),
KEY (prediction_transcript_id),
KEY ( seq_region_id, seq_region_start )
) COLLATE=latin1_swedish_ci;
CREATE TABLE prediction_transcript (
prediction_transcript_id int unsigned not null auto_increment,
seq_region_id int unsigned not null,
seq_region_start int unsigned not null,
seq_region_end int unsigned not null,
seq_region_strand tinyint not null,
analysis_id int,
display_label varchar(255),
PRIMARY KEY( prediction_transcript_id ),
KEY ( seq_region_id, seq_region_start ),
KEY analysis_idx( analysis_id )
) COLLATE=latin1_swedish_ci;
################################################################################
#
# Table structure for table 'meta'
#
CREATE TABLE meta (
meta_id INT not null auto_increment,
meta_key varchar( 40 ) not null,
meta_value varchar( 255 ) not null,
PRIMARY KEY( meta_id ),
KEY meta_key_index ( meta_key ),
KEY meta_value_index ( meta_value )
) COLLATE=latin1_swedish_ci;
# Auto add schema version to database
INSERT INTO meta (meta_key, meta_value) VALUES ("schema_version", "$Revision$");
################################################################################
#
# Table structure for table 'marker_synonym'
CREATE TABLE marker_synonym (
marker_synonym_id int unsigned not null auto_increment,
marker_id int unsigned not null, # foreign key marker:marker_id
source varchar(20),
name varchar(30),
PRIMARY KEY (marker_synonym_id),
KEY marker_synonym_idx (marker_synonym_id, name),
KEY marker_idx (marker_id)
) COLLATE=latin1_swedish_ci;
################################################################################
#
# Table structure for table 'marker'
CREATE TABLE marker (
marker_id int unsigned not null auto_increment,
display_marker_synonym_id int unsigned, #foreign key marker_synonym:marker_synonym_id
left_primer varchar(100) not null,
right_primer varchar(100) not null,
min_primer_dist int(10) unsigned not null,
max_primer_dist int(10) unsigned not null,
priority int,
type enum('est', 'microsatellite'),
PRIMARY KEY (marker_id),
KEY marker_idx (marker_id, priority)
) COLLATE=latin1_swedish_ci;
################################################################################
#
# Table structure for table 'marker_feature'
CREATE TABLE marker_feature (
marker_feature_id int unsigned not null auto_increment,
marker_id int unsigned not null, #foreign key marker:marker_id
seq_region_id int(10) unsigned NOT NULL, #foreign key contig:seq_region_id
seq_region_start int(10) unsigned NOT NULL,
seq_region_end int(10) unsigned NOT NULL,
analysis_id int(10) unsigned NOT NULL, #foreign key analysis:analysis_id
map_weight int(10) unsigned,
PRIMARY KEY (marker_feature_id),
KEY seq_region_idx (seq_region_id, seq_region_start ),
KEY analysis_idx( analysis_id )
) COLLATE=latin1_swedish_ci;
################################################################################
#
# Table structure for table 'marker_map_location'
CREATE TABLE marker_map_location (
marker_id int unsigned not null, #foreign key marker:marker_id
map_id int unsigned not null, #foreign key map:map_id
chromosome_name varchar(15) not null,
marker_synonym_id int unsigned not null, #foreign key marker_synonym:marker_synonym_id
position varchar(15) not null,
lod_score double,
PRIMARY KEY (marker_id, map_id),
KEY map_idx( map_id, chromosome_name, position)
) COLLATE=latin1_swedish_ci;
################################################################################
#
# Table structure for table 'map'
CREATE TABLE map (
map_id int unsigned not null auto_increment,
map_name varchar(30) not null,
PRIMARY KEY (map_id)
) COLLATE=latin1_swedish_ci;
################################################################################
#
# Table structure for table 'misc_feature'
#
CREATE TABLE misc_feature (
misc_feature_id int(10) unsigned NOT NULL auto_increment,
seq_region_id int(10) unsigned NOT NULL default '0',
seq_region_start int(10) unsigned NOT NULL default '0',
seq_region_end int(10) unsigned NOT NULL default '0',
seq_region_strand tinyint(4) NOT NULL default '0',
PRIMARY KEY (misc_feature_id),
KEY seq_region_idx( seq_region_id, seq_region_start )
) COLLATE=latin1_swedish_ci TYPE=MyISAM;
################################################################################
#
# Table structure for table 'misc_attrib'
#
CREATE TABLE misc_attrib (
misc_feature_id int(10) unsigned NOT NULL default '0',
attrib_type_id smallint(5) unsigned NOT NULL default '0',
value varchar(255) NOT NULL default '',
KEY type_val_idx( attrib_type_id, value ),
KEY misc_feature_idx( misc_feature_id )
) COLLATE=latin1_swedish_ci TYPE=MyISAM;
################################################################################
#
# Table structure for table 'translation_attrib'
#
CREATE TABLE translation_attrib (
translation_id int(10) unsigned NOT NULL default '0',
attrib_type_id smallint(5) unsigned NOT NULL default '0',
value varchar(255) NOT NULL default '',
KEY type_val_idx( attrib_type_id, value ),
KEY translation_idx( translation_id )
) COLLATE=latin1_swedish_ci TYPE=MyISAM;
################################################################################
#
# Table structure for table 'transcript_attrib'
#
CREATE TABLE transcript_attrib (
transcript_id int(10) unsigned NOT NULL default '0',
attrib_type_id smallint(5) unsigned NOT NULL default '0',
value varchar(255) NOT NULL default '',
KEY type_val_idx( attrib_type_id, value ),
KEY transcript_idx( transcript_id )
) COLLATE=latin1_swedish_ci TYPE=MyISAM;
################################################################################
#
# Table structure for table 'seq_region_attrib'
#
CREATE TABLE seq_region_attrib (
seq_region_id int(10) unsigned NOT NULL default '0',
attrib_type_id smallint(5) unsigned NOT NULL default '0',
value varchar(255) NOT NULL default '',
KEY type_val_idx( attrib_type_id, value),
KEY seq_region_idx (seq_region_id)
) COLLATE=latin1_swedish_ci TYPE=MyISAM;
################################################################################
#
# Table structure for table 'attrib_type'
#
CREATE TABLE attrib_type (
attrib_type_id smallint(5) unsigned NOT NULL auto_increment,
code varchar(15) NOT NULL default '',
name varchar(255) NOT NULL default '',
description text,
PRIMARY KEY ( attrib_type_id),
UNIQUE KEY c(code)
) COLLATE=latin1_swedish_ci TYPE=MyISAM;
################################################################################
#
# Table structure for table 'misc_set'
#
CREATE TABLE misc_set (
misc_set_id smallint(5) unsigned NOT NULL auto_increment,
code varchar(25) NOT NULL default '',
name varchar(255) NOT NULL default '',
description text NOT NULL,
max_length int unsigned not null,
PRIMARY KEY (misc_set_id),
UNIQUE KEY c(code)
) COLLATE=latin1_swedish_ci TYPE=MyISAM;
################################################################################
#
# Table structure for table 'misc_feature_misc_set'
#
CREATE TABLE misc_feature_misc_set (
misc_feature_id int(10) unsigned NOT NULL default '0',
misc_set_id smallint(5) unsigned NOT NULL default '0',
PRIMARY KEY ( misc_feature_id, misc_set_id ),
KEY reverse_idx( misc_set_id, misc_feature_id )
) COLLATE=latin1_swedish_ci TYPE=MyISAM;
################################################################################
#
# Tables for QTLs
#
################################################################################
################################################################################
#
# Table structure for table 'qtl'
#
CREATE TABLE qtl (
qtl_id int unsigned auto_increment not null,
trait varchar(255) not null,
lod_score float,
flank_marker_id_1 int,
flank_marker_id_2 int,
peak_marker_id int,
PRIMARY KEY ( qtl_id ),
KEY trait_idx( trait )
) COLLATE=latin1_swedish_ci;
################################################################################
#
# Table structure for table 'qtl_synonym'
#
CREATE TABLE qtl_synonym (
qtl_synonym_id int unsigned auto_increment not null,
qtl_id int unsigned not null,
source_database enum("rat genome database", "ratmap") not null,
source_primary_id varchar(255) not null,
PRIMARY KEY (qtl_synonym_id),
KEY qtl_idx(qtl_id)
) COLLATE=latin1_swedish_ci;
################################################################################
#
# Table structure for table 'qtl_feature'
#
CREATE TABLE qtl_feature (
seq_region_id int not null,
seq_region_start int not null,
seq_region_end int not null,
qtl_id int not null,
analysis_id int not null,
KEY( qtl_id ),
KEY loc_idx( seq_region_id, seq_region_start ),
KEY analysis_idx( analysis_id )
) COLLATE=latin1_swedish_ci;
################################################################################
#
# Tables for stable ID mapping tracking
#
################################################################################
################################################################################
#
# Table structure for table 'mapping_session'
#
CREATE TABLE mapping_session (
mapping_session_id int(11) NOT NULL auto_increment,
old_db_name varchar(80) NOT NULL default '',
new_db_name varchar(80) NOT NULL default '',
created timestamp(14) NOT NULL,
PRIMARY KEY (mapping_session_id)
) COLLATE=latin1_swedish_ci TYPE=MyISAM;
################################################################################
#
# Table structure for table 'stable_id_event'
#
CREATE TABLE stable_id_event (
old_stable_id varchar(128),
old_version smallint,
new_stable_id varchar(128),
new_version smallint,
mapping_session_id int(11) NOT NULL default '0',
type ENUM('gene', 'transcript', 'translation') NOT NULL,
UNIQUE KEY uni_idx (mapping_session_id, old_stable_id, old_version, new_stable_id, new_version, type),
KEY new_idx (new_stable_id),
KEY old_idx (old_stable_id)
) COLLATE=latin1_swedish_ci TYPE=MyISAM;
################################################################################
#
# Table structure for table 'gene_archive'
#
CREATE TABLE gene_archive (
gene_stable_id VARCHAR(128) NOT NULL,
gene_version smallint NOT NULL,
transcript_stable_id VARCHAR(128) NOT NULL,
transcript_version smallint NOT NULL,
translation_stable_id VARCHAR(128) NOT NULL,
translation_version smallint NOT NULL,
mapping_session_id int NOT NULL,
KEY gene_idx( gene_stable_id, gene_version ),
KEY transcript_idx( transcript_stable_id, transcript_version ),
KEY translation_idx( translation_stable_id, translation_version )
) COLLATE=latin1_swedish_ci TYPE=MyISAM;
################################################################################
#
# Table structure for table 'peptide_archive'
#
CREATE TABLE peptide_archive (
translation_stable_id VARCHAR(128) NOT NULL,
translation_version smallint NOT NULL,
peptide_seq mediumtext NOT NULL,
PRIMARY KEY( translation_stable_id, translation_version )
) COLLATE=latin1_swedish_ci TYPE=MyISAM;
################################################################################
#
# Table structure for table 'seq_region'
#
CREATE TABLE seq_region (
seq_region_id INT(10) UNSIGNED NOT NULL AUTO_INCREMENT,
name VARCHAR(40) NOT NULL,
coord_system_id INT(10) NOT NULL,
length INT(10) NOT NULL,
UNIQUE(coord_system_id, name),
PRIMARY KEY (seq_region_id),
KEY name_idx(name)
) COLLATE=latin1_swedish_ci TYPE=MyISAM;
################################################################################
#
# Table structure for table 'assembly_exception'
#
CREATE TABLE assembly_exception (
assembly_exception_id INT(10) UNSIGNED NOT NULL AUTO_INCREMENT,
seq_region_id INT NOT NULL,
seq_region_start INT NOT NULL,
seq_region_end INT NOT NULL,
exc_type ENUM('HAP', 'PAR') NOT NULL,
exc_seq_region_id INT NOT NULL,
exc_seq_region_start INT NOT NULL,
exc_seq_region_end INT NOT NULL,
ori INT NOT NULL,
PRIMARY KEY (assembly_exception_id),
KEY sr_idx (seq_region_id, seq_region_start),
KEY ex_idx (exc_seq_region_id, exc_seq_region_start)
) COLLATE=latin1_swedish_ci TYPE=MyISAM;
################################################################################
#
# Table structure for table 'coord_system'
#
CREATE TABLE coord_system (
coord_system_id INT NOT NULL auto_increment,
name VARCHAR(40) NOT NULL,
version VARCHAR(40),
rank INT NOT NULL,
attrib SET ('default_version', 'sequence_level'),
UNIQUE(name, version),
UNIQUE(rank),
PRIMARY KEY (coord_system_id)
) COLLATE=latin1_swedish_ci TYPE=MyISAM;
################################################################################
#
# Table structure for table 'meta_coord'
#
CREATE TABLE meta_coord (
table_name VARCHAR(40) NOT NULL,
coord_system_id INT NOT NULL,
max_length INT,
UNIQUE(table_name, coord_system_id)
) COLLATE=latin1_swedish_ci TYPE=MyISAM;
CREATE TABLE density_feature (
density_feature_id INT NOT NULL auto_increment,
density_type_id INT NOT NULL, #FK refs density_type
seq_region_id INT NOT NULL, #FK refs seq_region
seq_region_start INT NOT NULL,
seq_region_end INT NOT NULL,
density_value FLOAT NOT NULL,
PRIMARY KEY(density_feature_id),
KEY seq_region_idx (density_type_id, seq_region_id, seq_region_start),
KEY seq_region_id_idx (seq_region_id)
) COLLATE=latin1_swedish_ci TYPE=MyISAM;
CREATE TABLE density_type (
density_type_id INT NOT NULL auto_increment,
analysis_id INT NOT NULL, #FK refs analysis
block_size INT NOT NULL,
value_type ENUM('sum','ratio') NOT NULL,
PRIMARY KEY(density_type_id),
UNIQUE(analysis_id, block_size)
) COLLATE=latin1_swedish_ci TYPE=MyISAM;
################################################################################
#
# Table structure for table 'regulatory_feature'
#
# Describes instances of regulatory_factor binding to the genome.
CREATE TABLE regulatory_feature (
regulatory_feature_id INT NOT NULL auto_increment,
name VARCHAR(255) NOT NULL,
seq_region_id INT NOT NULL, # FK refs seq_region
seq_region_start INT NOT NULL,
seq_region_end INT NOT NULL,
seq_region_strand TINYINT NOT NULL,
analysis_id INT NOT NULL, # FK refs analysis
regulatory_factor_id INT, # FK refs regulatory_motif
PRIMARY KEY(regulatory_feature_id)
) COLLATE=latin1_swedish_ci TYPE=MyISAM;
################################################################################
#
# Table structure for table 'regulatory_factor'
#
CREATE TABLE regulatory_factor (
regulatory_factor_id INT NOT NULL auto_increment,
name VARCHAR(255) NOT NULL,
type ENUM('miRNA_target', 'transcription_factor', 'transcription_factor_complex'),
PRIMARY KEY(regulatory_factor_id)
) COLLATE=latin1_swedish_ci TYPE=MyISAM;
################################################################################
#
# Table structure for table 'regulatory_feature_object'
#
# Relates regulatory regions to the Ensembl objects they influence. Many-many.
CREATE TABLE regulatory_feature_object (
regulatory_feature_id INT NOT NULL, # FK to regulatory_feature
ensembl_object_type ENUM( 'Transcript', 'Translation', 'Gene') NOT NULL,
ensembl_object_id INT NOT NULL, # FK to transcript,gene etc
influence ENUM('positive', 'negative', 'mixed', 'unknown'),
evidence VARCHAR(255),
KEY regulatory_feature_idx (regulatory_feature_id),
KEY ensembl_object_idx (ensembl_object_type, ensembl_object_id)
) COLLATE=latin1_swedish_ci TYPE=MyISAM;
################################################################################
#
# Table structure for table 'regulatory_factor_transcript'
#
# Describes which transcripts code for particular regulatory factors.
CREATE TABLE regulatory_factor_transcript (
transcript_id INT NOT NULL, # FK to transcript
regulatory_factor_id INT NOT NULL, # FK to regulatory_feature
KEY translation_idx (transcript_id),
KEY regulatory_factor_idx (regulatory_factor_id)
) COLLATE=latin1_swedish_ci TYPE=MyISAM;