Commit a920bb82 authored by Leo Gordon's avatar Leo Gordon
Browse files

hive schema drawing added to docs

parent ccc876c0
-- The first 3 tables are from the ensembl core schema: meta, analysis and analysis_description.
-- We create them with the 'IF NOT EXISTS' option in case they already exist in the DB.
################################################################################
#
# Table structure for table 'meta' (FROM THE CORE SCHEMA)
#
-- Key/value metadata table taken from the ensembl core schema.
-- IF NOT EXISTS leaves an already-present 'meta' table untouched.
CREATE TABLE IF NOT EXISTS meta (
    meta_id     INT NOT NULL AUTO_INCREMENT,     -- auto-generated primary key
    species_id  INT UNSIGNED DEFAULT 1,          -- nullable; 1 when not supplied
    meta_key    VARCHAR(40) NOT NULL,
    meta_value  VARCHAR(255) BINARY NOT NULL,    -- BINARY attribute: column uses the binary collation of its charset
    PRIMARY KEY (meta_id),
    -- at most one row per (species_id, meta_key, meta_value) triple
    UNIQUE INDEX species_key_value_idx (species_id, meta_key, meta_value),
    INDEX species_value_idx (species_id, meta_value)
) ENGINE=MyISAM COLLATE=latin1_swedish_ci;
################################################################################
#
# Table structure for table 'analysis' (FROM THE CORE SCHEMA)
#
# semantics:
#
# analysis_id - internal id
# created
# - date to distinguish newer and older versions of the same analysis. Not
# well maintained so far.
# logic_name - string to identify the analysis. Used mainly inside pipeline.
# db, db_version, db_file
# - db should be a database name, db version the version of that db
# db_file the file system location of that database,
# probably wiser to generate from just db and configurations
# program, program_version,program_file
# - The binary used to create a feature. Similar semantic to above
# module, module_version
# - Perl module names (RunnableDBS usually) executing this analysis.
# parameters - a parameter string which is processed by the Perl module
# gff_source, gff_feature
# - how to make a gff dump from features with this analysis
-- Analysis registry taken from the ensembl core schema: one row per analysis,
-- identified internally by analysis_id and externally by a unique logic_name.
-- IF NOT EXISTS leaves an already-present 'analysis' table untouched.
--
-- NOTE(review): tables declared ENGINE=InnoDB in this file add FOREIGN KEYs that
-- reference this MyISAM table; InnoDB requires the referenced table to be InnoDB
-- as well -- confirm the intended engine.
CREATE TABLE IF NOT EXISTS analysis (
    analysis_id      SMALLINT UNSIGNED NOT NULL AUTO_INCREMENT,
    -- NOTE(review): the zero-date default is rejected when sql_mode includes
    -- NO_ZERO_DATE in strict mode -- confirm the target server configuration.
    created          datetime DEFAULT '0000-00-00 00:00:00' NOT NULL,
    logic_name       VARCHAR(40) NOT NULL,
    db               VARCHAR(120),
    db_version       VARCHAR(40),
    db_file          VARCHAR(120),
    program          VARCHAR(80),
    program_version  VARCHAR(40),
    program_file     VARCHAR(80),
    parameters       TEXT,
    module           VARCHAR(80),
    module_version   VARCHAR(40),
    gff_source       VARCHAR(40),
    gff_feature      VARCHAR(40),
    PRIMARY KEY (analysis_id),
    -- A single unique index replaces the former pair of a plain KEY plus an
    -- anonymous UNIQUE on the same column: same lookups and uniqueness
    -- guarantee, half the index maintenance.
    UNIQUE KEY logic_name_idx (logic_name)
) COLLATE=latin1_swedish_ci ENGINE=MyISAM;
################################################################################
#
# Table structure for table 'analysis_description' (FROM THE CORE SCHEMA)
#
-- Display-oriented description of an analysis (ensembl core schema);
-- at most one row per analysis_id.
-- IF NOT EXISTS leaves an already-present table untouched.
CREATE TABLE IF NOT EXISTS analysis_description (
    analysis_id    SMALLINT UNSIGNED NOT NULL,
    description    TEXT,
    display_label  VARCHAR(255),
    displayable    BOOLEAN NOT NULL DEFAULT 1,
    web_data       TEXT,
    -- MyISAM parses FOREIGN KEY clauses but does not enforce them, so this
    -- line only documents the relationship.
    FOREIGN KEY (analysis_id) REFERENCES analysis(analysis_id),
    UNIQUE INDEX analysis_idx (analysis_id)
) ENGINE=MyISAM COLLATE=latin1_swedish_ci;
#################### now, to the 'proper' Hive tables: ##############################
-- ----------------------------------------------------------------------------------
--
-- Table structure for table 'hive'
......@@ -24,6 +118,9 @@ CREATE TABLE hive (
last_check_in datetime NOT NULL,
died datetime DEFAULT NULL,
cause_of_death enum('', 'NO_WORK', 'JOB_LIMIT', 'HIVE_OVERLOAD', 'LIFESPAN', 'CONTAMINATED', 'KILLED_BY_USER', 'MEMLIMIT', 'RUNLIMIT', 'FATALITY') DEFAULT '' NOT NULL,
FOREIGN KEY (analysis_id) REFERENCES analysis(analysis_id),
PRIMARY KEY (worker_id),
INDEX analysis_status (analysis_id, status)
) ENGINE=InnoDB;
......@@ -63,6 +160,8 @@ CREATE TABLE dataflow_rule (
branch_code int(10) default 1 NOT NULL,
input_id_template TEXT DEFAULT NULL,
FOREIGN KEY (from_analysis_id) REFERENCES analysis(analysis_id),
PRIMARY KEY (dataflow_rule_id),
UNIQUE KEY (from_analysis_id, to_analysis_url, branch_code, input_id_template(512))
);
......@@ -91,6 +190,8 @@ CREATE TABLE analysis_ctrl_rule (
condition_analysis_url varchar(255) default '' NOT NULL,
ctrled_analysis_id int(10) unsigned NOT NULL,
FOREIGN KEY (ctrled_analysis_id) REFERENCES analysis(analysis_id),
UNIQUE (condition_analysis_url, ctrled_analysis_id)
);
......@@ -138,6 +239,10 @@ CREATE TABLE analysis_job (
semaphore_count int(10) NOT NULL default 0,
semaphored_job_id int(10) DEFAULT NULL,
FOREIGN KEY (analysis_id) REFERENCES analysis(analysis_id),
FOREIGN KEY (worker_id) REFERENCES hive(worker_id),
FOREIGN KEY (prev_analysis_job_id) REFERENCES analysis_job(analysis_job_id),
PRIMARY KEY (analysis_job_id),
UNIQUE KEY input_id_analysis (input_id, analysis_id),
INDEX claim_analysis_status (job_claim, analysis_id, status, semaphore_count),
......@@ -174,6 +279,10 @@ CREATE TABLE job_message (
msg text,
is_error boolean,
FOREIGN KEY (analysis_id) REFERENCES analysis(analysis_id),
FOREIGN KEY (analysis_job_id) REFERENCES analysis_job(analysis_job_id),
FOREIGN KEY (worker_id) REFERENCES hive(worker_id),
PRIMARY KEY (analysis_job_id, worker_id, moment),
INDEX worker_id (worker_id),
INDEX analysis_job_id (analysis_job_id)
......@@ -203,6 +312,9 @@ CREATE TABLE analysis_job_file (
retry int(10) NOT NULL,
type varchar(16) NOT NULL default '',
path varchar(255) NOT NULL,
FOREIGN KEY (worker_id) REFERENCES hive(worker_id),
FOREIGN KEY (analysis_job_id) REFERENCES analysis_job(analysis_job_id),
UNIQUE KEY job_hive_type (analysis_job_id, worker_id, type),
INDEX worker_id (worker_id)
......@@ -231,6 +343,15 @@ CREATE TABLE analysis_data (
);
-- Resource settings, one row per (rc_id, meadow_type) pair.
-- presumably rc_id identifies a resource class shared with analysis_stats -- verify against callers
CREATE TABLE resource_description (
    rc_id        INT(10) UNSIGNED NOT NULL DEFAULT 0,
    meadow_type  ENUM('LSF', 'LOCAL') NOT NULL DEFAULT 'LSF',
    parameters   VARCHAR(255) NOT NULL DEFAULT '',
    description  VARCHAR(255),                               -- optional free text
    PRIMARY KEY (rc_id, meadow_type)
) ENGINE=InnoDB;
-- ---------------------------------------------------------------------------------
--
-- Table structure for table 'analysis_stats'
......@@ -272,16 +393,12 @@ CREATE TABLE analysis_stats (
sync_lock int(10) default 0 NOT NULL,
rc_id int(10) unsigned default 0 NOT NULL,
FOREIGN KEY (analysis_id) REFERENCES analysis(analysis_id),
FOREIGN KEY (rc_id) REFERENCES resource_description(rc_id),
UNIQUE KEY (analysis_id)
) ENGINE=InnoDB;
-- Resource settings, one row per (rc_id, meadow_type) pair.
-- presumably rc_id identifies a resource class shared with analysis_stats -- verify against callers
CREATE TABLE resource_description (
    rc_id        INT(10) UNSIGNED NOT NULL DEFAULT 0,
    meadow_type  ENUM('LSF', 'LOCAL') NOT NULL DEFAULT 'LSF',
    parameters   VARCHAR(255) NOT NULL DEFAULT '',
    description  VARCHAR(255),                               -- optional free text
    PRIMARY KEY (rc_id, meadow_type)
) ENGINE=InnoDB;
CREATE TABLE analysis_stats_monitor (
time datetime NOT NULL default '0000-00-00 00:00:00',
......@@ -308,6 +425,10 @@ CREATE TABLE analysis_stats_monitor (
last_update datetime NOT NULL,
sync_lock int(10) default 0 NOT NULL,
rc_id int(10) unsigned default 0 NOT NULL,
-- Table-level constraints. The column list above must end with a comma and the
-- last constraint must not, otherwise the CREATE TABLE statement fails to parse.
FOREIGN KEY (analysis_id) REFERENCES analysis(analysis_id),
-- rc_id is the leading column of resource_description's primary key
FOREIGN KEY (rc_id) REFERENCES resource_description(rc_id)
) ENGINE=InnoDB;
-- ---------------------------------------------------------------------------------
......@@ -332,95 +453,10 @@ CREATE TABLE monitor (
throughput float default NULL,
per_worker float default NULL,
analysis varchar(255) default NULL
) ENGINE=InnoDB;
-- The last 3 tables are from the ensembl core schema: meta, analysis and analysis_description.
-- We create them with the 'IF NOT EXISTS' option in case they already exist in the DB.
################################################################################
#
# Table structure for table 'meta' (FROM THE CORE SCHEMA)
#
-- Key/value metadata table taken from the ensembl core schema.
-- IF NOT EXISTS leaves an already-present 'meta' table untouched.
CREATE TABLE IF NOT EXISTS meta (
    meta_id     INT NOT NULL AUTO_INCREMENT,     -- auto-generated primary key
    species_id  INT UNSIGNED DEFAULT 1,          -- nullable; 1 when not supplied
    meta_key    VARCHAR(40) NOT NULL,
    meta_value  VARCHAR(255) BINARY NOT NULL,    -- BINARY attribute: column uses the binary collation of its charset
    PRIMARY KEY (meta_id),
    -- at most one row per (species_id, meta_key, meta_value) triple
    UNIQUE INDEX species_key_value_idx (species_id, meta_key, meta_value),
    INDEX species_value_idx (species_id, meta_value)
) ENGINE=MyISAM COLLATE=latin1_swedish_ci;
################################################################################
#
# Table structure for table 'analysis' (FROM THE CORE SCHEMA)
#
# semantics:
#
# analysis_id - internal id
# created
# - date to distinguish newer and older versions of the same analysis. Not
# well maintained so far.
# logic_name - string to identify the analysis. Used mainly inside pipeline.
# db, db_version, db_file
# - db should be a database name, db version the version of that db
# db_file the file system location of that database,
# probably wiser to generate from just db and configurations
# program, program_version,program_file
# - The binary used to create a feature. Similar semantic to above
# module, module_version
# - Perl module names (RunnableDBS usually) executing this analysis.
# parameters - a parameter string which is processed by the Perl module
# gff_source, gff_feature
# - how to make a gff dump from features with this analysis
FOREIGN KEY (analysis) REFERENCES analysis(logic_name),
-- Analysis registry taken from the ensembl core schema: one row per analysis,
-- identified internally by analysis_id and externally by a unique logic_name.
-- IF NOT EXISTS leaves an already-present 'analysis' table untouched.
--
-- NOTE(review): tables declared ENGINE=InnoDB in this file add FOREIGN KEYs that
-- reference this MyISAM table; InnoDB requires the referenced table to be InnoDB
-- as well -- confirm the intended engine.
CREATE TABLE IF NOT EXISTS analysis (
    analysis_id      SMALLINT UNSIGNED NOT NULL AUTO_INCREMENT,
    -- NOTE(review): the zero-date default is rejected when sql_mode includes
    -- NO_ZERO_DATE in strict mode -- confirm the target server configuration.
    created          datetime DEFAULT '0000-00-00 00:00:00' NOT NULL,
    logic_name       VARCHAR(40) NOT NULL,
    db               VARCHAR(120),
    db_version       VARCHAR(40),
    db_file          VARCHAR(120),
    program          VARCHAR(80),
    program_version  VARCHAR(40),
    program_file     VARCHAR(80),
    parameters       TEXT,
    module           VARCHAR(80),
    module_version   VARCHAR(40),
    gff_source       VARCHAR(40),
    gff_feature      VARCHAR(40),
    PRIMARY KEY (analysis_id),
    -- A single unique index replaces the former pair of a plain KEY plus an
    -- anonymous UNIQUE on the same column: same lookups and uniqueness
    -- guarantee, half the index maintenance.
    UNIQUE KEY logic_name_idx (logic_name)
) COLLATE=latin1_swedish_ci ENGINE=MyISAM;
################################################################################
#
# Table structure for table 'analysis_description' (FROM THE CORE SCHEMA)
#
-- Display-oriented description of an analysis (ensembl core schema);
-- at most one row per analysis_id.
-- IF NOT EXISTS leaves an already-present table untouched.
CREATE TABLE IF NOT EXISTS analysis_description (
    analysis_id    SMALLINT UNSIGNED NOT NULL,
    description    TEXT,
    display_label  VARCHAR(255),
    displayable    BOOLEAN NOT NULL DEFAULT 1,
    web_data       TEXT,
    UNIQUE INDEX analysis_idx (analysis_id)
) ENGINE=MyISAM COLLATE=latin1_swedish_ci;
) ENGINE=InnoDB;
# Auto add schema version to database (should be overridden by Compara's table.sql)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment