From 7f47b961debd0b25452aa74165b4f53ee386afce Mon Sep 17 00:00:00 2001
From: Andrew Yates <ayates@ebi.ac.uk>
Date: Fri, 2 Mar 2012 09:14:19 +0000
Subject: [PATCH] Patches for 67. Index addition on transcript, new status on
 transcript & gene and a new supporting evidence table for introns

---
 sql/patch_66_67_c.sql | 24 ++++++++++++++++++++++++
 sql/patch_66_67_d.sql | 16 ++++++++++++++++
 sql/patch_66_67_e.sql | 13 +++++++++++++
 sql/table.sql         | 40 ++++++++++++++++++++++++++++++++++------
 4 files changed, 87 insertions(+), 6 deletions(-)
 create mode 100644 sql/patch_66_67_c.sql
 create mode 100644 sql/patch_66_67_d.sql
 create mode 100644 sql/patch_66_67_e.sql

diff --git a/sql/patch_66_67_c.sql b/sql/patch_66_67_c.sql
new file mode 100644
index 0000000000..b0c825664a
--- /dev/null
+++ b/sql/patch_66_67_c.sql
@@ -0,0 +1,24 @@
+# patch_66_67_c.sql
+#
+# Title: Adding intron_supporting_evidence table
+#
+# Description: Introns can be supported by an external feature. This gives a 
+# weight to how much we believe the intron
+# 
+
+CREATE TABLE intron_supporting_evidence (
+  intron_supporting_evidence_id INT(10) UNSIGNED NOT NULL AUTO_INCREMENT,
+  previous_exon_id INT(10) UNSIGNED NOT NULL,
+  next_exon_id INT(10) UNSIGNED NOT NULL,
+  hit_name VARCHAR(100) NOT NULL,
+  score DECIMAL(10,3),
+  score_type ENUM('NONE', 'DEPTH') DEFAULT 'NONE',
+  
+  PRIMARY KEY (intron_supporting_evidence_id),
+  
+  UNIQUE KEY (previous_exon_id, next_exon_id)
+) COLLATE=latin1_swedish_ci ENGINE=MyISAM;
+
+# Patch identifier:
+INSERT INTO meta (species_id, meta_key, meta_value)
+  VALUES (NULL, 'patch', 'patch_66_67_c.sql|adding_intron_supporting_evidence');
diff --git a/sql/patch_66_67_d.sql b/sql/patch_66_67_d.sql
new file mode 100644
index 0000000000..288f86bce1
--- /dev/null
+++ b/sql/patch_66_67_d.sql
@@ -0,0 +1,16 @@
+# patch_66_67_d.sql
+#
+# Title: Adding new status type ANNOTATED
+#
+# Description: Including a new status type for genes and transcript
+# 
+
+ALTER TABLE gene MODIFY COLUMN status 
+  ENUM('KNOWN', 'NOVEL', 'PUTATIVE', 'PREDICTED', 'KNOWN_BY_PROJECTION', 'UNKNOWN', 'ANNOTATED');
+  
+ALTER TABLE transcript MODIFY COLUMN status 
+  ENUM('KNOWN', 'NOVEL', 'PUTATIVE', 'PREDICTED', 'KNOWN_BY_PROJECTION', 'UNKNOWN', 'ANNOTATED');
+
+# Patch identifier:
+INSERT INTO meta (species_id, meta_key, meta_value)
+  VALUES (NULL, 'patch', 'patch_66_67_d.sql|adding_gene_transcript_annotated');
diff --git a/sql/patch_66_67_e.sql b/sql/patch_66_67_e.sql
new file mode 100644
index 0000000000..b108806097
--- /dev/null
+++ b/sql/patch_66_67_e.sql
@@ -0,0 +1,13 @@
+# patch_66_67_e.sql
+#
+# Title: Adding an index to gene canonical transcript id
+#
+# Description: This is a lookup which is normally fast but when we have a lot
+# of genes in a core schema (multispecies DBs) then these queries slow down 
+# 
+
+ALTER TABLE gene ADD KEY canonical_transcript_id_idx (canonical_transcript_id);
+
+# Patch identifier:
+INSERT INTO meta (species_id, meta_key, meta_value)
+  VALUES (NULL, 'patch', 'patch_66_67_e.sql|index_canonical_transcript_id');
diff --git a/sql/table.sql b/sql/table.sql
index 90489bf69e..4a489bb0ea 100755
--- a/sql/table.sql
+++ b/sql/table.sql
@@ -259,6 +259,32 @@ CREATE TABLE exon (
 ) COLLATE=latin1_swedish_ci ENGINE=MyISAM;
 
 
+/**
+@table intron_supporting_evidence
+@desc Provides the evidence which we have used to declare an intronic region
+
+@column intron_supporting_evidence_id Surrogate primary key
+@column previous_exon_id              Indicates the exon flanking upstream of the intron. Foreign key references to the @link exon table.
+@column next_exon_id                  Indicates the exon flanking downstream of the intron. Foreign key references to the @link exon table.
+@column score                         Score supporting the intron 
+@column score_type                    The type of score e.g. NONE
+
+@see exon
+
+*/
+
+CREATE TABLE intron_supporting_evidence (
+  intron_supporting_evidence_id INT(10) UNSIGNED NOT NULL AUTO_INCREMENT,
+  previous_exon_id INT(10) UNSIGNED NOT NULL,
+  next_exon_id INT(10) UNSIGNED NOT NULL,
+  hit_name VARCHAR(100) NOT NULL,
+  score DECIMAL(10,3),
+  score_type ENUM('NONE', 'DEPTH') DEFAULT 'NONE',
+  
+  PRIMARY KEY (intron_supporting_evidence_id),
+  
+  UNIQUE KEY (previous_exon_id, next_exon_id)
+) COLLATE=latin1_swedish_ci ENGINE=MyISAM;
 
 /**
 @table exon_transcript
@@ -271,7 +297,6 @@ CREATE TABLE exon (
 @see exon
 @see transcript
 
-
 */
 
 CREATE TABLE exon_transcript (
@@ -327,7 +352,7 @@ CREATE TABLE gene (
   seq_region_strand           TINYINT(2) NOT NULL,
   display_xref_id             INT(10) UNSIGNED,
   source                      VARCHAR(20) NOT NULL,
-  status                      ENUM('KNOWN', 'NOVEL', 'PUTATIVE', 'PREDICTED', 'KNOWN_BY_PROJECTION', 'UNKNOWN'),
+  status                      ENUM('KNOWN', 'NOVEL', 'PUTATIVE', 'PREDICTED', 'KNOWN_BY_PROJECTION', 'UNKNOWN', 'ANNOTATED'),
   description                 TEXT,
   is_current                  BOOLEAN NOT NULL DEFAULT 1,
   canonical_transcript_id     INT(10) UNSIGNED NOT NULL,
@@ -341,7 +366,8 @@ CREATE TABLE gene (
   KEY seq_region_idx (seq_region_id, seq_region_start),
   KEY xref_id_index (display_xref_id),
   KEY analysis_idx (analysis_id),
-  KEY stable_id_idx (stable_id, version)
+  KEY stable_id_idx (stable_id, version),
+  KEY canonical_transcript_id_idx (canonical_transcript_id)
 
 ) COLLATE=latin1_swedish_ci ENGINE=MyISAM;
 
@@ -447,7 +473,10 @@ INSERT INTO meta (species_id, meta_key, meta_value) VALUES
 # NOTE: Avoid line-breaks in values.
 INSERT INTO meta (species_id, meta_key, meta_value) VALUES
   (NULL, 'patch', 'patch_66_67_a.sql|schema_version'),
-  (NULL, 'patch', 'patch_66_67_b.sql|drop_stable_id_views')
+  (NULL, 'patch', 'patch_66_67_b.sql|drop_stable_id_views'),
+  (NULL, 'patch', 'patch_66_67_c.sql|adding_intron_supporting_evidence'),
+  (NULL, 'patch', 'patch_66_67_d.sql|adding_gene_transcript_annotated'),
+  (NULL, 'patch', 'patch_66_67_e.sql|index_canonical_transcript_id'),
  ;
 
 /**
@@ -672,7 +701,7 @@ CREATE TABLE transcript (
   seq_region_strand           TINYINT(2) NOT NULL,
   display_xref_id             INT(10) UNSIGNED,
   biotype                     VARCHAR(40) NOT NULL,
-  status                      ENUM('KNOWN', 'NOVEL', 'PUTATIVE', 'PREDICTED', 'KNOWN_BY_PROJECTION', 'UNKNOWN'),
+  status                      ENUM('KNOWN', 'NOVEL', 'PUTATIVE', 'PREDICTED', 'KNOWN_BY_PROJECTION', 'UNKNOWN', 'ANNOTATED'),
   description                 TEXT,
   is_current                  BOOLEAN NOT NULL DEFAULT 1,
   canonical_translation_id    INT(10) UNSIGNED,
@@ -2516,4 +2545,3 @@ CREATE TABLE data_file (
   INDEX df_analysis_idx(analysis_id)
 ) ENGINE=MyISAM;
 
-
-- 
GitLab