table.sql 97.8 KB
Newer Older
Magali Ruffier's avatar
Magali Ruffier committed
1
-- Copyright [1999-2015] Wellcome Trust Sanger Institute and the EMBL-European Bioinformatics Institute
premanand17's avatar
premanand17 committed
2
-- Copyright [2016-2018] EMBL-European Bioinformatics Institute
Tiago Grego's avatar
Tiago Grego committed
3
--
4
5
6
-- Licensed under the Apache License, Version 2.0 (the "License");
-- you may not use this file except in compliance with the License.
-- You may obtain a copy of the License at
Tiago Grego's avatar
Tiago Grego committed
7
--
8
--      http://www.apache.org/licenses/LICENSE-2.0
Tiago Grego's avatar
Tiago Grego committed
9
--
10
11
12
13
14
15
-- Unless required by applicable law or agreed to in writing, software
-- distributed under the License is distributed on an "AS IS" BASIS,
-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-- See the License for the specific language governing permissions and
-- limitations under the License.

16
# Ensembl core table definitions
17
#
18

19
20
21
22
# Conventions:
#  - use lower case and underscores
#  - internal ids are integers named tablename_id
#  - same name is given in foreign key relations
23

24
25

/**
26
@header Assembly Tables
27
@colour #C70C09
28
29
30
31

*/


Monika Komorowska's avatar
Monika Komorowska committed
32
33
/**
@table assembly
34
@colour #C70C09
35
36
37
38
39
40
41
42
@desc The assembly table states, which parts of seq_regions are exactly equal. It enables to transform coordinates between seq_regions.
Typically this contains how chromosomes are made of contigs, clones out of contigs, and chromosomes out of supercontigs.
It allows you to artificially chunk chromosome sequence into smaller parts.

The data in this table defines the "static golden path", i.e. the best effort draft full genome sequence as determined by the UCSC or NCBI (depending which assembly you are using).
Each row represents a component, e.g. a contig,  (comp_seq_region_id, FK from seq_region table) at least part of which is present in the golden path.
The part of the component that is in the path is delimited by fields cmp_start and cmp_end (start < end), and the absolute position within the golden path chromosome (or other appropriate assembled structure) (asm_seq_region_id) is given by asm_start and asm_end.

43
44
@column asm_seq_region_id            Assembly sequence region id. Primary key, internal identifier. Foreign key references to the @link seq_region table.
@column cmp_seq_region_id            Component sequence region id. Foreign key references to the @link seq_region table.
45
46
47
48
49
50
51
@column asm_start                    Start absolute position within the golden path chromosome.
@column asm_end                      End absolute position within the golden path chromosome.
@column cmp_start                    Component start position within the golden path chromosome.
@column cmp_end                      Component start position within the golden path chromosome.
@column ori                          Orientation: 1 - sense; -1 - antisense.


Monika Komorowska's avatar
Monika Komorowska committed
52
@see seq_region
53
@see supercontigs
54
55

*/
Monika Komorowska's avatar
Monika Komorowska committed
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74


CREATE TABLE assembly (

  asm_seq_region_id           INT(10) UNSIGNED NOT NULL,
  cmp_seq_region_id           INT(10) UNSIGNED NOT NULL,
  asm_start                   INT(10) NOT NULL,
  asm_end                     INT(10) NOT NULL,
  cmp_start                   INT(10) NOT NULL,
  cmp_end                     INT(10) NOT NULL,
  ori                         TINYINT  NOT NULL,

  KEY cmp_seq_region_idx (cmp_seq_region_id),
  KEY asm_seq_region_idx (asm_seq_region_id, asm_start),
  UNIQUE KEY all_idx (asm_seq_region_id, cmp_seq_region_id, asm_start, asm_end, cmp_start, cmp_end, ori)

) COLLATE=latin1_swedish_ci ENGINE=MyISAM;


Monika Komorowska's avatar
Monika Komorowska committed
75
76
/**
@table assembly_exception
77
@colour #C70C09
78
79
80
@desc Allows multiple sequence regions to point to the same sequence, analogous to a symbolic link in a filesystem pointing to the actual file.
This mechanism has been implemented specifically to support haplotypes and PARs, but may be useful for other similar structures in the future.

81
@column assembly_exception_id       Assembly exception sequence region id. Primary key, internal identifier.
82
83
84
85
86
@column seq_region_id               Sequence region id. Foreign key references to the @link seq_region table.
@column seq_region_start            Sequence start position.
@column seq_region_end              Sequence end position.
@column exc_type                    Exception type, e.g. PAR, HAP - haplotype.
@column exc_seq_region_id           Exception sequence region id. Foreign key references to the @link seq_region table.
87
88
@column exc_seq_region_start        Exception sequence start position.
@column exc_seq_region_end          Exception sequence end position.
89
90
91
92
93
94
95
@column ori                         Orientation: 1 - sense; -1 - antisense.


@see assembly

*/

Monika Komorowska's avatar
Monika Komorowska committed
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116

CREATE TABLE assembly_exception (

  assembly_exception_id       INT(10) UNSIGNED NOT NULL AUTO_INCREMENT,
  seq_region_id               INT(10) UNSIGNED NOT NULL,
  seq_region_start            INT(10) UNSIGNED NOT NULL,
  seq_region_end              INT(10) UNSIGNED NOT NULL,
  exc_type                    ENUM('HAP', 'PAR',
                                'PATCH_FIX', 'PATCH_NOVEL') NOT NULL,
  exc_seq_region_id           INT(10) UNSIGNED NOT NULL,
  exc_seq_region_start        INT(10) UNSIGNED NOT NULL,
  exc_seq_region_end          INT(10) UNSIGNED NOT NULL,
  ori                         INT NOT NULL,

  PRIMARY KEY (assembly_exception_id),
  KEY sr_idx (seq_region_id, seq_region_start),
  KEY ex_idx (exc_seq_region_id, exc_seq_region_start)

) COLLATE=latin1_swedish_ci ENGINE=MyISAM;


Monika Komorowska's avatar
Monika Komorowska committed
117
118
/**
@table coord_system
119
@colour #C70C09
120
121
122
123
@desc Stores information about the available co-ordinate systems for the species identified through the species_id field.
Note that for each species, there must be one co-ordinate system that has the attribute "top_level" and one that has the attribute "sequence_level".

@column coord_system_id      Primary key, internal identifier.
124
125
126
127
128
@column species_id           Indentifies the species for multi-species databases.
@column name                 Co-oridinate system name, e.g. 'chromosome', 'contig', 'scaffold' etc.
@column version              Assembly.
@column rank                 Co-oridinate system rank.
@column attrib               Co-oridinate system attrib (e.g. "top_level", "sequence_level").
129

130
@see seq_region
Nathan Johnson's avatar
Nathan Johnson committed
131
@see meta_coord
Nathan Johnson's avatar
Nathan Johnson committed
132
@see meta
133
134

*/
Monika Komorowska's avatar
Monika Komorowska committed
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153


CREATE TABLE coord_system (

  coord_system_id             INT(10) UNSIGNED NOT NULL AUTO_INCREMENT,
  species_id                  INT(10) UNSIGNED NOT NULL DEFAULT 1,
  name                        VARCHAR(40) NOT NULL,
  version                     VARCHAR(255) DEFAULT NULL,
  rank                        INT NOT NULL,
  attrib                      SET('default_version', 'sequence_level'),

  PRIMARY   KEY (coord_system_id),
  UNIQUE    KEY rank_idx (rank, species_id),
  UNIQUE    KEY name_idx (name, version, species_id),
            KEY species_idx (species_id)

) COLLATE=latin1_swedish_ci ENGINE=MyISAM;


154
155
/**
@table data_file
156
@colour #C70C09
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
@desc Allows the storage of flat file locations used to store large quanitities of data currently unsuitable in a traditional database table.

@column data_file_id      Auto-increment surrogate primary key
@column coord_system_id   Coordinate system this file is linked to. Used to decipher the assembly version it was mapped to
@column analysis_id       Analysis this file is linked to
@column name              Name of the file
@column version_lock      Indicates that this file is only compatible with the current Ensembl release version
@column absolute          Flags that the URL given is fully resolved and should be used without question
@column url               Optional path to the file (can be absolute or relative)
@column file_type         Type of file e.g. BAM, BIGBED, BIGWIG and VCF
*/

CREATE TABLE data_file (
  data_file_id      INT(10) UNSIGNED NOT NULL AUTO_INCREMENT,
  coord_system_id   INT(10) UNSIGNED NOT NULL,
  analysis_id       SMALLINT UNSIGNED NOT NULL,
  name              VARCHAR(100) NOT NULL,
  version_lock      TINYINT(1) DEFAULT 0 NOT NULL,
  absolute          TINYINT(1) DEFAULT 0 NOT NULL,
  url               TEXT,
177
  file_type         ENUM('BAM','BAMCOV','BIGBED','BIGWIG','VCF'),
178
179
180
181
182
183
184
185

  PRIMARY KEY (data_file_id),
  UNIQUE KEY df_unq_idx(coord_system_id, analysis_id, name, file_type),
  INDEX df_name_idx(name),
  INDEX df_analysis_idx(analysis_id)
) COLLATE=latin1_swedish_ci ENGINE=MyISAM;


Monika Komorowska's avatar
Monika Komorowska committed
186
/**
Monika Komorowska's avatar
Monika Komorowska committed
187
@table dna
188
@colour #C70C09
189
@desc Contains DNA sequence. This table has a 1:1 relationship with the seq_region table.
190

Monika Komorowska's avatar
Monika Komorowska committed
191
192
@column seq_region_id           Primary key, internal identifier. Foreign key references to the @link seq_region table.
@column sequence                DNA sequence.
193

Monika Komorowska's avatar
Monika Komorowska committed
194
195
@see seq_region
*/
Monika Komorowska's avatar
Monika Komorowska committed
196
197


Monika Komorowska's avatar
Monika Komorowska committed
198
CREATE TABLE dna (
Monika Komorowska's avatar
Monika Komorowska committed
199

Monika Komorowska's avatar
Monika Komorowska committed
200
201
  seq_region_id       INT(10) UNSIGNED NOT NULL,
  sequence            LONGTEXT NOT NULL,
Monika Komorowska's avatar
Monika Komorowska committed
202

Monika Komorowska's avatar
Monika Komorowska committed
203
  PRIMARY KEY (seq_region_id)
Monika Komorowska's avatar
Monika Komorowska committed
204

Monika Komorowska's avatar
Monika Komorowska committed
205
) COLLATE=latin1_swedish_ci ENGINE=MyISAM MAX_ROWS=750000 AVG_ROW_LENGTH=19000;
Monika Komorowska's avatar
Monika Komorowska committed
206

207

Monika Komorowska's avatar
Monika Komorowska committed
208

Magali Ruffier's avatar
Magali Ruffier committed
209
210
/**
@table genome_statistics
211
@colour #C70C09
Magali Ruffier's avatar
Magali Ruffier committed
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
@desc Contains genome and assembly related statistics
      These include but are not limited to: feature counts, sequence lengths

@column genome_statistics_id  Primary key, internal identifier.
@column statistic            Name of the statistics
@column value                 Corresponding value of the statistics (count/length)
@column species_id            Indentifies the species for multi-species databases.
@column attrib_type_id        To distinguish similar statistics for different cases
@column timestamp             Date the statistics was generated

*/


CREATE TABLE genome_statistics(

  genome_statistics_id     INT(10) UNSIGNED NOT NULL AUTO_INCREMENT,
  statistic                VARCHAR(128) NOT NULL,
229
  value                    BIGINT(11) UNSIGNED DEFAULT '0' NOT NULL,
Magali Ruffier's avatar
Magali Ruffier committed
230
231
  species_id               INT UNSIGNED DEFAULT 1,
  attrib_type_id           INT(10) UNSIGNED DEFAULT NULL,
232
  timestamp                DATETIME DEFAULT NULL,
Magali Ruffier's avatar
Magali Ruffier committed
233
234

  PRIMARY KEY (genome_statistics_id),
Magali Ruffier's avatar
Magali Ruffier committed
235
  UNIQUE KEY stats_uniq(statistic, attrib_type_id, species_id)
Magali Ruffier's avatar
Magali Ruffier committed
236
237
238
239
240
241

) COLLATE=latin1_swedish_ci ENGINE=MyISAM;




Monika Komorowska's avatar
Monika Komorowska committed
242
/**
243
@table karyotype
244
@colour #C70C09
245
@desc Describes bands that can be stained on the chromosome.
246

247
248
249
250
251
252
@column karyotype_id            Primary key, internal identifier.
@column seq_region_id           Foreign key references to the @link seq_region table.
@column seq_region_start        Sequence start position.
@column seq_region_end          Sequence end position.
@column band                    Band.
@column stain                   Stain.
253
254

*/
Monika Komorowska's avatar
Monika Komorowska committed
255
256


257
258
CREATE TABLE karyotype (
  karyotype_id                INT(10) UNSIGNED NOT NULL AUTO_INCREMENT,
Monika Komorowska's avatar
Monika Komorowska committed
259
260
261
  seq_region_id               INT(10) UNSIGNED NOT NULL,
  seq_region_start            INT(10) UNSIGNED NOT NULL,
  seq_region_end              INT(10) UNSIGNED NOT NULL,
262
263
  band                        VARCHAR(40) DEFAULT NULL,
  stain                       VARCHAR(40) DEFAULT NULL,
Monika Komorowska's avatar
Monika Komorowska committed
264

265
266
  PRIMARY KEY (karyotype_id),
  KEY region_band_idx (seq_region_id,band)
Monika Komorowska's avatar
Monika Komorowska committed
267

Monika Komorowska's avatar
Monika Komorowska committed
268
) COLLATE=latin1_swedish_ci ENGINE=MyISAM;
Monika Komorowska's avatar
Monika Komorowska committed
269
270


271
/**
272
@table meta
273
@colour #C70C09
274
275
@desc Stores data about the data in the current schema. Taxonomy information, version information and the default value for the type column in the assembly table are stored here.
Unlike other tables, data in the meta table is stored as key-value pairs. Also stores (via assembly.mapping keys) the relationships between co-ordinate systems in the assembly table.
276

277
278
279
280
The species_id field of the meta table is used in multi-species databases and makes it possible to have species-specific meta key-value pairs.
The species-specific meta key-value pairs needs to be repeated for each species_id.
Entries in the meta table that are not specific to any one species, such as the schema_version key and any other schema-related information must have their species_id field set to NULL.
The default species_id, and the only species_id value allowed in single-species databases, is 1.
281
282


283
284
285
286
@column meta_id                    Primary key, internal identifier.
@column species_id                 Indentifies the species for multi-species databases.
@column meta_key                   Name of the meta entry, e.g. "schema_version".
@column meta_value                 Corresponding value of the key, e.g. "61".
287

288
289
@see assembly
@see coord_system
290

291
*/
292
293


294
CREATE TABLE IF NOT EXISTS meta (
295

296
297
298
  meta_id                     INT NOT NULL AUTO_INCREMENT,
  species_id                  INT UNSIGNED DEFAULT 1,
  meta_key                    VARCHAR(40) NOT NULL,
299
  meta_value                  VARCHAR(255) NOT NULL,
300

301
302
303
  PRIMARY   KEY (meta_id),
  UNIQUE    KEY species_key_value_idx (species_id, meta_key, meta_value),
            KEY species_value_idx (species_id, meta_value)
304

305
) COLLATE=latin1_swedish_ci ENGINE=MyISAM;
Monika Komorowska's avatar
Monika Komorowska committed
306

307

308
309
# Add schema type and schema version to the meta table.
INSERT INTO meta (species_id, meta_key, meta_value) VALUES
Tiago Grego's avatar
Tiago Grego committed
310
  (NULL, 'schema_type', 'core'),
311
  (NULL, 'schema_version', '95');
312
313
314
315
316

# Patches included in this schema file:
# NOTE: At start of release cycle, remove patch entries from last release.
# NOTE: Avoid line-breaks in values.
INSERT INTO meta (species_id, meta_key, meta_value)
317
  VALUES (NULL, 'patch', 'patch_94_95_a.sql|schema_version');
318

Tiago Grego's avatar
Tiago Grego committed
319

Monika Komorowska's avatar
Monika Komorowska committed
320
/**
321
@table meta_coord
322
@colour #C70C09
323
@desc Describes which co-ordinate systems the different feature tables use.
324

325
326
327
@column table_name              Ensembl database table name.
@column coord_system_id         Foreign key references to the @link coord_system table.
@column max_length              Longest sequence length.
328

329
@see coord_system
330
331

*/
332

333

334
CREATE TABLE meta_coord (
335

336
337
338
  table_name                  VARCHAR(40) NOT NULL,
  coord_system_id             INT(10) UNSIGNED NOT NULL,
  max_length                  INT,
339

340
  UNIQUE KEY cs_table_name_idx (coord_system_id, table_name)
341

342
) COLLATE=latin1_swedish_ci ENGINE=MyISAM;
343

344

Monika Komorowska's avatar
Monika Komorowska committed
345
/**
346
@table seq_region
347
@colour #C70C09
348
349
350
@desc Stores information about sequence regions. The primary key is used as a pointer into the dna table so that actual sequence can be obtained, and the coord_system_id allows sequence regions of multiple types to be stored.
Clones, contigs and chromosomes are all now stored in the seq_region table. Contigs are stored with the co-ordinate system 'contig'.
The relationship between contigs and clones is stored in the assembly table. The relationships between contigs and chromosomes, and between contigs and supercontigs, are stored in the assembly table.
351

352
353
354
355
@column seq_region_id             Primary key, internal identifier.
@column name                      Sequence region name.
@column coord_system_id           Foreign key references to the @link coord_system table.
@column length                    Sequence length.
356

357
358
359

@see dna
@see coord_system
360
361

*/
Monika Komorowska's avatar
Monika Komorowska committed
362
363


364
CREATE TABLE seq_region (
Monika Komorowska's avatar
Monika Komorowska committed
365

366
  seq_region_id               INT(10) UNSIGNED NOT NULL AUTO_INCREMENT,
367
  name                        VARCHAR(255) NOT NULL,
368
369
  coord_system_id             INT(10) UNSIGNED NOT NULL,
  length                      INT(10) UNSIGNED NOT NULL,
Monika Komorowska's avatar
Monika Komorowska committed
370

371
372
373
  PRIMARY KEY (seq_region_id),
  UNIQUE KEY name_cs_idx (name, coord_system_id),
  KEY cs_idx (coord_system_id)
Monika Komorowska's avatar
Monika Komorowska committed
374
375

) COLLATE=latin1_swedish_ci ENGINE=MyISAM;
Monika Komorowska's avatar
Monika Komorowska committed
376
377


Monika Komorowska's avatar
Monika Komorowska committed
378
/**
379
@table seq_region_synonym
380
@colour #C70C09
381
@desc Allows for storing multiple names for sequence regions.
Monika Komorowska's avatar
Monika Komorowska committed
382

383
384
385
386
387
388
389
390
391
392
393
394
@column seq_region_synonym_id           Primary key, internal identifier.
@column seq_region_id                   Foreign key references to the @link seq_region table.
@column synonym                         Alternative name for sequence region.
@column external_db_id                  Foreign key references to the @link external_db table.

*/


CREATE TABLE seq_region_synonym (

  seq_region_synonym_id       INT UNSIGNED NOT NULL  AUTO_INCREMENT,
  seq_region_id               INT(10) UNSIGNED NOT NULL,
395
  synonym                     VARCHAR(250) NOT NULL,
396
397
398
  external_db_id              INTEGER UNSIGNED,

  PRIMARY KEY (seq_region_synonym_id),
399
  UNIQUE KEY syn_idx (synonym, seq_region_id),
400
401
402
403
404
405
406
  KEY seq_region_idx (seq_region_id)

) COLLATE=latin1_swedish_ci ENGINE=MyISAM;


/**
@table seq_region_attrib
407
@colour #C70C09
408
409
410
@desc Allows "attributes" to be defined for certain seq_regions. Provides a way of storing extra information about particular seq_regions without adding extra columns to the seq_region table. e.g.

@column seq_region_id       Foreign key references to the @link seq_region table.
411
@column attrib_type_id      Foreign key references to the @link attrib_type table.
412
@column value               Attribute value.
Monika Komorowska's avatar
Monika Komorowska committed
413

414
415
@see seq_region
@see attrib_type
416
417

*/
418

419

420
CREATE TABLE seq_region_attrib (
Patrick Meidl's avatar
Patrick Meidl committed
421

422
  seq_region_id               INT(10) UNSIGNED NOT NULL DEFAULT '0',
Monika Komorowska's avatar
Monika Komorowska committed
423
424
  attrib_type_id              SMALLINT(5) UNSIGNED NOT NULL DEFAULT '0',
  value                       TEXT NOT NULL,
425

Monika Komorowska's avatar
Monika Komorowska committed
426
427
  KEY type_val_idx (attrib_type_id, value(40)),
  KEY val_only_idx (value(40)),
Magali Ruffier's avatar
Magali Ruffier committed
428
  KEY seq_region_idx (seq_region_id),
429
  UNIQUE KEY region_attribx (seq_region_id, attrib_type_id, value(500))
430

431
) COLLATE=latin1_swedish_ci ENGINE=MyISAM;
432

433

Monika Komorowska's avatar
Monika Komorowska committed
434
/**
435
@header Fundamental Tables
436
@colour   #808000
437
*/
438

Monika Komorowska's avatar
Monika Komorowska committed
439

440
441
/**
@table alt_allele
442
@colour   #808000
443
@desc Stores information about genes on haplotypes that may be orthologous.
444

445
446
447
@column alt_allele_id          Primary key, internal identifier.
@column gene_id                Foreign key references to the @link gene table.
@column alt_allele_group_id    A group ID to show which alleles are related
448

449
*/
450

451
CREATE TABLE alt_allele (
Tiago Grego's avatar
Tiago Grego committed
452
453
        alt_allele_id INT UNSIGNED AUTO_INCREMENT,
        alt_allele_group_id INT UNSIGNED NOT NULL,
454
455
456
        gene_id INT UNSIGNED NOT NULL,

        PRIMARY KEY (alt_allele_id),
457
        UNIQUE KEY gene_idx (gene_id),
458
        KEY (gene_id,alt_allele_group_id)
459

460
) COLLATE=latin1_swedish_ci ENGINE=MyISAM;
461

462
463
/**
@table alt_allele_attrib
464
@colour   #808000
465
@desc Holds all the different attributes assigned to individual alleles.
466

467
468
469
@column alt_allele_id           Primary key, internal identifier.
@column attrib                  Enum of attributes assigned to alternative alleles
*/
Monika Komorowska's avatar
Monika Komorowska committed
470
471


472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
CREATE TABLE alt_allele_attrib (
         alt_allele_id INT UNSIGNED,
         attrib ENUM('IS_REPRESENTATIVE',
                     'IS_MOST_COMMON_ALLELE',
                     'IN_CORRECTED_ASSEMBLY',
                     'HAS_CODING_POTENTIAL',
                     'IN_ARTIFICIALLY_DUPLICATED_ASSEMBLY',
                     'IN_SYNTENIC_REGION',
                     'HAS_SAME_UNDERLYING_DNA_SEQUENCE',
                     'IN_BROKEN_ASSEMBLY_REGION',
                     'IS_VALID_ALTERNATE',
                     'SAME_AS_REPRESENTATIVE',
                     'SAME_AS_ANOTHER_ALLELE',
                     'MANUALLY_ASSIGNED',
                     'AUTOMATICALLY_ASSIGNED'),
487

488
        KEY aa_idx (alt_allele_id,attrib)
489

490
) COLLATE=latin1_swedish_ci ENGINE=MyISAM;
491
492


493
/**
Magali Ruffier's avatar
Magali Ruffier committed
494
@table alt_allele_group
495
@colour   #808000
496
@desc A minimal table used for tracking unique alt_allele_group_id's. MySQL does not allow multiple autoincrement fields. Further information about a group could be added here at a later date.
Monika Komorowska's avatar
Monika Komorowska committed
497

498
499
@column alt_allele_group_id     Primary key and only column.
*/
500

501
CREATE TABLE alt_allele_group (
502

503
         alt_allele_group_id INT UNSIGNED AUTO_INCREMENT,
Monika Komorowska's avatar
Monika Komorowska committed
504

505
         PRIMARY KEY (alt_allele_group_id)
506

Monika Komorowska's avatar
Monika Komorowska committed
507
) COLLATE=latin1_swedish_ci ENGINE=MyISAM;
508

509

510
511
/**
@table analysis
512
@colour   #808000
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
@desc Usually describes a program and some database that together are used to create a feature on a piece of sequence.
Each feature is marked with an analysis_id. The most important column is logic_name, which is used by the webteam to render a feature correctly on contigview (or even retrieve the right feature).
Logic_name is also used in the pipeline to identify the analysis which has to run in a given status of the pipeline.
The module column tells the pipeline which Perl module does the whole analysis, typically a RunnableDB module.

@column analysis_id                 Primary key, internal identifier.
@column created                     Date to distinguish newer and older versions off the same analysis.
@column logic_name                  String to identify the analysis. Used mainly inside pipeline.
@column db                          Database name.
@column db_version                  Database version.
@column db_file                     File system location of the database.
@column program                     The binary used to create a feature.
@column program_version             The binary version.
@column program_file                File system location of the binary.
@column parameters                  A parameter string which is processed by the perl module.
@column module                      Perl module names (RunnableDBS usually) executing this analysis.
@column module_version              Perl module version.
@column gff_source                  How to make a gff dump from features with this analysis.
@column gff_feature                 How to make a gff dump from features with this analysis.

@see analysis_description

*/

CREATE TABLE IF NOT EXISTS analysis (

  analysis_id                 SMALLINT UNSIGNED NOT NULL AUTO_INCREMENT,
540
  created                     datetime DEFAULT NULL,
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
  logic_name                  VARCHAR(128) NOT NULL,
  db                          VARCHAR(120),
  db_version                  VARCHAR(40),
  db_file                     VARCHAR(120),
  program                     VARCHAR(80),
  program_version             VARCHAR(40),
  program_file                VARCHAR(80),
  parameters                  TEXT,
  module                      VARCHAR(80),
  module_version              VARCHAR(40),
  gff_source                  VARCHAR(40),
  gff_feature                 VARCHAR(40),

  PRIMARY KEY (analysis_id),
  UNIQUE KEY logic_name_idx (logic_name)

) COLLATE=latin1_swedish_ci ENGINE=MyISAM;
Monika Komorowska's avatar
Monika Komorowska committed
558
559


Monika Komorowska's avatar
Monika Komorowska committed
560
/**
561
@table analysis_description
562
@colour   #808000
563
@desc Allows the storage of a textual description of the analysis, as well as a "display label", primarily for the EnsEMBL web site.
564

565
566
567
568
569
@column analysis_id            Primary key, internal identifier. Foreign key references to the @link analysis table.
@column description            Textual description of the analysis.
@column display_label          Display label for the EnsEMBL web site.
@column displayable            Flag indicating if the analysis description is to be displayed on the EnsEMBL web site.
@column web_data               Other data used by the EnsEMBL web site.
Monika Komorowska's avatar
Monika Komorowska committed
570

571
@see analysis
Monika Komorowska's avatar
Monika Komorowska committed
572
573
574
575

*/


576
CREATE TABLE IF NOT EXISTS analysis_description (
Monika Komorowska's avatar
Monika Komorowska committed
577

578
579
580
581
582
  analysis_id                  SMALLINT UNSIGNED NOT NULL,
  description                  TEXT,
  display_label                VARCHAR(255) NOT NULL,
  displayable                  BOOLEAN NOT NULL DEFAULT 1,
  web_data                     TEXT,
Monika Komorowska's avatar
Monika Komorowska committed
583

584
  UNIQUE KEY analysis_idx (analysis_id)
Monika Komorowska's avatar
Monika Komorowska committed
585
586
587
588

) COLLATE=latin1_swedish_ci ENGINE=MyISAM;


589
/**
590
@table attrib_type
591
@colour   #808000
592
@desc Provides codes, names and desctriptions of attribute types.
593

594
595
596
597
598
599
@column attrib_type_id       Primary key, internal identifier.
@column code                 Attribute code, e.g. 'GapExons'.
@column name                 Attribute name, e.g. 'gap exons'.
@column description          Attribute description, e.g. 'number of gap exons'.

@see seq_region_attrib
600
601
602

*/

603
604
605
606

CREATE TABLE attrib_type (

  attrib_type_id              SMALLINT(5) UNSIGNED NOT NULL AUTO_INCREMENT,
607
  code                        VARCHAR(20) NOT NULL DEFAULT '',
608
609
610
611
612
613
  name                        VARCHAR(255) NOT NULL DEFAULT '',
  description                 TEXT,

  PRIMARY KEY (attrib_type_id),
  UNIQUE KEY code_idx (code)

614
615
616
617
) COLLATE=latin1_swedish_ci ENGINE=MyISAM;


/**
618
@table dna_align_feature
619
@colour   #808000
620
@desc Stores DNA sequence alignments generated from Blast (or Blast-like) comparisons.
621

622
623
624
625
626
627
628
629
630
@column dna_align_feature_id        Primary key, internal identifier.
@column seq_region_id               Foreign key references to the @link seq_region table.
@column seq_region_start            Sequence start position.
@column seq_region_end              Sequence end position.
@column seq_region_strand           Sequence region strand: 1 - forward; -1 - reverse.
@column hit_start                   Alignment hit start position.
@column hit_end                     Alignment hit end position.
@column hit_strand                  Alignment hit strand: 1 - forward; -1 - reverse.
@column hit_name                    Alignment hit name.
631
@column analysis_id                 Foreign key references to the @link analysis table.
632
633
634
635
636
637
@column score                       Alignment score.
@column evalue                      Alignment e-value.
@column perc_ident                  Alignment percentage identity.
@column cigar_line                  Used to encode gapped alignments.
@column external_db_id              Foreign key references to the @link external_db table.
@column hcoverage                   Hit coverage.
638
@column align_type                  Alignment string type used
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661

@see cigar_line

*/

CREATE TABLE dna_align_feature (

  dna_align_feature_id        INT(10) UNSIGNED NOT NULL AUTO_INCREMENT,
  seq_region_id               INT(10) UNSIGNED NOT NULL,
  seq_region_start            INT(10) UNSIGNED NOT NULL,
  seq_region_end              INT(10) UNSIGNED NOT NULL,
  seq_region_strand           TINYINT(1) NOT NULL,
  hit_start                   INT NOT NULL,
  hit_end                     INT NOT NULL,
  hit_strand                  TINYINT(1) NOT NULL,
  hit_name                    VARCHAR(40) NOT NULL,
  analysis_id                 SMALLINT UNSIGNED NOT NULL,
  score                       DOUBLE,
  evalue                      DOUBLE,
  perc_ident                  FLOAT,
  cigar_line                  TEXT,
  external_db_id              INTEGER UNSIGNED,
  hcoverage                   DOUBLE,
662
  align_type                  ENUM('ensembl', 'cigar', 'vulgar', 'mdtag') DEFAULT 'ensembl',
663
664
665
666
667
668

  PRIMARY KEY (dna_align_feature_id),
  KEY seq_region_idx (seq_region_id, analysis_id, seq_region_start, score),
  KEY seq_region_idx_2 (seq_region_id, seq_region_start),
  KEY hit_idx (hit_name),
  KEY analysis_idx (analysis_id),
Magali Ruffier's avatar
Magali Ruffier committed
669
  KEY external_db_idx (external_db_id)
670
671
672
673

) COLLATE=latin1_swedish_ci ENGINE=MyISAM MAX_ROWS=100000000 AVG_ROW_LENGTH=80;


674
675
/**
@table dna_align_feature_attrib
676
@colour   #808000
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
@desc Enables storage of attributes that relate to DNA sequence alignments.

@column dna_align_feature_id        Foreign key references to the @link dna_align_feature table.
@column attrib_type_id              Foreign key references to the @link attrib_type table.
@column value                       Attribute value.

@see dna_align_feature
*/

CREATE TABLE dna_align_feature_attrib (

  dna_align_feature_id        INT(10) UNSIGNED NOT NULL,
  attrib_type_id              SMALLINT(5) UNSIGNED NOT NULL,
  value                       TEXT NOT NULL,

  UNIQUE KEY dna_align_feature_attribx (dna_align_feature_id, attrib_type_id, value(500)),
  KEY dna_align_feature_idx (dna_align_feature_id),
  KEY type_val_idx (attrib_type_id, value(40)),
  KEY val_only_idx (value(40))

) COLLATE=latin1_swedish_ci ENGINE=MyISAM;


700
701
/**
@table exon
702
@colour   #808000
703
704
705
706
@desc Stores data about exons. Associated with transcripts via exon_transcript. Allows access to contigs seq_regions.
Note seq_region_start is always less that seq_region_end, i.e. when the exon is on the other strand the seq_region_start is specifying the 3prime end of the exon.

@column exon_id                     Primary key, internal identifier.
707
@column seq_region_id               Foreign key references to the @link seq_region table.
708
@column seq_region_start            Sequence start position.
709
@column seq_region_end              Sequence end position.
710
@column seq_region_strand           Sequence region strand: 1 - forward; -1 - reverse.
711
712
713
714
715
716
717
718
719
720
@column phase                       The place where the intron lands inside the codon - 0 between codons, 1 between the 1st and second base, 2 between the second and 3rd base. Exons therefore have a start phase anda end phase, but introns have just one phase.
@column end_phase                   Usually, end_phase = (phase + exon_length)%3 but end_phase could be -1 if the exon is half-coding and its 3 prime end is UTR.
@column is_current                  1 - exon is current. Always set to 1 in ensembl dbs, but needed for otterlace dbs
@column is_constitutive             1 - exon is constitutive.
@column stable_id                   Release-independent stable identifier.
@column version                     Stable identifier version number.
@column created_date                Date created.
@column modified_date               Date modified.

@see exon_transcript
721
722
723

*/

724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739

CREATE TABLE exon (

  exon_id                     INT(10) UNSIGNED NOT NULL AUTO_INCREMENT,
  seq_region_id               INT(10) UNSIGNED NOT NULL,
  seq_region_start            INT(10) UNSIGNED NOT NULL,
  seq_region_end              INT(10) UNSIGNED NOT NULL,
  seq_region_strand           TINYINT(2) NOT NULL,

  phase                       TINYINT(2) NOT NULL,
  end_phase                   TINYINT(2) NOT NULL,

  is_current                  BOOLEAN NOT NULL DEFAULT 1,
  is_constitutive             BOOLEAN NOT NULL DEFAULT 0,

  stable_id                   VARCHAR(128) DEFAULT NULL,
740
  version                     SMALLINT UNSIGNED DEFAULT NULL,
741
742
  created_date                DATETIME DEFAULT NULL,
  modified_date               DATETIME DEFAULT NULL,
743
744

  PRIMARY KEY (exon_id),
745
  KEY seq_region_idx (seq_region_id, seq_region_start),
746
  KEY stable_id_idx (stable_id, version)
747

748
749
750
751
) COLLATE=latin1_swedish_ci ENGINE=MyISAM;


/**
752
@table exon_transcript
753
@colour   #808000
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
@desc Relationship table linking exons with transcripts. The rank column indicates the 5' to 3' position of the exon within the transcript, i.e. a rank of 1 means the exon is the 5' most within this transcript.

@column exon_id                Composite key. Foreign key references to the @link exon table.
@column transcript_id          Composite key. Foreign key references to the @link transcript table.
@column rank                   Composite key.

@see exon
@see transcript

*/

CREATE TABLE exon_transcript (

  exon_id                     INT(10) UNSIGNED NOT NULL,
  transcript_id               INT(10) UNSIGNED NOT NULL,
  rank                        INT(10) NOT NULL,

  PRIMARY KEY (exon_id,transcript_id,rank),
  KEY transcript (transcript_id),
  KEY exon (exon_id)

) COLLATE=latin1_swedish_ci ENGINE=MyISAM;



/**
@table gene
781
@colour   #808000
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
@desc Allows transcripts to be related to genes.

@column gene_id                     Primary key, internal identifier.
@column biotype                     Biotype, e.g. protein_coding.
@column analysis_id                 Foreign key references to the @link analysis table.
@column seq_region_id               Foreign key references to the @link seq_region table.
@column seq_region_start            Sequence start position.
@column seq_region_end              Sequence end position.
@column seq_region_strand           Sequence region strand: 1 - forward; -1 - reverse.
@column display_xref_id             External reference for EnsEMBL web site. Foreign key references to the @link xref table.
@column source                      e.g ensembl, havana etc.
@column description                 Gene description
@column is_current                  1 - gene is current. Always set to 1 in ensembl dbs, but needed for otterlace dbs
@column canonical_transcript_id     Foreign key references to the @link transcript table.
@column stable_id                   Release-independent stable identifier.
@column version                     Stable identifier version number.
@column created_date                Date created.
@column modified_date               Date modified.

@see transcript

*/


CREATE TABLE gene (

  gene_id                     INT(10) UNSIGNED NOT NULL AUTO_INCREMENT,
  biotype                     VARCHAR(40) NOT NULL,
  analysis_id                 SMALLINT UNSIGNED NOT NULL,
  seq_region_id               INT(10) UNSIGNED NOT NULL,
  seq_region_start            INT(10) UNSIGNED NOT NULL,
  seq_region_end              INT(10) UNSIGNED NOT NULL,
  seq_region_strand           TINYINT(2) NOT NULL,
  display_xref_id             INT(10) UNSIGNED,
816
  source                      VARCHAR(40) NOT NULL,
817
818
819
820
  description                 TEXT,
  is_current                  BOOLEAN NOT NULL DEFAULT 1,
  canonical_transcript_id     INT(10) UNSIGNED NOT NULL,
  stable_id                   VARCHAR(128) DEFAULT NULL,
821
  version                     SMALLINT UNSIGNED DEFAULT NULL,
822
823
  created_date                DATETIME DEFAULT NULL,
  modified_date               DATETIME DEFAULT NULL,
824
825
826
827
828
829
830
831
832
833
834
835
836

  PRIMARY KEY (gene_id),
  KEY seq_region_idx (seq_region_id, seq_region_start),
  KEY xref_id_index (display_xref_id),
  KEY analysis_idx (analysis_id),
  KEY stable_id_idx (stable_id, version),
  KEY canonical_transcript_id_idx (canonical_transcript_id)

) COLLATE=latin1_swedish_ci ENGINE=MyISAM;


/**
@table gene_attrib
837
@colour   #808000
838
839
840
841
842
@desc Enables storage of attributes that relate to genes.

@column gene_id             Foreign key references to the @link gene table.
@column attrib_type_id      Foreign key references to the @link attrib_type table.
@column value               Attribute value.
843
844
845
846
847


@see gene
*/

848
849
850
851
852
853
854
855
856

CREATE TABLE gene_attrib (

  gene_id                     INT(10) UNSIGNED NOT NULL DEFAULT '0',
  attrib_type_id              SMALLINT(5) UNSIGNED NOT NULL DEFAULT '0',
  value                       TEXT NOT NULL,

  KEY type_val_idx (attrib_type_id, value(40)),
  KEY val_only_idx (value(40)),
Magali Ruffier's avatar
Magali Ruffier committed
857
  KEY gene_idx (gene_id),
858
  UNIQUE KEY gene_attribx (gene_id, attrib_type_id, value(500))
859

860
861
862
) COLLATE=latin1_swedish_ci ENGINE=MyISAM;


Monika Komorowska's avatar
Monika Komorowska committed
863
/**
864
@table protein_align_feature
865
@colour   #808000
866
@desc Stores translation alignments generated from Blast (or Blast-like) comparisons.
Monika Komorowska's avatar
Monika Komorowska committed
867

868
869
870
871
872
873
874
875
876
877
878
879
880
881
@column protein_align_feature_id    Primary key, internal identifier.
@column seq_region_id               Foreign key references to the @link seq_region table.
@column seq_region_start            Sequence start position.
@column seq_region_end              Sequence end position.
@column seq_region_strand           Sequence region strand: 1 - forward; -1 - reverse.
@column hit_start                   Alignment hit start position.
@column hit_end                     Alignment hit end position.
@column hit_name                    Alignment hit name.
@column analysis_id                 Foreign key references to the @link analysis table.
@column score                       Alignment score.
@column evalue                      Alignment e-value.
@column perc_ident                  Alignment percentage identity.
@column cigar_line                  Used to encode gapped alignments.
@column external_db_id              Foreign key references to the @link external_db table.
882
883
884
@column hcoverage                   Alignment hit coverage.
@column align_type                  Alignment string type used

885

886
@see cigar_line
887
888
889
890


*/

891

892
CREATE TABLE protein_align_feature (
893

894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
  protein_align_feature_id    INT(10) UNSIGNED NOT NULL AUTO_INCREMENT,
  seq_region_id               INT(10) UNSIGNED NOT NULL,
  seq_region_start            INT(10) UNSIGNED NOT NULL,
  seq_region_end              INT(10) UNSIGNED NOT NULL,
  seq_region_strand           TINYINT(1) DEFAULT '1' NOT NULL,
  hit_start                   INT(10) NOT NULL,
  hit_end                     INT(10) NOT NULL,
  hit_name                    VARCHAR(40) NOT NULL,
  analysis_id                 SMALLINT UNSIGNED NOT NULL,
  score                       DOUBLE,
  evalue                      DOUBLE,
  perc_ident                  FLOAT,
  cigar_line                  TEXT,
  external_db_id              INTEGER UNSIGNED,
  hcoverage                   DOUBLE,
909
  align_type                  ENUM('ensembl', 'cigar', 'vulgar', 'mdtag') DEFAULT 'ensembl',
Monika Komorowska's avatar
Monika Komorowska committed
910

911
912
913
914
915
916
917
918
919
920
921
922
  PRIMARY KEY (protein_align_feature_id),
  KEY seq_region_idx (seq_region_id, analysis_id, seq_region_start, score),
  KEY seq_region_idx_2 (seq_region_id, seq_region_start),
  KEY hit_idx (hit_name),
  KEY analysis_idx (analysis_id),
  KEY external_db_idx (external_db_id)

) COLLATE=latin1_swedish_ci ENGINE=MyISAM MAX_ROWS=100000000 AVG_ROW_LENGTH=80;


/**
@table protein_feature
923
@colour   #808000
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
@desc Describes features on the translations (as opposed to the DNA sequence itself), i.e. parts of the peptide. In peptide co-ordinates rather than contig co-ordinates.

@column protein_feature_id          Primary key, internal identifier.
@column translation_id              Foreign key references to the @link translation table.
@column seq_start                   Sequence start position.
@column seq_end                     Sequence end position.
@column hit_start                   Alignment hit start position.
@column hit_end                     Alignment hit end position.
@column hit_name                    Alignment hit name.
@column analysis_id                 Foreign key references to the @link analysis table.
@column score                       Alignment score.
@column evalue                      Alignment E-value.
@column perc_ident                  Alignment percentage identity.
@column external_data               External data for protein feature.
@column hit_description             Optional description of the hit. This can be a human readable name
939
940
@column cigar_line                  Used to encode gapped alignments.
@column align_type                  Alignment string type used
941
942
943
944
945
946
947
948
949
950
951
952
953
954

@see analysis

*/


CREATE TABLE protein_feature (

  protein_feature_id          INT(10) UNSIGNED NOT NULL AUTO_INCREMENT,
  translation_id              INT(10) UNSIGNED NOT NULL,
  seq_start                   INT(10) NOT NULL,
  seq_end                     INT(10) NOT NULL,
  hit_start                   INT(10) NOT NULL,
  hit_end                     INT(10) NOT NULL,
955
  hit_name                    VARCHAR(40) CHARACTER SET latin1 COLLATE latin1_bin NOT NULL,
956
957
958
959
960
961
  analysis_id                 SMALLINT UNSIGNED NOT NULL,
  score                       DOUBLE,
  evalue                      DOUBLE,
  perc_ident                  FLOAT,
  external_data               TEXT,
  hit_description             TEXT,
premanand17's avatar
premanand17 committed
962
  cigar_line                  TEXT,
963
  align_type                  ENUM('ensembl', 'cigar', 'cigarplus', 'vulgar', 'mdtag') DEFAULT NULL,
964

965
  UNIQUE KEY aln_idx (translation_id,hit_name,seq_start,seq_end,hit_start,hit_end,analysis_id),
966
967
968
969
  PRIMARY KEY (protein_feature_id),
  KEY translation_idx (translation_id),
  KEY hitname_idx (hit_name),
  KEY analysis_idx (analysis_id)
Monika Komorowska's avatar
Monika Komorowska committed
970
971

) COLLATE=latin1_swedish_ci ENGINE=MyISAM;
972
973


Monika Komorowska's avatar
Monika Komorowska committed
974
/**
975
@table supporting_feature
976
@colour   #808000
977
978
@desc Describes the exon prediction process by linking exons to DNA or protein alignment features.
As in several other tables, the feature_id column is a foreign key; the feature_type column specifies which table feature_id refers to.
Monika Komorowska's avatar
Monika Komorowska committed
979

980
981
982
@column exon_id                    Foreign key references to the @link exon table.
@column feature_type               Feature type: 'dna_align_feature' or 'protein_align_feature'
@column feature_id                 Foreign key references to the @link dna_align_feature or @link protein_align_feature table depending on the feature type.
Monika Komorowska's avatar
Monika Komorowska committed
983
984
985
986
987


*/


988
CREATE TABLE supporting_feature (
Monika Komorowska's avatar
Monika Komorowska committed
989

990
991
992
  exon_id                     INT(10) UNSIGNED DEFAULT '0' NOT NULL,
  feature_type                ENUM('dna_align_feature','protein_align_feature'),
  feature_id                  INT(10) UNSIGNED DEFAULT '0' NOT NULL,
Monika Komorowska's avatar
Monika Komorowska committed
993

994
995
  UNIQUE KEY all_idx (exon_id,feature_type,feature_id),
  KEY feature_idx (feature_type,feature_id)
Monika Komorowska's avatar
Monika Komorowska committed
996

997
) COLLATE=latin1_swedish_ci ENGINE=MyISAM MAX_ROWS=100000000 AVG_ROW_LENGTH=80;
Monika Komorowska's avatar
Monika Komorowska committed
998
999
1000
1001


/**
@table transcript
1002
@colour   #808000
Monika Komorowska's avatar
Monika Komorowska committed
1003
@desc Stores information about transcripts. Has seq_region_start, seq_region_end and seq_region_strand for faster retrieval and to allow storage independently of genes and exons.
1004
Note that a transcript is usually associated with a translation, but may not be, e.g. in the case of pseudogenes and RNA genes (those that code for RNA molecules).
Monika Komorowska's avatar
Monika Komorowska committed
1005
1006
1007

@column transcript_id               Primary key, internal identifier.
@column gene_id                     Foreign key references to the @link gene table.
1008
1009
@column analysis_id                 Foreign key references to the @link analysis table.
@column seq_region_id               Foreign key references to the @link seq_region table.
1010
@column seq_region_start            Sequence start position.
1011
@column seq_region_end              Sequence end position.
1012
1013
@column seq_region_strand           Sequence region strand: 1 - forward; -1 - reverse.
@column display_xref_id             External reference for EnsEMBL web site. Foreign key references to the @link xref table.
1014
@column source                      e.g ensembl, havana etc.
1015
1016
@column biotype                     Biotype, e.g. protein_coding.
@column description                 Transcript description.
1017
@column is_current                  Indicates a current transcript. Always set to 1 in ensembl dbs, but needed for otterlace dbs
1018
@column canonical_translation_id    Foreign key references to the @link translation table.
1019
1020
1021
1022
@column stable_id                   Release-independent stable identifier.
@column version                     Stable identifier version number.
@column created_date                Date created.
@column modified_date               Date modified.
1023
1024

*/
Michele Clamp's avatar
Michele Clamp committed
1025

1026

Monika Komorowska's avatar
Monika Komorowska committed
1027
CREATE TABLE transcript (
1028

Monika Komorowska's avatar
Monika Komorowska committed
1029
1030
  transcript_id               INT(10) UNSIGNED NOT NULL AUTO_INCREMENT,
  gene_id                     INT(10) UNSIGNED,
Monika Komorowska's avatar
Monika Komorowska committed
1031
  analysis_id                 SMALLINT UNSIGNED NOT NULL,
1032
1033
1034
  seq_region_id               INT(10) UNSIGNED NOT NULL,
  seq_region_start            INT(10) UNSIGNED NOT NULL,
  seq_region_end              INT(10) UNSIGNED NOT NULL,
Monika Komorowska's avatar
Monika Komorowska committed
1035
1036
  seq_region_strand           TINYINT(2) NOT NULL,
  display_xref_id             INT(10) UNSIGNED,
1037
  source                      VARCHAR(40) NOT NULL default 'ensembl',
Monika Komorowska's avatar
Monika Komorowska committed
1038
  biotype                     VARCHAR(40) NOT NULL,
Monika Komorowska's avatar
Monika Komorowska committed
1039
1040
  description                 TEXT,
  is_current                  BOOLEAN NOT NULL DEFAULT 1,
Monika Komorowska's avatar
Monika Komorowska committed
1041
  canonical_translation_id    INT(10) UNSIGNED,
1042
  stable_id                   VARCHAR(128) DEFAULT NULL,
1043
  version                     SMALLINT UNSIGNED DEFAULT NULL,
1044
1045
  created_date                DATETIME DEFAULT NULL,
  modified_date               DATETIME DEFAULT NULL,
Monika Komorowska's avatar
Monika Komorowska committed
1046

Monika Komorowska's avatar
Monika Komorowska committed
1047
  PRIMARY KEY (transcript_id),
Monika Komorowska's avatar
Monika Komorowska committed
1048
  KEY seq_region_idx (seq_region_id, seq_region_start),
Monika Komorowska's avatar
Monika Komorowska committed
1049
  KEY gene_index (gene_id),
Monika Komorowska's avatar
Monika Komorowska committed
1050
  KEY xref_id_index (display_xref_id),
Monika Komorowska's avatar
Monika Komorowska committed
1051
  KEY analysis_idx (analysis_id),
1052
1053
  UNIQUE INDEX canonical_translation_idx (canonical_translation_id),
  KEY stable_id_idx (stable_id, version)
Monika Komorowska's avatar
Monika Komorowska committed
1054
1055

) COLLATE=latin1_swedish_ci ENGINE=MyISAM;
1056

1057

Monika Komorowska's avatar
Monika Komorowska committed
1058
/**
Monika Komorowska's avatar
Monika Komorowska committed
1059
@table transcript_attrib
1060
@colour   #808000
Monika Komorowska's avatar
Monika Komorowska committed
1061
@desc Enables storage of attributes that relate to transcripts.
1062

Monika Komorowska's avatar
Monika Komorowska committed
1063
@column transcript_id       Foreign key references to the @link transcript table.
1064
@column attrib_type_id      Foreign key references to the @link attrib_type table.
1065
@column value               Attribute value.
1066

Monika Komorowska's avatar
Monika Komorowska committed
1067
@see transcript
1068

Monika Komorowska's avatar
Monika Komorowska committed
1069
*/
1070

Monika Komorowska's avatar
Monika Komorowska committed
1071
CREATE TABLE transcript_attrib (
1072

Monika Komorowska's avatar
Monika Komorowska committed
1073
1074
1075
  transcript_id               INT(10) UNSIGNED NOT NULL DEFAULT '0',
  attrib_type_id              SMALLINT(5) UNSIGNED NOT NULL DEFAULT '0',
  value                       TEXT NOT NULL,
1076

Monika Komorowska's avatar
Monika Komorowska committed
1077
1078
  KEY type_val_idx (attrib_type_id, value(40)),
  KEY val_only_idx (value(40)),
Magali Ruffier's avatar
Magali Ruffier committed
1079
  KEY transcript_idx (transcript_id),
1080
  UNIQUE KEY transcript_attribx (transcript_id, attrib_type_id, value(500))
1081

1082
) COLLATE=latin1_swedish_ci ENGINE=MyISAM;
1083

1084

1085
1086
/**
@table transcript_supporting_feature
1087
@colour   #808000
1088
1089
1090
1091
1092
1093
1094
1095
1096
1097
1098
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
@desc Describes the exon prediction process by linking transcripts to DNA or protein alignment features.
As in several other tables, the feature_id column is a foreign key; the feature_type column specifies which table feature_id refers to.

@column transcript_id              Foreign key references to the @link transcript table.
@column feature_type               Feature type: 'dna_align_feature' or 'protein_align_feature'
@column feature_id                 Foreign key references to the @link dna_align_feature or @link protein_align_feature table depending on the feature type.

*/


CREATE TABLE transcript_supporting_feature (

  transcript_id               INT(10) UNSIGNED DEFAULT '0' NOT NULL,
  feature_type                ENUM('dna_align_feature','protein_align_feature'),
  feature_id                  INT(10) UNSIGNED DEFAULT '0' NOT NULL,

  UNIQUE KEY all_idx (transcript_id,feature_type,feature_id),
  KEY feature_idx (feature_type,feature_id)

) COLLATE=latin1_swedish_ci ENGINE=MyISAM MAX_ROWS=100000000 AVG_ROW_LENGTH=80;


Monika Komorowska's avatar