From a5778a6575854afa51ad975d43500a1aac523bc1 Mon Sep 17 00:00:00 2001 From: Patrick Meidl <pm2@sanger.ac.uk> Date: Thu, 9 Mar 2006 15:54:21 +0000 Subject: [PATCH] schema and API changes for archive stable id history --- modules/Bio/EnsEMBL/ArchiveStableId.pm | 321 +++-- .../EnsEMBL/DBSQL/ArchiveStableIdAdaptor.pm | 581 +++++--- modules/t/archiveStableId.t | 4 +- sql/patch_37_38.sql | 9 + sql/table.sql | 1237 +++++++++-------- 5 files changed, 1258 insertions(+), 894 deletions(-) diff --git a/modules/Bio/EnsEMBL/ArchiveStableId.pm b/modules/Bio/EnsEMBL/ArchiveStableId.pm index 1effcb36fa..4b78f71bb1 100644 --- a/modules/Bio/EnsEMBL/ArchiveStableId.pm +++ b/modules/Bio/EnsEMBL/ArchiveStableId.pm @@ -1,11 +1,4 @@ -# EnsEMBL module for ArchiveStableId -# Copyright EMBL-EBI/Sanger center 2003 -# -# -# -# You may distribute this module under the same terms as perl itself - -# POD documentation - main docs before the code +package Bio::EnsEMBL::ArchiveStableId; =head1 NAME @@ -13,81 +6,98 @@ Bio::EnsEMBL::ArchiveStableId =head1 SYNOPSIS -ArchiveStableId objects are the main workunit for retrieving stable id archived information from - EnsEMBL core database. - =head1 DESCRIPTION - Attributes: +ArchiveStableId objects are the main workunit for retrieving stable id archived +information from EnsEMBL core database. + +Attributes: type: Gene, Transcript, Translation, Exon, other, undef stable_id: eg. ENSG00000000001 + version: e.g. 1 db_name: eg. homo_sapiens_core_12_31 - version: 1 + release: e.g. 35 + assembly: e.g. NCBI35 + successors: listref of Bio::EnsEMBL::ArchiveStableIds + adaptor: Bio::EnsEMBL::DBSQL::ArchiveStableIdAdaptor - Methods: - new: - new_fast: - get_all_direct_predecessors: - get_all_direct_successors: +Status: At Risk. This module is in development. + +=head1 METHODS - get_components: - - Status: At Risk. This module is in development. 
+ new + new_fast + get_all_predecessors + get_all_successors + get_peptide + get_all_transcript_archive_ids + get_all_translation_archive_ids +=head1 LICENCE -=cut +This code is distributed under an Apache style licence: +Please see http://www.ensembl.org/code_licence.html for details +=head1 AUTHOR +Ensembl core API team +Currently maintained by Patrick Meidl <meidl@ebi.ac.uk> -package Bio::EnsEMBL::ArchiveStableId; +=head1 CONTACT + +Please post comments/questions to the Ensembl development list +<ensembl-dev@ebi.ac.uk> + +=cut + use strict; use warnings; no warnings qw(uninitialized); use Bio::EnsEMBL::Root; +our @ISA = qw(Bio::EnsEMBL::Root); + use Bio::EnsEMBL::Utils::Argument qw(rearrange); use Bio::EnsEMBL::Utils::Exception qw(deprecate); -use vars qw(@ISA); - - -@ISA = qw(Bio::EnsEMBL::Root); - - =head2 new - Arg 1 : -stable_id $stable_id - Arg [ ] : -version $version - Arg [ ] : -db_name $db_name - Arg [ ] : -adaptor $adaptor - Arg [ ] : -type $type - "Gene", "Transcript", "Translation", "Exon" - Example : none - Description: standard constructor with named arguments to create ArchiveStableId - Returntype : Bio::EnsEMBL::ArchiveStableId - Exceptions : none - Caller : Adaptor - Status : At Risk - : under development + Arg [STABLE_ID] : String $stable_id + Arg [VERSION] : Int $version + Arg [DB_NAME] : String $db_name + Arg [RELEASE] : String $release + Arg [ASSEMBLY_NAME] : String $assembly + Arg [TYPE] : String $type - "Gene", "Transcript", "Translation", "Exon" + Arg [ADAPTOR] : Bio::EnsEMBL::DBSQL::ArchiveStableIdAdaptor $adaptor + Example : none + Description : standard constructor with named arguments to create + ArchiveStableId + Returntype : Bio::EnsEMBL::ArchiveStableId + Exceptions : none + Caller : general, Bio::EnsEMBL::DBSQL::ArchiveStableIdAdaptor + Status : At Risk + : under development =cut - sub new { my $class = shift; $class = ref( $class ) || $class; my $self = bless {}, $class; - my ( $stable_id, $version, $db_name, $type, $adaptor ) = - 
rearrange( [ qw( STABLE_ID VERSION DB_NAME TYPE ADAPTOR ) ], @_ ); + my ($stable_id, $version, $db_name, $release, $assembly, $type, $adaptor) = + rearrange([qw( STABLE_ID VERSION DB_NAME RELEASE ASSEMBLY TYPE ADAPTOR)], + @_ ); $self->{'stable_id'} = $stable_id; $self->{'version'} = $version; $self->{'db_name'} = $db_name; + $self->{'release'} = $release; + $self->{'assembly'} = $assembly; $self->{'type'} = $type; $self->{'adaptor'} = $adaptor; @@ -95,93 +105,107 @@ sub new { } - =head2 new_fast - Arg [1] : string $stable_id - Arg [2] : int $version - Arg [3] : string $db_name - Arg [4] : string $type - Arg [5] : Bio::EnsEMBL::DBSQL::ArchiveStableIdAdaptor $adaptor - Example : none - Description: faster version of above constructor - Returntype : Bio::EnsEMBL::ArchiveStableId - Exceptions : none - Caller : general, Adaptor - Status : At Risk - : under development + Arg [1] : String $stable_id + Arg [2] : Int $version + Arg [3] : String $db_name + Arg [4] : String $release + Arg [5] : String $assembly + Arg [6] : String $type - "Gene", "Transcript", "Translation", "Exon" + Arg [7] : Bio::EnsEMBL::DBSQL::ArchiveStableIdAdaptor $adaptor + Example : none + Description : faster version of above constructor + Returntype : Bio::EnsEMBL::ArchiveStableId + Exceptions : none + Caller : general, Bio::EnsEMBL::DBSQL::ArchiveStableIdAdaptor + Status : At Risk + : under development =cut - sub new_fast { my $class = shift; - $class = ref( $class ) || $class; + $class = ref ($class) || $class; my $self = bless { - 'stable_id' => $_[0], - 'version' => $_[1], - 'db_name' => $_[2], - 'type' => $_[3], - 'adaptor' => $_[4] - }, $class; + 'stable_id' => $_[0], + 'version' => $_[1], + 'db_name' => $_[2], + 'release' => $_[3], + 'assembly' => $_[4], + 'type' => $_[5], + 'adaptor' => $_[6] + }, $class; + return $self; } =head2 get_all_predecessors - Args : none - Example : none - Description: Retrieve a list of ArchiveStableIds that were mapped to this one. 
- Returntype : listref Bio::EnsEMBL::ArchiveStableId - Exceptions : none - Caller : general - Status : At Risk - : under development + Args : none + Example : none + Description : Retrieve a list of ArchiveStableIds that were mapped to this + one. + Returntype : listref of Bio::EnsEMBL::ArchiveStableId + Exceptions : none + Caller : general + Status : At Risk + : under development =cut - sub get_all_predecessors { my $self = shift; + + my $predecessors = $self->adaptor->fetch_predecessors_by_archive_id($self); + + foreach my $pre (@$predecessors) { + $pre->successors($self); + } - $self->adaptor->fetch_pre_by_arch_id( $self ); + return $predecessors; } + =head2 get_all_successors - Args : none - Example : none - Description: Retrieve a list of ArchiveStableIds that this one was mapped to. - Returntype : listref Bio::EnsEMBL::ArchiveStableId - Exceptions : none - Caller : general - Status : At Risk - : under development + Args : none + Example : none + Description : Retrieve a list of ArchiveStableIds that this one was mapped to. + Returntype : listref Bio::EnsEMBL::ArchiveStableId + Exceptions : none + Caller : general + Status : At Risk + : under development =cut sub get_all_successors { my $self = shift; - $self->adaptor->fetch_succ_by_arch_id( $self ); + if ($self->{'successors'}) { + return $self->{'successors'}; + } else { + my $successors = $self->adaptor->fetch_successors_by_archive_id($self); + return $self->successors(@$successors); + } } - =head2 get_peptide - Args : none - Example : none - Description: Retrieves the peptide string for this ArchiveStableId. - Undef if this is not a Translation or cant be found in the database. - Returntype : string - Exceptions : none - Caller : general - Status : At Risk - : under development + Args : none + Example : none + Description : Retrieves the peptide string for this ArchiveStableId. + Returntype : String, or undef if this is not a Translation or cant be found + in the database. 
+ Exceptions : none + Caller : general + Status : At Risk + : under development =cut @@ -198,19 +222,18 @@ sub get_peptide { =head2 get_all_transcript_archive_ids - Args : none - Example : none - Description: If this is a genes ArchiveStableId and found in the database, this - function gets the transcripts archiveStableIds from it. Returns undef otherwise. - Returntype : listref Bio::EnsEMBL::ArchiveStableId - Exceptions : empty if not a gene stable id or not in database - Caller : general - Status : At Risk - : under development + Args : none + Example : none + Description : If this is a genes ArchiveStableId and found in the database, + this function gets the transcripts archiveStableIds from it. + Returntype : listref of Bio::EnsEMBL::ArchiveStableId + Exceptions : none + Caller : general + Status : At Risk + : under development =cut - sub get_all_transcript_archive_ids { my $self = shift; @@ -224,22 +247,20 @@ sub get_all_transcript_archive_ids { } - =head2 get_all_translation_archive_ids - Args : none - Example : none - Description: Retrieves the Translation ArchiveStableId for this transcript stable id. - If not found or this is not a transcripts id return undef - Returntype : Bio::EnsEMBL::ArchiveStableId - Exceptions : undef if not in db or not a Transcript - Caller : general - Status : At Risk - : under development + Args : none + Example : none + Description : Retrieves the Translation ArchiveStableIds for this transcript + stable id. 
+ Returntype : listref of Bio::EnsEMBL::ArchiveStableId + Exceptions : none + Caller : general + Status : At Risk + : under development =cut - sub get_all_translation_archive_ids { my $self = shift; @@ -259,52 +280,54 @@ sub get_all_translation_archive_ids { } } -sub get_translation_archive_id { - my $self = shift; - - deprecate("Use get_all_translation_archive_ids() instead"); - - return $self->get_all_translation_archive_ids; -} - - -# getter / setter attribute section - -sub type { - my $self = shift; - if( @_ ) { - $self->{'type'} = shift; - } - return $self->{'type'}; -} +# getter/setters for attributes sub stable_id { my $self = shift; - if( @_ ) { - $self->{'stable_id'} = shift; - } + $self->{'stable_id'} = shift if (@_); return $self->{'stable_id'}; } sub db_name { my $self = shift; - if( @_ ) { - $self->{'db_name'} = shift; - } + $self->{'db_name'} = shift if (@_); return $self->{'db_name'}; } +sub release { + my $self = shift; + $self->{'release'} = shift if (@_); + return $self->{'release'}; +} + +sub assembly { + my $self = shift; + $self->{'assembly'} = shift if (@_); + return $self->{'assembly'}; +} + sub adaptor { my $self = shift; - if( @_ ) { - $self->{'adaptor'} = shift; - } + $self->{'adaptor'} = shift if (@_); return $self->{'adaptor'}; } +sub type { + my $self = shift; + $self->{'type'} = shift if (@_); + return $self->{'type'}; +} + +sub successors { + my $self = shift; + $self->{'successors'} = \@_; + return $self->{'successors'}; +} + # lazy loading + sub version { my $self = shift; if( @_ ) { @@ -321,4 +344,16 @@ sub version { return $self->{'version'}; } + +# deprecated methods (changed to more descriptive names) + +sub get_translation_archive_id { + my $self = shift; + + deprecate("Use get_all_translation_archive_ids() instead"); + + return $self->get_all_translation_archive_ids; +} + + 1; diff --git a/modules/Bio/EnsEMBL/DBSQL/ArchiveStableIdAdaptor.pm b/modules/Bio/EnsEMBL/DBSQL/ArchiveStableIdAdaptor.pm index 1b820bcb37..2e687a7017 
100644 --- a/modules/Bio/EnsEMBL/DBSQL/ArchiveStableIdAdaptor.pm +++ b/modules/Bio/EnsEMBL/DBSQL/ArchiveStableIdAdaptor.pm @@ -6,6 +6,17 @@ Bio::EnsEMBL::ArchiveStableIdAdaptor =head1 SYNOPSIS +my $reg = "Bio::EnsEMBL::Registry"; +my $archiveStableIdAdaptor = + $reg->get_adaptor('human', 'core', 'ArchiveStableId'); + +my $arch_id = $archiveStableIdAdaptor->fetch_by_stable_id("ENSG00000068990"); +my @history = @{ $archiveStableIdAdaptor->fetch_stable_id_history($arch_id) }; + +foreach my $a (@history) { + print "Stable ID: ".$a->stable_id.".".$a->version."\n"; + print "Release: ".$a->release." (".$a->assembly.", ".$a->db_name.")\n"); +} =head1 DESCRIPTION @@ -22,12 +33,18 @@ This whole module has a status of At Risk as it is under development. =head1 METHODS + fetch_by_stable_id fetch_by_stable_id_version fetch_by_stable_id_dbname - fetch_pre_by_arch_id - fetch_succ_by_arch_id - list_dbnames + fetch_all_by_gene_archive_id + fetch_by_transcript_archive_id + fetch_predecessors_by_archive_id + fetch_successors_by_archive_id + fetch_stable_id_history + fetch_predecessor_history + fetch_successor_history get_peptide + list_dbnames _lookup_version _resolve_type @@ -39,6 +56,7 @@ Please see http://www.ensembl.org/code_licence.html for details =head1 AUTHOR Ensembl core API team +Currently maintained by Patrick Meidl <meidl@ebi.ac.uk> =head1 CONTACT @@ -47,27 +65,29 @@ Please post comments/questions to the Ensembl development list =cut + use strict; use warnings; no warnings qw(uninitialized); use Bio::EnsEMBL::DBSQL::BaseAdaptor; -use Bio::EnsEMBL::ArchiveStableId; - our @ISA = qw(Bio::EnsEMBL::DBSQL::BaseAdaptor); +use Bio::EnsEMBL::ArchiveStableId; +use Bio::EnsEMBL::Utils::Exception qw(deprecate); + =head2 fetch_by_stable_id - Arg [1] : string $stable_id - Example : none - Description: retrives an ArchiveStableId that is the latest incarnation of - given stable_id. If not in database, will return undef. 
- Returntype : Bio::EnsEMBL::ArchiveStableId - Exceptions : none - Caller : general - Status : At Risk - : under development + Arg [1] : string $stable_id + Example : none + Description : retrives an ArchiveStableId that is the latest incarnation of + given stable_id. + Returntype : Bio::EnsEMBL::ArchiveStableId or undef if not in database + Exceptions : none + Caller : general + Status : At Risk + : under development =cut @@ -93,16 +113,16 @@ sub fetch_by_stable_id { =head2 fetch_by_stable_id_version - Arg [1] : string $stable_id - Arg [2] : int $version - Example : none - Description: Create an archiveStableId with given version and stableId - No lookup is done in the database. - Returntype : Bio::EnsEMBL::ArchiveStableId - Exceptions : none - Caller : general - Status : At Risk - : under development + Arg [1] : string $stable_id + Arg [2] : int $version + Example : none + Description : Create an archiveStableId with given version and stableId + No lookup is done in the database. + Returntype : Bio::EnsEMBL::ArchiveStableId + Exceptions : none + Caller : general + Status : At Risk + : under development =cut @@ -126,16 +146,15 @@ sub fetch_by_stable_id_version { =head2 fetch_by_stable_id_dbname - Arg [1] : string $stable_id - Arg [2] : string $db_name - Example : none - Description: create an ArchiveStableId from given arguments. - No database lookup is done. - Returntype : Bio::EnsEMBL::ArchiveStableId - Exceptions : none - Caller : general - Status : At Risk - : under development + Arg [1] : string $stable_id + Arg [2] : string $db_name + Example : none + Description : create an ArchiveStableId from given arguments. 
+ Returntype : Bio::EnsEMBL::ArchiveStableId or undef if not in database + Exceptions : none + Caller : general + Status : At Risk + : under development =cut @@ -163,15 +182,15 @@ sub fetch_by_stable_id_dbname { =head2 fetch_all_by_gene_archive_id - Arg [1] : Bio::EnsEMBL::ArchiveStableId $gene_archive_id - Example : none - Description: Given the ArchiveStableId of a gene retrieves ArchiveStableIds - of Transcripts that make that gene. - Returntype : listref Bio::EnsEMBL::ArchiveStableId - Exceptions : empty if not a gene stable id or not in database - Caller : ArchiveStableId->get_all_transcripts() - Status : At Risk - : under development + Arg [1] : Bio::EnsEMBL::ArchiveStableId $gene_archive_id + Example : none + Description : Given the ArchiveStableId of a gene retrieves ArchiveStableIds + of Transcripts that make that gene. + Returntype : listref Bio::EnsEMBL::ArchiveStableId + Exceptions : none + Caller : Bio::EnsEMBL::ArchiveStableId->get_all_transcript_archive_ids + Status : At Risk + : under development =cut @@ -181,21 +200,25 @@ sub fetch_all_by_gene_archive_id { my @result = (); my $sql = qq( - SELECT ga.transcript_stable_id, ga.transcript_version, - m.old_db_name - FROM gene_archive ga, mapping_session m - WHERE ga.gene_stable_id = ? - AND ga.gene_version = ? - AND ga.mapping_session_id = m.mapping_session_id + SELECT + ga.transcript_stable_id, + ga.transcript_version, + m.old_db_name, + m.old_release, + m.old_assembly + FROM gene_archive ga, mapping_session m + WHERE ga.gene_stable_id = ? + AND ga.gene_version = ? 
+ AND ga.mapping_session_id = m.mapping_session_id ); - my $sth = $self->prepare( $sql ); + my $sth = $self->prepare($sql); $sth->bind_param(1,$gene_archive_id->stable_id,SQL_VARCHAR); $sth->bind_param(2,$gene_archive_id->version,SQL_SMALLINT); $sth->execute(); - my ( $stable_id, $version, $db_name ); - $sth->bind_columns( \$stable_id, \$version, \$db_name ); + my ($stable_id, $version, $db_name, $release, $assembly); + $sth->bind_columns(\$stable_id, \$version, \$db_name, \$release, \$assembly); while( $sth->fetch() ) { my $new_arch_id = Bio::EnsEMBL::ArchiveStableId->new @@ -204,7 +227,9 @@ sub fetch_all_by_gene_archive_id { -adaptor => $self, -stable_id => $stable_id, -type => "Transcript", - -db_name => $db_name + -db_name => $db_name, + -release => $release, + -assembly => $assembly ); push( @result, $new_arch_id ); @@ -217,15 +242,15 @@ sub fetch_all_by_gene_archive_id { =head2 fetch_by_transcript_archive_id - Arg [1] : Bio::EnsEMBL::ArchiveStableId - Example : none - Description: Given a Transcripts ArchiveStableId retrieves the - Translations ArchiveStableId. - Returntype : Bio::EnsEMBL::ArchiveStableId - Exceptions : undef if not in db or not a Transcript - Caller : Bio::EnsEMBL::ArchiveStableId->get_translation_archive_id - Status : At Risk - : under development + Arg [1] : Bio::EnsEMBL::ArchiveStableId + Example : none + Description : Given a Transcripts ArchiveStableId retrieves the + Translations ArchiveStableId. + Returntype : Bio::EnsEMBL::ArchiveStableId or undef if not in database + Exceptions : none + Caller : Bio::EnsEMBL::ArchiveStableId->get_all_translation_archive_ids + Status : At Risk + : under development =cut @@ -234,11 +259,15 @@ sub fetch_by_transcript_archive_id { my $transcript_archive_id = shift; my $sql = qq( - SELECT ga.translation_stable_id, ga.translation_version, - m.old_db_name - FROM gene_archive ga, mapping_session m - WHERE ga.transcript_stable_id = ? - AND ga.transcript_version = ? 
+ SELECT + ga.translation_stable_id, + ga.translation_version, + m.old_db_name, + m.old_release, + m.old_assembly + FROM gene_archive ga, mapping_session m + WHERE ga.transcript_stable_id = ? + AND ga.transcript_version = ? ); my $sth = $self->prepare( $sql ); @@ -246,7 +275,7 @@ sub fetch_by_transcript_archive_id { $sth->bind_param(2,$transcript_archive_id->version,SQL_SMALLINT); $sth->execute(); - my ( $stable_id, $version, $db_name ) = $sth->fetchrow_array(); + my ($stable_id, $version, $db_name, $release, $assembly) = $sth->fetchrow_array(); $sth->finish(); @@ -257,7 +286,9 @@ sub fetch_by_transcript_archive_id { -adaptor => $self, -stable_id => $stable_id, -type => "Translation", - -db_name => $db_name + -db_name => $db_name, + -release => $release, + -assembly => $assembly ); return $new_arch_id; @@ -267,21 +298,21 @@ sub fetch_by_transcript_archive_id { } -=head2 fetch_pre_by_arch_id +=head2 fetch_predecessors_by_archive_id - Arg [1] : Bio::EnsEMBL::ArchiveStableId - Example : none - Description: Retrieve a list of ArchiveStableIds that were mapped to the - given one. - Returntype : listref Bio::EnsEMBL::ArchiveStableId - Exceptions : none - Caller : Bio::EnsEMBL::ArchiveStableId->get_all_predecessors - Status : At Risk - : under development + Arg [1] : Bio::EnsEMBL::ArchiveStableId + Example : none + Description : Retrieve a list of ArchiveStableIds that were mapped to the + given one. 
+ Returntype : listref Bio::EnsEMBL::ArchiveStableId + Exceptions : none + Caller : Bio::EnsEMBL::ArchiveStableId->get_all_predecessors + Status : At Risk + : under development =cut -sub fetch_pre_by_arch_id { +sub fetch_predecessors_by_archive_id { my $self = shift; my $arch_id = shift; my @result; @@ -293,27 +324,35 @@ sub fetch_pre_by_arch_id { } my $sql = qq( - SELECT sie.old_stable_id, sie.old_version, m.old_db_name - FROM mapping_session m, stable_id_event sie + SELECT + sie.old_stable_id, + sie.old_version, + m.old_db_name, + m.old_release, + m.old_assembly + FROM mapping_session m, stable_id_event sie WHERE sie.mapping_session_id = m.mapping_session_id - AND sie.new_stable_id = ? - AND m.new_db_name = ? + AND sie.new_stable_id = ? + AND m.new_db_name = ? ); my $sth = $self->prepare( $sql ); $sth->bind_param(1,$arch_id->stable_id, SQL_VARCHAR); $sth->bind_param(2,$arch_id->db_name,SQL_VARCHAR); $sth->execute(); - my ( $old_stable_id, $old_version, $old_db_name ); - $sth->bind_columns( \$old_stable_id, \$old_version, \$old_db_name ); + + my ($old_stable_id, $old_version, $old_db_name, $old_release, $old_assembly); + $sth->bind_columns(\$old_stable_id, \$old_version, \$old_db_name, \$old_release, \$old_assembly); + while( $sth->fetch() ) { if( defined $old_stable_id ) { - my $old_arch_id = Bio::EnsEMBL::ArchiveStableId->new ( -version => $old_version, -stable_id => $old_stable_id, -db_name => $old_db_name, + -release => $old_release, + -assembly => $old_assembly, -adaptor => $self ); _resolve_type( $old_arch_id ); @@ -326,134 +365,238 @@ sub fetch_pre_by_arch_id { } -=head2 fetch_all_currently_related +=head2 fetch_successors_by_archive_id - Arg [1] : Bio::EnsEMBL::ArchiveStableId $arch_id - The one where you want to know the currently related ones. - Example : none - Description: Gives back a list of archive stable ids which are successors in - the stable_id_event tree of the given stable_id. Might well be - empty. 
- Returntype : listref Bio::EnsEMBL::ArchiveStableId - Exceptions : none - Caller : webcode for archive - Status : At Risk - : under development + Arg [1] : Bio::EnsEMBL::ArchiveStableId + Example : none + Description : Retrieve a list of ArchiveStableIds that the given one was + mapped to. This method goes forward only one level, to retrieve + a full successor history use fetch_successor_history(). + Returntype : listref Bio::EnsEMBL::ArchiveStableId + Exceptions : none + Caller : Bio::EnsEMBL::ArchiveStableId->get_all_successors + Status : At Risk + : under development =cut -sub fetch_all_currently_related { +sub fetch_successors_by_archive_id { my $self = shift; my $arch_id = shift; + my @result; - my $current_db_name = $self->list_dbnames()->[0]; - my $dbname = $arch_id->db_name; + + if( ! ( defined $arch_id->stable_id() && + defined $arch_id->db_name() )) { + $self->throw( "Need db_name for successor retrieval" ); + } - my $old = []; + my $sql = qq( + SELECT + sie.new_stable_id, + sie.new_version, + m.new_db_name, + m.new_release, + m.new_assembly + FROM mapping_session m, stable_id_event sie + WHERE sie.mapping_session_id = m.mapping_session_id + AND sie.old_stable_id = ? + AND m.old_db_name = ? 
+ ); + + my $sth = $self->prepare( $sql ); + $sth->bind_param(1,$arch_id->stable_id,SQL_VARCHAR); + $sth->bind_param(2,$arch_id->db_name,SQL_VARCHAR); + $sth->execute(); + + my ($new_stable_id, $new_version, $new_db_name, $new_release, $new_assembly); + $sth->bind_columns(\$new_stable_id, \$new_version, \$new_db_name, \$new_release, \$new_assembly); + + while( $sth->fetch() ) { + if( defined $new_stable_id ) { + my $new_arch_id = Bio::EnsEMBL::ArchiveStableId->new + ( + -version => $new_version, + -stable_id => $new_stable_id, + -db_name => $new_db_name, + -release => $new_release, + -assembly => $new_assembly, + -adaptor => $self + ); + + _resolve_type($new_arch_id); + push( @result, $new_arch_id ); + } + } + $sth->finish(); + + return \@result; +} + + +=head2 fetch_archive_id_history + + Arg [1] : Bio::EnsEMBL::ArchiveStableId $arch_id + Example : none + Description : Gives back a list of archive stable ids which are successors or + predecessors in the stable_id_event tree of the given + stable_id. Might well be empty. This is not the complete network + this stable id belongs to, but rather branches out from this id + only. + Returntype : listref of Bio::EnsEMBL::ArchiveStableId + Since every ArchiveStableId knows about it's successors, this is + a linked tree. + Exceptions : none + Caller : webcode for archive + Status : At Risk + : under development + +=cut + +sub fetch_archive_id_history { + my $self = shift; + my $arch_id = shift; + + my @result = ( + $arch_id, + @{ $self->fetch_predecessor_history($arch_id) }, + @{ $self->fetch_successor_history($arch_id) } + ); + + # filter duplicates + my %unique = map { join(":", $_->stable_id, $_->version, $_->release) => $_ } + @result; + @result = values %unique; + + return \@result; +} + + +=head2 fetch_successor_history + + Arg [1] : Bio::EnsEMBL::ArchiveStableId $arch_id + Example : none + Description : Gives back a list of archive stable ids which are successors in + the stable_id_event tree of the given stable_id. 
Might well be + empty. + Returntype : listref Bio::EnsEMBL::ArchiveStableId + Since every ArchiveStableId knows about it's successors, this is + a linked tree. + Exceptions : none + Caller : webcode for archive + Status : At Risk + : under development + +=cut + +sub fetch_successor_history { + my $self = shift; + my $arch_id = shift; + + my $current_db_name = $self->list_dbnames->[0]; + my $dbname = $arch_id->db_name; - if( $dbname eq $current_db_name ) { - return [ $arch_id ]; + if ($dbname eq $current_db_name) { + return [$arch_id]; } - push( @$old, $arch_id ); + my $old = []; + my @result = (); + + push @$old, $arch_id; - while( $dbname ne $current_db_name ) { + while ($dbname ne $current_db_name) { my $new = []; - while( my $asi = ( shift @$old )) { - push( @$new, @{$asi->get_all_successors()}); + while (my $asi = (shift @$old)) { + push @$new, @{ $asi->get_all_successors }; } - if( @$new ) { - $dbname = $new->[0]->db_name(); + if (@$new) { + $dbname = $new->[0]->db_name; } else { last; } @$old = @$new; + push @result, @$new; } - my %stable_ids; - my @result; - while( my $arch_id = ( shift @$old )) { - if( exists $stable_ids{ $arch_id->stable_id } ) { - next; - } else { - push( @result, $arch_id ); - $stable_ids{ $arch_id->stable_id() } = 1; - } - } + # filter duplicates + my %unique = map { join(":", $_->stable_id, $_->version, $_->release) => $_ } + @result; + @result = values %unique; return \@result; } -=head2 fetch_succ_by_arch_id +=head2 fetch_predecessor_history - Arg [1] : Bio::EnsEMBL::ArchiveStableId - Example : none - Description: Retrieve a list of ArchiveStableIds that the given one was - mapped to. 
- Returntype : listref Bio::EnsEMBL::ArchiveStableId - Exceptions : none - Caller : Bio::EnsEMBL::ArchiveStableId->get_all_successors - Status : At Risk - : under development + Arg [1] : Bio::EnsEMBL::ArchiveStableId $arch_id + Example : none + Description : Gives back a list of archive stable ids which are predecessors + in the stable_id_event tree of the given stable_id. Might well + be empty. + Returntype : listref Bio::EnsEMBL::ArchiveStableId + Since every ArchiveStableId knows about it's successors, this is + a linked tree. + Exceptions : none + Caller : webcode for archive + Status : At Risk + : under development =cut -sub fetch_succ_by_arch_id { +sub fetch_predecessor_history { my $self = shift; my $arch_id = shift; - my @result; - - if( ! ( defined $arch_id->stable_id() && - defined $arch_id->db_name() )) { - $self->throw( "Need db_name for successor retrieval" ); + my $oldest_db_name = $self->list_dbnames->[-1]; + my $dbname = $arch_id->db_name; + + if ($dbname eq $oldest_db_name) { + return [$arch_id]; } - my $sql = qq( - SELECT sie.new_stable_id, sie.new_version, m.new_db_name - FROM mapping_session m, stable_id_event sie - WHERE sie.mapping_session_id = m.mapping_session_id - AND sie.old_stable_id = ? - AND m.old_db_name = ? 
- ); + my $old = []; + my @result = (); - my $sth = $self->prepare( $sql ); - $sth->bind_param(1,$arch_id->stable_id,SQL_VARCHAR); - $sth->bind_param(2,$arch_id->db_name,SQL_VARCHAR); - $sth->execute(); - my ( $new_stable_id, $new_version, $new_db_name ); - $sth->bind_columns( \$new_stable_id, \$new_version, \$new_db_name ); - while( $sth->fetch() ) { - if( defined $new_stable_id ) { - my $new_arch_id = Bio::EnsEMBL::ArchiveStableId->new - ( - -version => $new_version, - -stable_id => $new_stable_id, - -db_name => $new_db_name, - -adaptor => $self - ); - _resolve_type( $new_arch_id ); - push( @result, $new_arch_id ); + push @$old, $arch_id; + + while ($dbname ne $oldest_db_name) { + my $new = []; + while (my $asi = (shift @$old)) { + push @$new, @{ $asi->get_all_predecessors }; } + + if( @$new ) { + $dbname = $new->[0]->db_name; + } else { + last; + } + @$old = @$new; + push @result, @$new; } - $sth->finish(); + + # filter duplicates + my %unique = map { join(":", $_->stable_id, $_->version, $_->release) => $_ } + @result; + @result = values %unique; + return \@result; } =head2 list_dbnames - Args : none - Example : none - Description: A list of available database names from the latest (current) to - the oldest (ordered). - Returntype : listref string - Exceptions : none - Caller : general - Status : At Risk - : under development + Args : none + Example : none + Description : A list of available database names from the latest (current) to + the oldest (ordered). + Returntype : listref string + Exceptions : none + Caller : general + Status : At Risk + : under development =cut @@ -463,8 +606,8 @@ sub list_dbnames { if( ! 
defined $self->{'dbnames'} ) { my $sql = qq( SELECT old_db_name, new_db_name - FROM mapping_session - ORDER BY created DESC + FROM mapping_session + ORDER BY created DESC ); my $sth = $self->prepare( $sql ); $sth->execute(); @@ -495,15 +638,15 @@ sub list_dbnames { =head2 get_peptide - Arg [1] : - Example : none - Description: Retrieves the peptide string for given ArchiveStableId. If its - not a peptide or not in the database returns undef. - Returntype : string - Exceptions : none - Caller : ArchiveStableId->get_peptide or general - Status : At Risk - : under development + Arg [1] : Bio::EnsEMBL::ArchiveStableId $arch_id + Example : none + Description : Retrieves the peptide string for given ArchiveStableId. If its + not a peptide or not in the database returns undef. + Returntype : string or undef + Exceptions : none + Caller : Bio::EnsEMBL::ArchiveStableId->get_peptide, general + Status : At Risk + : under development =cut @@ -556,35 +699,48 @@ sub _lookup_version { if( ! defined $arch_id->{'db_name'} ) { # latest version of this stable id - my $sql_tmp = "SELECT new_db_name, new_version "; - $sql_tmp .= "FROM stable_id_event sie, mapping_session m "; - $sql_tmp .= "WHERE sie.mapping_session_id = m.mapping_session_id AND new_stable_id = \"@{[$arch_id->stable_id]}\" $EXTRA_SQL "; - $sql_tmp .= "ORDER BY m.created DESC"; - $sql = $self->dbc->add_limit_clause($sql_tmp,1); + my $sql_tmp = qq( + SELECT + m.new_db_name, + m.new_release, + m.new_assembly, + sie.new_version + FROM stable_id_event sie, mapping_session m + WHERE sie.mapping_session_id = m.mapping_session_id + AND new_stable_id = "@{[$arch_id->stable_id]}" $EXTRA_SQL + ORDER BY m.created DESC + ); + $sql = $self->dbc->add_limit_clause($sql_tmp, 1); } else { $sql = qq( - SELECT old_db_name, old_version - FROM stable_id_event sie, mapping_session m - WHERE sie.mapping_session_id = m.mapping_session_id - AND old_stable_id = "@{[$arch_id->stable_id]}" - AND m.old_db_name = "@{[$arch_id->db_name]}" - 
$EXTRA_SQL - ); + SELECT + m.old_db_name, + m.old_release, + m.old_assembly, + sie.old_version + FROM stable_id_event sie, mapping_session m + WHERE sie.mapping_session_id = m.mapping_session_id + AND sie.old_stable_id = "@{[$arch_id->stable_id]}" + AND m.old_db_name = "@{[$arch_id->db_name]}" + $EXTRA_SQL + ); } my $id_type; my $sth = $self->prepare( $sql ); $sth->execute(); - my ( $db_name, $version ) = $sth->fetchrow_array(); + my ($db_name, $release, $assembly, $version) = $sth->fetchrow_array(); $sth->finish(); if( ! defined $db_name ) { return 0; } else { - $arch_id->version( $version ); + $arch_id->version($version); if( ! defined $arch_id->{'db_name'} ) { - $arch_id->db_name( $db_name ); + $arch_id->db_name($db_name); + $arch_id->release($release); + $arch_id->assembly($assembly); } } @@ -613,4 +769,35 @@ sub _resolve_type { $arch_id->type( $id_type ); } + +# deprecated methods (changed to more descriptive names) + +sub fetch_pre_by_arch_id { + my $self = shift; + my $arch_id = shift; + + deprecate("Use fetch_predecessors_by_archive_id() instead"); + + return $self->fetch_predecessors_by_archive_id($arch_id); +} + +sub fetch_succ_by_arch_id { + my $self = shift; + my $arch_id = shift; + + deprecate("Use fetch_successors_by_archive_id() instead"); + + return $self->fetch_successors_by_archive_id($arch_id); +} + +sub fetch_all_currently_related { + my $self = shift; + my $arch_id = shift; + + deprecate("Use fetch_successor_history() instead"); + + return $self->fetch_successor_history($arch_id); +} + + 1; diff --git a/modules/t/archiveStableId.t b/modules/t/archiveStableId.t index 1179fcfd62..a0bc9395e1 100644 --- a/modules/t/archiveStableId.t +++ b/modules/t/archiveStableId.t @@ -105,11 +105,11 @@ for my $asi ( @$succ_asis ) { ok( scalar( @$succ_asis ) == 0 ); # -# 8 fetch_all_currently_related +# 8 fetch_successor_history # $asi = $asia->fetch_by_stable_id_dbname( "G2", "release_1" ); -my $asis = $asia->fetch_all_currently_related( $asi ); +my $asis = 
$asia->fetch_successor_history( $asi ); debug( "\tCurrently related from G2.release_1" ); for my $asi ( @$asis ) { diff --git a/sql/patch_37_38.sql b/sql/patch_37_38.sql index f168171e26..15b45e775a 100644 --- a/sql/patch_37_38.sql +++ b/sql/patch_37_38.sql @@ -56,6 +56,15 @@ CREATE TABLE unmapped_reason ( ) COLLATE=latin1_swedish_ci TYPE=MyISAM; +# Add some more columns to mapping_session + +ALTER TABLE mapping_session ADD COLUMN new_assembly varchar(20) NOT NULL default '' AFTER new_db_name; +ALTER TABLE mapping_session ADD COLUMN old_assembly varchar(20) NOT NULL default '' AFTER new_db_name; +ALTER TABLE mapping_session ADD COLUMN new_release varchar(5) NOT NULL default '' AFTER new_db_name; +ALTER TABLE mapping_session ADD COLUMN old_release varchar(5) NOT NULL default '' AFTER new_db_name; +ALTER TABLE mapping_session CHANGE created created DATETIME NOT NULL; + + # Add the new oligo tables, which replace the affy tables CREATE TABLE oligo_feature ( diff --git a/sql/table.sql b/sql/table.sql index 436dc0d7df..bfa796ed9f 100755 --- a/sql/table.sql +++ b/sql/table.sql @@ -12,54 +12,82 @@ # - internal ids are integers named tablename_id # - same name is given in foreign key relations + +################################################################################ +# +# Table structure for table 'oligo_feature' +# + CREATE TABLE oligo_feature ( - oligo_feature_id INT NOT NULL auto_increment, - seq_region_id INT UNSIGNED NOT NULL, - seq_region_start INT NOT NULL, - seq_region_end INT NOT NULL, - seq_region_strand TINYINT NOT NULL, - - mismatches TINYINT, - oligo_probe_id INT NOT NULL, - analysis_id INT NOT NULL, - - PRIMARY KEY (oligo_feature_id), - KEY seq_region_idx (seq_region_id, seq_region_start), - KEY probe_idx (oligo_probe_id) + + oligo_feature_id INT NOT NULL AUTO_INCREMENT, + seq_region_id INT UNSIGNED NOT NULL, + seq_region_start INT NOT NULL, + seq_region_end INT NOT NULL, + seq_region_strand TINYINT NOT NULL, + mismatches TINYINT, + oligo_probe_id 
INT NOT NULL, + analysis_id INT NOT NULL, + + PRIMARY KEY (oligo_feature_id), + KEY seq_region_idx (seq_region_id, seq_region_start), + KEY probe_idx (oligo_probe_id) + ) COLLATE=latin1_swedish_ci TYPE=MyISAM; + +################################################################################ +# +# Table structure for table 'oligo_probe' +# + CREATE TABLE oligo_probe ( - oligo_probe_id INT NOT NULL auto_increment, - oligo_array_id INT NOT NULL, - probeset VARCHAR(40), - name VARCHAR(20), - description TEXT, - length SMALLINT NOT NULL, - - PRIMARY KEY (oligo_probe_id, oligo_array_id), - KEY probeset_idx (probeset), - KEY array_idx (oligo_array_id) + + oligo_probe_id INT NOT NULL AUTO_INCREMENT, + oligo_array_id INT NOT NULL, + probeset VARCHAR(40), + name VARCHAR(20), + description TEXT, + length SMALLINT NOT NULL, + + PRIMARY KEY (oligo_probe_id, oligo_array_id), + KEY probeset_idx (probeset), + KEY array_idx (oligo_array_id) + ) COLLATE=latin1_swedish_ci TYPE=MyISAM; + +################################################################################ +# +# Table structure for table 'oligo_array' +# + CREATE TABLE oligo_array ( - oligo_array_id INT NOT NULL auto_increment, - parent_array_id INT, - probe_setsize TINYINT NOT NULL, - name VARCHAR(40) NOT NULL, - type ENUM( 'AFFY', 'OLIGO' ), - PRIMARY KEY (oligo_array_id) + oligo_array_id INT NOT NULL auto_increment, + parent_array_id INT, + probe_setsize TINYINT NOT NULL, + name VARCHAR(40) NOT NULL, + type ENUM( 'AFFY', 'OLIGO' ), + + PRIMARY KEY (oligo_array_id) + ) COLLATE=latin1_swedish_ci TYPE=MyISAM; +################################################################################ +# +# Table structure for table 'alt_allele' +# + CREATE TABLE alt_allele ( - alt_allele_id INT NOT NULL auto_increment, - gene_id INT NOT NULL, + alt_allele_id INT NOT NULL AUTO_INCREMENT, + gene_id INT NOT NULL, - UNIQUE gene_idx( gene_id ), - UNIQUE allele_idx( alt_allele_id, gene_id ) -) COLLATE=latin1_swedish_ci; - + UNIQUE 
gene_idx (gene_id), + UNIQUE allele_idx (alt_allele_id, gene_id) + +) COLLATE=latin1_swedish_ci TYPE=MyISAM; ################################################################################ @@ -67,10 +95,12 @@ CREATE TABLE alt_allele ( # Table structure for table 'analysis' # # semantics: +# # analysis_id - internal id -# created - date to distinguish newer and older versions off the -# same analysis. Not well maintained so far. -# logic_name string to identify the analysis. Used mainly inside pipeline. +# created +# - date to distinguish newer and older versions of the same analysis. Not +# well maintained so far. +# logic_name - string to identify the analysis. Used mainly inside pipeline. # db, db_version, db_file # - db should be a database name, db version the version of that db # db_file the file system location of that database, @@ -79,42 +109,50 @@ CREATE TABLE alt_allele ( # - The binary used to create a feature. Similar semantic to above # module, module_version # - Perl module names (RunnableDBS usually) executing this analysis. 
-# parameters a paramter string which is processed by the perl module +# parameters - a parameter string which is processed by the perl module # gff_source, gff_feature # - how to make a gff dump from features with this analysis CREATE TABLE analysis ( - analysis_id int(10) unsigned NOT NULL auto_increment, + analysis_id INT(10) UNSIGNED NOT NULL AUTO_INCREMENT, created datetime DEFAULT '0000-00-00 00:00:00' NOT NULL, - logic_name varchar(40) not null, - db varchar(120), - db_version varchar(40), - db_file varchar(120), - program varchar(80), - program_version varchar(40), - program_file varchar(80), - parameters varchar(255), - module varchar(80), - module_version varchar(40), - gff_source varchar(40), - gff_feature varchar(40), + logic_name VARCHAR(40) NOT NULL, + db VARCHAR(120), + db_version VARCHAR(40), + db_file VARCHAR(120), + program VARCHAR(80), + program_version VARCHAR(40), + program_file VARCHAR(80), + parameters VARCHAR(255), + module VARCHAR(80), + module_version VARCHAR(40), + gff_source VARCHAR(40), + gff_feature VARCHAR(40), PRIMARY KEY (analysis_id), - KEY logic_name_idx( logic_name ), - UNIQUE(logic_name) + KEY logic_name_idx (logic_name), + UNIQUE (logic_name) + +) COLLATE=latin1_swedish_ci TYPE=MyISAM; -) COLLATE=latin1_swedish_ci; +################################################################################ +# +# Table structure for table 'analysis_description' +# CREATE TABLE analysis_description ( - analysis_id int(10) unsigned NOT NULL, - description text, - display_label varchar(255), - KEY analysis_idx( analysis_id ) -) COLLATE=latin1_swedish_ci; + analysis_id INT(10) UNSIGNED NOT NULL, + description TEXT, + display_label VARCHAR(255), + + KEY analysis_idx (analysis_id) + +) COLLATE=latin1_swedish_ci TYPE=MyISAM; + ################################################################################ # @@ -123,53 +161,63 @@ CREATE TABLE analysis_description ( # This table stores DNA sequence. 
CREATE TABLE dna ( - seq_region_id int unsigned NOT NULL, - sequence mediumtext NOT NULL, + + seq_region_id INT UNSIGNED NOT NULL, + sequence MEDIUMTEXT NOT NULL, PRIMARY KEY (seq_region_id) -) MAX_ROWS = 750000 AVG_ROW_LENGTH = 19000; +) COLLATE=latin1_swedish_ci TYPE=MyISAM MAX_ROWS=750000 AVG_ROW_LENGTH=19000; + ################################################################################ # # Table structure for table 'dnac' # -# Contains equivalent data to dna table, but 4 letters of DNA code are represented -# by a single binary character, based on 2 bit encoding -# do not need to worry about ambiguity of length, since this is stored in contig.length -# n_line column contains start-end pairs of coordinates in the string that are really Ns +# Contains equivalent data to dna table, but 4 letters of DNA code are +# represented by a single binary character, based on 2 bit encoding +# +# do not need to worry about ambiguity of length, since this is stored in +# contig.length +# +# n_line column contains start-end pairs of coordinates in the string that are +# really Ns CREATE TABLE dnac ( - seq_region_id int unsigned NOT NULL, - sequence mediumblob NOT NULL, - n_line text, + + seq_region_id INT UNSIGNED NOT NULL, + sequence MEDIUMBLOB NOT NULL, + n_line TEXT, PRIMARY KEY (seq_region_id) -) MAX_ROWS = 750000 AVG_ROW_LENGTH = 19000; + +) COLLATE=latin1_swedish_ci TYPE=MyISAM MAX_ROWS=750000 AVG_ROW_LENGTH=19000; + ################################################################################ # # Table structure for table 'exon' # -# Note seq_region_start always less that seq_region_end, i.e. -# when the exon is on the other strand the seq_region_start -# is specifying the 3prime end of the exon. +# Note seq_region_start is always less than seq_region_end, i.e. when the exon is +# on the other strand the seq_region_start is specifying the 3prime end of the +# exon. 
CREATE TABLE exon ( - exon_id int unsigned NOT NULL auto_increment, - seq_region_id int(10) unsigned NOT NULL, # foreign key, seq_region:seq_region_id - seq_region_start int(10) unsigned NOT NULL, # start of exon within seq_region - seq_region_end int(10) unsigned NOT NULL, # end of exon within specified seq_region - seq_region_strand tinyint(2) NOT NULL, # 1 or -1 depending on the strand of the exon - - phase tinyint(2) NOT NULL, - end_phase tinyint(2) NOT NULL, + exon_id INT UNSIGNED NOT NULL AUTO_INCREMENT, + seq_region_id INT(10) UNSIGNED NOT NULL, + seq_region_start INT(10) UNSIGNED NOT NULL, + seq_region_end INT(10) UNSIGNED NOT NULL, + seq_region_strand TINYINT(2) NOT NULL, + + phase TINYINT(2) NOT NULL, + end_phase TINYINT(2) NOT NULL, PRIMARY KEY (exon_id), - KEY seq_region_idx (seq_region_id, seq_region_start ) + KEY seq_region_idx (seq_region_id, seq_region_start) + +) COLLATE=latin1_swedish_ci TYPE=MyISAM; -) COLLATE=latin1_swedish_ci; ################################################################################ # @@ -178,16 +226,17 @@ CREATE TABLE exon ( CREATE TABLE exon_stable_id ( - exon_id int unsigned not null, # foreign key exon:exon_id - stable_id VARCHAR(128) not null, - version int(10), + exon_id INT UNSIGNED NOT NULL, + stable_id VARCHAR(128) NOT NULL, + version INT(10), created_date DATETIME NOT NULL, modified_date DATETIME NOT NULL, - PRIMARY KEY( exon_id ), - UNIQUE( stable_id, version ) + PRIMARY KEY (exon_id), + UNIQUE (stable_id, version) + +) COLLATE=latin1_swedish_ci TYPE=MyISAM; -) COLLATE=latin1_swedish_ci; ################################################################################ # @@ -199,15 +248,16 @@ CREATE TABLE exon_stable_id ( CREATE TABLE exon_transcript ( - exon_id INT unsigned NOT NULL, # foreign key exon:exon_id - transcript_id INT unsigned NOT NULL, # foregin key transcript:transcript_id - rank int(10) NOT NULL, + exon_id INT UNSIGNED NOT NULL, + transcript_id INT UNSIGNED NOT NULL, + rank INT(10) NOT NULL, 
PRIMARY KEY (exon_id,transcript_id,rank), KEY transcript (transcript_id), - KEY exon ( exon_id ) + KEY exon (exon_id) + +) COLLATE=latin1_swedish_ci TYPE=MyISAM; -) COLLATE=latin1_swedish_ci; ################################################################################ # @@ -216,21 +266,22 @@ CREATE TABLE exon_transcript ( CREATE TABLE simple_feature ( - simple_feature_id int unsigned not null auto_increment, - seq_region_id int(10) unsigned NOT NULL, - seq_region_start int(10) unsigned NOT NULL, - seq_region_end int(10) unsigned NOT NULL, - seq_region_strand tinyint(1) NOT NULL, - display_label varchar(40) NOT NULL, - analysis_id int(10) unsigned NOT NULL, - score double, + simple_feature_id INT UNSIGNED NOT NULL AUTO_INCREMENT, + seq_region_id INT(10) UNSIGNED NOT NULL, + seq_region_start INT(10) UNSIGNED NOT NULL, + seq_region_end INT(10) UNSIGNED NOT NULL, + seq_region_strand TINYINT(1) NOT NULL, + display_label VARCHAR(40) NOT NULL, + analysis_id INT(10) UNSIGNED NOT NULL, + score DOUBLE, - PRIMARY KEY ( simple_feature_id ), - KEY seq_region_idx (seq_region_id, seq_region_start ), - KEY analysis_idx( analysis_id ), - KEY hit_idx( display_label ) + PRIMARY KEY (simple_feature_id), + KEY seq_region_idx (seq_region_id, seq_region_start), + KEY analysis_idx (analysis_id), + KEY hit_idx (display_label) + +) COLLATE=latin1_swedish_ci TYPE=MyISAM MAX_ROWS=100000000 AVG_ROW_LENGTH=80; -) MAX_ROWS=100000000 AVG_ROW_LENGTH=80; ################################################################################ # @@ -239,27 +290,28 @@ CREATE TABLE simple_feature ( CREATE TABLE protein_align_feature ( - protein_align_feature_id int unsigned not null auto_increment, - seq_region_id int(10) unsigned NOT NULL, - seq_region_start int(10) unsigned NOT NULL, - seq_region_end int(10) unsigned NOT NULL, - seq_region_strand tinyint(1) DEFAULT '1' NOT NULL, - hit_start int(10) NOT NULL, - hit_end int(10) NOT NULL, - hit_name varchar(40) NOT NULL, - analysis_id int(10) unsigned NOT 
NULL, - score double, - evalue double, - perc_ident float, - cigar_line text, - - PRIMARY KEY ( protein_align_feature_id ), - KEY seq_region_idx( seq_region_id, analysis_id, seq_region_start, score ), - KEY seq_region_idx_2( seq_region_id, seq_region_start), - KEY hit_idx( hit_name ), - KEY analysis_idx( analysis_id ) - -) MAX_ROWS=100000000 AVG_ROW_LENGTH=80; + protein_align_feature_id INT UNSIGNED NOT NULL AUTO_INCREMENT, + seq_region_id INT(10) UNSIGNED NOT NULL, + seq_region_start INT(10) UNSIGNED NOT NULL, + seq_region_end INT(10) UNSIGNED NOT NULL, + seq_region_strand TINYINT(1) DEFAULT '1' NOT NULL, + hit_start INT(10) NOT NULL, + hit_end INT(10) NOT NULL, + hit_name VARCHAR(40) NOT NULL, + analysis_id INT(10) UNSIGNED NOT NULL, + score DOUBLE, + evalue DOUBLE, + perc_ident FLOAT, + cigar_line TEXT, + + PRIMARY KEY (protein_align_feature_id), + KEY seq_region_idx (seq_region_id, analysis_id, seq_region_start, score), + KEY seq_region_idx_2 (seq_region_id, seq_region_start), + KEY hit_idx (hit_name), + KEY analysis_idx (analysis_id) + +) COLLATE=latin1_swedish_ci TYPE=MyISAM MAX_ROWS=100000000 AVG_ROW_LENGTH=80; + ################################################################################ # @@ -268,50 +320,52 @@ CREATE TABLE protein_align_feature ( CREATE TABLE dna_align_feature ( - dna_align_feature_id int unsigned not null auto_increment, - seq_region_id int(10) unsigned NOT NULL, - seq_region_start int(10) unsigned NOT NULL, - seq_region_end int(10) unsigned NOT NULL, - seq_region_strand tinyint(1) NOT NULL, - hit_start int NOT NULL, - hit_end int NOT NULL, - hit_strand tinyint(1) NOT NULL, - hit_name varchar(40) NOT NULL, - analysis_id int(10) unsigned NOT NULL, - score double, - evalue double, - perc_ident float, - cigar_line text, + dna_align_feature_id INT UNSIGNED NOT NULL AUTO_INCREMENT, + seq_region_id INT(10) UNSIGNED NOT NULL, + seq_region_start INT(10) UNSIGNED NOT NULL, + seq_region_end INT(10) UNSIGNED NOT NULL, + seq_region_strand 
TINYINT(1) NOT NULL, + hit_start INT NOT NULL, + hit_end INT NOT NULL, + hit_strand TINYINT(1) NOT NULL, + hit_name VARCHAR(40) NOT NULL, + analysis_id INT(10) UNSIGNED NOT NULL, + score DOUBLE, + evalue DOUBLE, + perc_ident FLOAT, + cigar_line TEXT, + + PRIMARY KEY (dna_align_feature_id), + KEY seq_region_idx (seq_region_id, analysis_id, seq_region_start, score), + KEY seq_region_idx_2 (seq_region_id, seq_region_start), + KEY hit_idx (hit_name), + KEY analysis_idx (analysis_id) - PRIMARY KEY ( dna_align_feature_id ), - KEY seq_region_idx( seq_region_id, analysis_id, seq_region_start, score ), - KEY seq_region_idx_2( seq_region_id, seq_region_start), - KEY hit_idx( hit_name ), - KEY analysis_idx( analysis_id ) +) COLLATE=latin1_swedish_ci TYPE=MyISAM MAX_ROWS=100000000 AVG_ROW_LENGTH=80; -) MAX_ROWS=100000000 AVG_ROW_LENGTH=80; ################################################################################ # # Table structure for table 'repeat_consensus' # +# repeat_class examples: SINE, LINE, DNA Transposon, Retroviral LTR, +# Satellite, Tandem CREATE TABLE repeat_consensus ( - repeat_consensus_id int unsigned NOT NULL auto_increment, - repeat_name varchar(255) NOT NULL, - repeat_class varchar(100) NOT NULL, # eg: SINE, LINE, DNA Transposon, - # Retroviral LTR, Satellite,Tandem - repeat_type varchar(40) NOT NULL, - repeat_consensus text, + repeat_consensus_id INT UNSIGNED NOT NULL AUTO_INCREMENT, + repeat_name VARCHAR(255) NOT NULL, + repeat_class VARCHAR(100) NOT NULL, + repeat_type VARCHAR(40) NOT NULL, + repeat_consensus TEXT, - PRIMARY KEY( repeat_consensus_id ), + PRIMARY KEY (repeat_consensus_id), KEY name (repeat_name), KEY class (repeat_class), - KEY consensus(repeat_consensus(10)), - KEY type( repeat_type ) + KEY consensus (repeat_consensus(10)), + KEY type (repeat_type) -) COLLATE=latin1_swedish_ci; +) COLLATE=latin1_swedish_ci TYPE=MyISAM; ################################################################################ # @@ -320,23 +374,24 @@ CREATE 
TABLE repeat_consensus ( CREATE TABLE repeat_feature ( - repeat_feature_id int unsigned NOT NULL auto_increment, - seq_region_id int(10) unsigned NOT NULL, - seq_region_start int(10) unsigned NOT NULL, - seq_region_end int(10) unsigned NOT NULL, - seq_region_strand tinyint(1) DEFAULT '1' NOT NULL, - repeat_start int(10) NOT NULL, - repeat_end int(10) NOT NULL, - repeat_consensus_id int(10) unsigned NOT NULL, - analysis_id int(10) unsigned NOT NULL, - score double, + repeat_feature_id INT UNSIGNED NOT NULL AUTO_INCREMENT, + seq_region_id INT(10) UNSIGNED NOT NULL, + seq_region_start INT(10) UNSIGNED NOT NULL, + seq_region_end INT(10) UNSIGNED NOT NULL, + seq_region_strand TINYINT(1) DEFAULT '1' NOT NULL, + repeat_start INT(10) NOT NULL, + repeat_end INT(10) NOT NULL, + repeat_consensus_id INT(10) UNSIGNED NOT NULL, + analysis_id INT(10) UNSIGNED NOT NULL, + score DOUBLE, - PRIMARY KEY ( repeat_feature_id ), - KEY seq_region_idx( seq_region_id, seq_region_start ), - KEY repeat_idx( repeat_consensus_id ), - KEY analysis_idx( analysis_id ) + PRIMARY KEY (repeat_feature_id), + KEY seq_region_idx (seq_region_id, seq_region_start), + KEY repeat_idx (repeat_consensus_id), + KEY analysis_idx (analysis_id) + +) COLLATE=latin1_swedish_ci TYPE=MyISAM MAX_ROWS=100000000 AVG_ROW_LENGTH=80; -) MAX_ROWS=100000000 AVG_ROW_LENGTH=80; ################################################################################ # @@ -345,41 +400,44 @@ CREATE TABLE repeat_feature ( CREATE TABLE gene ( - gene_id int unsigned NOT NULL auto_increment, + gene_id INT UNSIGNED NOT NULL AUTO_INCREMENT, biotype VARCHAR(40) NOT NULL, - analysis_id int, - seq_region_id int(10) unsigned NOT NULL, - seq_region_start int(10) unsigned NOT NULL, - seq_region_end int(10) unsigned NOT NULL, - seq_region_strand tinyint(2) NOT NULL, - display_xref_id int unsigned, + analysis_id INT, + seq_region_id INT(10) UNSIGNED NOT NULL, + seq_region_start INT(10) UNSIGNED NOT NULL, + seq_region_end INT(10) UNSIGNED NOT NULL, + 
seq_region_strand TINYINT(2) NOT NULL, + display_xref_id INT UNSIGNED, source VARCHAR(20) NOT NULL, - status enum( 'KNOWN', 'NOVEL', 'PUTATIVE', 'PREDICTED' ), - description text, + status ENUM('KNOWN', 'NOVEL', 'PUTATIVE', 'PREDICTED'), + description TEXT, PRIMARY KEY (gene_id), - KEY seq_region_idx( seq_region_id, seq_region_start ), - KEY xref_id_index ( display_xref_id ), - KEY analysis_idx( analysis_id ) + KEY seq_region_idx (seq_region_id, seq_region_start), + KEY xref_id_index (display_xref_id), + KEY analysis_idx (analysis_id) + +) COLLATE=latin1_swedish_ci TYPE=MyISAM; -) COLLATE=latin1_swedish_ci; ################################################################################ # # Table structure for table 'gene_stable_id' # + CREATE TABLE gene_stable_id ( - gene_id int unsigned not null, # foreign key gene:gene_id - stable_id VARCHAR(128) not null, - version int(10), + gene_id INT UNSIGNED NOT NULL, + stable_id VARCHAR(128) NOT NULL, + version INT(10), created_date DATETIME NOT NULL, modified_date DATETIME NOT NULL, - PRIMARY KEY( gene_id ), - UNIQUE( stable_id, version ) + PRIMARY KEY (gene_id), + UNIQUE (stable_id, version) + +) COLLATE=latin1_swedish_ci TYPE=MyISAM; -) COLLATE=latin1_swedish_ci; ################################################################################ # @@ -388,14 +446,15 @@ CREATE TABLE gene_stable_id ( CREATE TABLE supporting_feature ( - exon_id int(11) DEFAULT '0' NOT NULL, - feature_type enum('dna_align_feature','protein_align_feature'), - feature_id int(11) DEFAULT '0' NOT NULL, + exon_id INT(11) DEFAULT '0' NOT NULL, + feature_type ENUM('dna_align_feature','protein_align_feature'), + feature_id INT(11) DEFAULT '0' NOT NULL, UNIQUE all_idx (exon_id,feature_type,feature_id), KEY feature_idx (feature_type,feature_id) -) MAX_ROWS=100000000 AVG_ROW_LENGTH=80; +) COLLATE=latin1_swedish_ci TYPE=MyISAM MAX_ROWS=100000000 AVG_ROW_LENGTH=80; + ################################################################################ # @@ 
-404,14 +463,15 @@ CREATE TABLE supporting_feature ( CREATE TABLE transcript_supporting_feature ( - transcript_id int(11) DEFAULT '0' NOT NULL, - feature_type enum('dna_align_feature','protein_align_feature'), - feature_id int(11) DEFAULT '0' NOT NULL, + transcript_id INT(11) DEFAULT '0' NOT NULL, + feature_type ENUM('dna_align_feature','protein_align_feature'), + feature_id INT(11) DEFAULT '0' NOT NULL, UNIQUE all_idx (transcript_id,feature_type,feature_id), KEY feature_idx (feature_type,feature_id) -) MAX_ROWS=100000000 AVG_ROW_LENGTH=80; +) COLLATE=latin1_swedish_ci TYPE=MyISAM MAX_ROWS=100000000 AVG_ROW_LENGTH=80; + ################################################################################ # @@ -420,23 +480,24 @@ CREATE TABLE transcript_supporting_feature ( CREATE TABLE transcript ( - transcript_id INT UNSIGNED NOT NULL auto_increment, - gene_id INT UNSIGNED NOT NULL, # foreign key gene:gene_id - seq_region_id int(10) unsigned NOT NULL, - seq_region_start int(10) unsigned NOT NULL, - seq_region_end int(10) unsigned NOT NULL, - seq_region_strand tinyint(2) NOT NULL, - display_xref_id int unsigned, + transcript_id INT UNSIGNED NOT NULL AUTO_INCREMENT, + gene_id INT UNSIGNED NOT NULL, + seq_region_id INT(10) UNSIGNED NOT NULL, + seq_region_start INT(10) UNSIGNED NOT NULL, + seq_region_end INT(10) UNSIGNED NOT NULL, + seq_region_strand TINYINT(2) NOT NULL, + display_xref_id INT UNSIGNED, biotype VARCHAR(40) NOT NULL, - status enum( 'KNOWN', 'NOVEL', 'PUTATIVE', 'PREDICTED' ), - description text, + status ENUM('KNOWN', 'NOVEL', 'PUTATIVE', 'PREDICTED'), + description TEXT, PRIMARY KEY (transcript_id), - KEY seq_region_idx( seq_region_id, seq_region_start ), + KEY seq_region_idx (seq_region_id, seq_region_start), KEY gene_index (gene_id), - KEY xref_id_index ( display_xref_id ) + KEY xref_id_index (display_xref_id) + +) COLLATE=latin1_swedish_ci TYPE=MyISAM; -) COLLATE=latin1_swedish_ci; 
################################################################################ # @@ -445,16 +506,17 @@ CREATE TABLE transcript ( CREATE TABLE transcript_stable_id ( - transcript_id int unsigned not null, # foreign key transcript:transcript_id - stable_id VARCHAR(128) not null, - version int(10), + transcript_id INT UNSIGNED NOT NULL, + stable_id VARCHAR(128) NOT NULL, + version INT(10), created_date DATETIME NOT NULL, modified_date DATETIME NOT NULL, - PRIMARY KEY( transcript_id ), - UNIQUE( stable_id, version ) + PRIMARY KEY (transcript_id), + UNIQUE (stable_id, version) + +) COLLATE=latin1_swedish_ci TYPE=MyISAM; -) COLLATE=latin1_swedish_ci; ################################################################################ # @@ -466,16 +528,17 @@ CREATE TABLE transcript_stable_id ( CREATE TABLE translation ( - translation_id INT UNSIGNED NOT NULL auto_increment, + translation_id INT UNSIGNED NOT NULL AUTO_INCREMENT, transcript_id INT UNSIGNED NOT NULL, seq_start INT(10) NOT NULL, # relative to exon start - start_exon_id INT UNSIGNED NOT NULL, # foreign key exon:exon_id + start_exon_id INT UNSIGNED NOT NULL, seq_end INT(10) NOT NULL, # relative to exon start - end_exon_id INT UNSIGNED NOT NULL, # foreign key exon:exon_id + end_exon_id INT UNSIGNED NOT NULL, PRIMARY KEY (translation_id), KEY (transcript_id) -) COLLATE=latin1_swedish_ci; + +) COLLATE=latin1_swedish_ci TYPE=MyISAM; ################################################################################ @@ -484,16 +547,16 @@ CREATE TABLE translation ( # CREATE TABLE translation_stable_id ( - translation_id INT unsigned NOT NULL, # foreign key translation:translation_id + translation_id INT UNSIGNED NOT NULL, stable_id VARCHAR(128) NOT NULL, version INT(10), created_date DATETIME NOT NULL, modified_date DATETIME NOT NULL, - PRIMARY KEY( translation_id ), - UNIQUE( stable_id, version ) + PRIMARY KEY (translation_id), + UNIQUE (stable_id, version) -) COLLATE=latin1_swedish_ci; +) COLLATE=latin1_swedish_ci 
TYPE=MyISAM; ################################################################################ @@ -501,32 +564,34 @@ CREATE TABLE translation_stable_id ( # Table structure for table 'assembly' # # This is a denormalised golden path. -# The data in this table defines the "static golden path", i.e. the -# best effort draft full genome sequence as determined by the UCSC or NCBI -# (depending which assembly you are using) -# -# Each row represents a component, e.g. a contig, (comp_seq_region_id, -# FK from seq_region table) at least part of which is present in the golden path. -# The part of the component that is in the path is delimited by fields cmp_start -# and cmp_end (start < end), and the absolute position within the golden path -# chromosome (or other appropriate assembled structure) (asm_seq_region_id) is -# given by asm_start and asm_end. -# +# +# The data in this table defines the "static golden path", i.e. the best effort +# draft full genome sequence as determined by the UCSC or NCBI (depending which +# assembly you are using). +# +# Each row represents a component, e.g. a contig, (comp_seq_region_id, FK from +# seq_region table) at least part of which is present in the golden path. +# +# The part of the component that is in the path is delimited by fields +# cmp_start and cmp_end (start < end), and the absolute position within the +# golden path chromosome (or other appropriate assembled structure) +# (asm_seq_region_id) is given by asm_start and asm_end. 
CREATE TABLE assembly ( - asm_seq_region_id int unsigned NOT NULL, - cmp_seq_region_id int(10) unsigned NOT NULL, - asm_start int(10) NOT NULL, - asm_end int(10) NOT NULL, - cmp_start int(10) NOT NULL, - cmp_end int(10) NOT NULL, - ori tinyint NOT NULL, + asm_seq_region_id INT UNSIGNED NOT NULL, + cmp_seq_region_id INT(10) UNSIGNED NOT NULL, + asm_start INT(10) NOT NULL, + asm_end INT(10) NOT NULL, + cmp_start INT(10) NOT NULL, + cmp_end INT(10) NOT NULL, + ori TINYINT NOT NULL, - KEY(cmp_seq_region_id), - KEY(asm_seq_region_id, asm_start) + KEY (cmp_seq_region_id), + KEY (asm_seq_region_id, asm_start) + +) COLLATE=latin1_swedish_ci TYPE=MyISAM; -) COLLATE=latin1_swedish_ci; ################################################################################ # @@ -535,24 +600,25 @@ CREATE TABLE assembly ( CREATE TABLE protein_feature ( - protein_feature_id int(10) unsigned NOT NULL auto_increment, - translation_id int NOT NULL, - seq_start int(10) NOT NULL, - seq_end int(10) NOT NULL, - hit_start int(10) NOT NULL, - hit_end int(10) NOT NULL, - hit_id varchar(40) NOT NULL, - analysis_id int(10) unsigned NOT NULL, - score double NOT NULL, - evalue double, - perc_ident float, - - PRIMARY KEY (protein_feature_id), + protein_feature_id INT(10) UNSIGNED NOT NULL AUTO_INCREMENT, + translation_id INT NOT NULL, + seq_start INT(10) NOT NULL, + seq_end INT(10) NOT NULL, + hit_start INT(10) NOT NULL, + hit_end INT(10) NOT NULL, + hit_id VARCHAR(40) NOT NULL, + analysis_id INT(10) UNSIGNED NOT NULL, + score DOUBLE NOT NULL, + evalue DOUBLE, + perc_ident FLOAT, + + PRIMARY KEY (protein_feature_id), KEY (translation_id), - KEY hid_index ( hit_id ), - KEY analysis_idx( analysis_id ) + KEY hid_index (hit_id), + KEY analysis_idx (analysis_id) + +) COLLATE=latin1_swedish_ci TYPE=MyISAM; -) COLLATE=latin1_swedish_ci; ################################################################################ # @@ -561,13 +627,13 @@ CREATE TABLE protein_feature ( CREATE TABLE interpro ( - interpro_ac 
varchar(40) NOT NULL, - id varchar(40) NOT NULL, + interpro_ac VARCHAR(40) NOT NULL, + id VARCHAR(40) NOT NULL, UNIQUE (interpro_ac, id), KEY (id) -) COLLATE=latin1_swedish_ci; +) COLLATE=latin1_swedish_ci TYPE=MyISAM; ################################################################################ @@ -576,17 +642,18 @@ CREATE TABLE interpro ( # CREATE TABLE karyotype ( - karyotype_id int unsigned NOT NULL auto_increment, - seq_region_id int unsigned NOT NULL, - seq_region_start int(10) NOT NULL, - seq_region_end int(10) NOT NULL, - band varchar(40) NOT NULL, - stain varchar(40) NOT NULL, + karyotype_id INT UNSIGNED NOT NULL AUTO_INCREMENT, + seq_region_id INT UNSIGNED NOT NULL, + seq_region_start INT(10) NOT NULL, + seq_region_end INT(10) NOT NULL, + band VARCHAR(40) NOT NULL, + stain VARCHAR(40) NOT NULL, PRIMARY KEY (karyotype_id), KEY region_band_idx (seq_region_id,band) -) COLLATE=latin1_swedish_ci; +) COLLATE=latin1_swedish_ci TYPE=MyISAM; + ################################################################################ # @@ -595,16 +662,20 @@ CREATE TABLE karyotype ( CREATE TABLE object_xref ( - object_xref_id INT not null auto_increment, - ensembl_id int unsigned not null, - ensembl_object_type ENUM( 'RawContig', 'Transcript', 'Gene', 'Translation', 'regulatory_factor', 'regulatory_feature' ) not null, - xref_id INT unsigned not null, + object_xref_id INT NOT NULL AUTO_INCREMENT, + ensembl_id INT UNSIGNED NOT NULL, + ensembl_object_type ENUM('RawContig', 'Transcript', 'Gene', + 'Translation', 'regulatory_factor', + 'regulatory_feature') + NOT NULL, + xref_id INT UNSIGNED NOT NULL, + + UNIQUE (ensembl_object_type, ensembl_id, xref_id), + KEY oxref_idx (object_xref_id, xref_id, ensembl_object_type, ensembl_id), + KEY xref_idx (xref_id, ensembl_object_type) - UNIQUE ( ensembl_object_type, ensembl_id, xref_id ), - KEY oxref_idx( object_xref_id, xref_id, ensembl_object_type, ensembl_id ), - KEY xref_idx(xref_id, ensembl_object_type) +) COLLATE=latin1_swedish_ci 
TYPE=MyISAM; -) COLLATE=latin1_swedish_ci; ################################################################################ # @@ -612,23 +683,26 @@ CREATE TABLE object_xref ( # CREATE TABLE identity_xref( - object_xref_id INT unsigned not null , - query_identity int(5), - target_identity int(5), - - hit_start int, - hit_end int, - translation_start int, - translation_end int, - cigar_line text, - - score double, - evalue double, - analysis_id int, + + object_xref_id INT UNSIGNED NOT NULL , + query_identity INT(5), + target_identity INT(5), + + hit_start INT, + hit_end INT, + translation_start INT, + translation_end INT, + cigar_line TEXT, + + score DOUBLE, + evalue DOUBLE, + analysis_id INT, PRIMARY KEY (object_xref_id), - KEY analysis_idx( analysis_id ) -) COLLATE=latin1_swedish_ci; + KEY analysis_idx (analysis_id) + +) COLLATE=latin1_swedish_ci TYPE=MyISAM; + ################################################################################ # @@ -637,13 +711,15 @@ CREATE TABLE identity_xref( CREATE TABLE go_xref ( - object_xref_id int(10) unsigned DEFAULT '0' NOT NULL, - linkage_type enum('IC', 'IDA', 'IEA', 'IEP', 'IGI', 'IMP', - 'IPI', 'ISS', 'NAS', 'ND', 'TAS', 'NR', 'RCA') NOT NULL, + object_xref_id INT(10) UNSIGNED DEFAULT '0' NOT NULL, + linkage_type ENUM('IC', 'IDA', 'IEA', 'IEP', 'IGI', 'IMP', + 'IPI', 'ISS', 'NAS', 'ND', 'TAS', 'NR', 'RCA') + NOT NULL, KEY (object_xref_id), - UNIQUE(object_xref_id, linkage_type) + UNIQUE (object_xref_id, linkage_type) + +) COLLATE=latin1_swedish_ci TYPE=MyISAM; -) COLLATE=latin1_swedish_ci; ################################################################################ # @@ -652,21 +728,22 @@ CREATE TABLE go_xref ( CREATE TABLE xref ( - xref_id INT unsigned not null auto_increment, - external_db_id int not null, - dbprimary_acc VARCHAR(40) not null, - display_label VARCHAR(128) not null, + xref_id INT UNSIGNED NOT NULL AUTO_INCREMENT, + external_db_id INT NOT NULL, + dbprimary_acc VARCHAR(40) NOT NULL, + display_label 
VARCHAR(128) NOT NULL, version VARCHAR(10) DEFAULT '' NOT NULL, description VARCHAR(255), info_type ENUM('PROJECTION', 'MISC'), info_text VARCHAR(255), - PRIMARY KEY( xref_id ), - UNIQUE KEY id_index( dbprimary_acc, external_db_id ), - KEY display_index ( display_label ), - KEY info_type_idx ( info_type ) + PRIMARY KEY (xref_id), + UNIQUE KEY id_index (dbprimary_acc, external_db_id), + KEY display_index (display_label), + KEY info_type_idx (info_type) + +) COLLATE=latin1_swedish_ci TYPE=MyISAM; -) COLLATE=latin1_swedish_ci; ################################################################################ # @@ -675,12 +752,14 @@ CREATE TABLE xref ( CREATE TABLE external_synonym ( - xref_id INT unsigned not null, - synonym VARCHAR(40) not null, - PRIMARY KEY( xref_id, synonym ), - KEY name_index( synonym ) + xref_id INT UNSIGNED NOT NULL, + synonym VARCHAR(40) NOT NULL, + + PRIMARY KEY (xref_id, synonym), + KEY name_index (synonym) + +) COLLATE=latin1_swedish_ci TYPE=MyISAM; -) COLLATE=latin1_swedish_ci; ################################################################################ # @@ -689,56 +768,67 @@ CREATE TABLE external_synonym ( CREATE TABLE external_db ( - external_db_id INT not null, + external_db_id INT NOT NULL, db_name VARCHAR(27) NOT NULL, db_release VARCHAR(40) NOT NULL, - status ENUM ('KNOWNXREF','KNOWN','XREF','PRED','ORTH', 'PSEUDO') not null, - + status ENUM('KNOWNXREF','KNOWN','XREF','PRED','ORTH', + 'PSEUDO') + NOT NULL, dbprimary_acc_linkable BOOLEAN DEFAULT 1 NOT NULL, display_label_linkable BOOLEAN DEFAULT 0 NOT NULL, - priority INT NOT NULL, - db_display_name VARCHAR(255), - PRIMARY KEY( external_db_id ) + PRIMARY KEY (external_db_id) -) COLLATE=latin1_swedish_ci; +) COLLATE=latin1_swedish_ci TYPE=MyISAM; +################################################################################ +# +# Table structure for table 'prediction_exon' +# CREATE TABLE prediction_exon ( - prediction_exon_id int unsigned not null auto_increment, - 
prediction_transcript_id int unsigned not null, - exon_rank smallint unsigned not null, - seq_region_id int unsigned not null, - seq_region_start int unsigned not null, - seq_region_end int unsigned not null, - seq_region_strand tinyint not null, - start_phase tinyint not null, - score double, - p_value double, - - PRIMARY KEY( prediction_exon_id ), - KEY (prediction_transcript_id), - KEY ( seq_region_id, seq_region_start ) -) COLLATE=latin1_swedish_ci; + + prediction_exon_id INT UNSIGNED NOT NULL AUTO_INCREMENT, + prediction_transcript_id INT UNSIGNED NOT NULL, + exon_rank SMALLINT UNSIGNED NOT NULL, + seq_region_id INT UNSIGNED NOT NULL, + seq_region_start INT UNSIGNED NOT NULL, + seq_region_end INT UNSIGNED NOT NULL, + seq_region_strand TINYINT NOT NULL, + start_phase TINYINT NOT NULL, + score DOUBLE, + p_value DOUBLE, + + PRIMARY KEY (prediction_exon_id), + KEY (prediction_transcript_id), + KEY (seq_region_id, seq_region_start) + +) COLLATE=latin1_swedish_ci TYPE=MyISAM; +################################################################################ +# +# Table structure for table 'prediction_transcript' +# + CREATE TABLE prediction_transcript ( - prediction_transcript_id int unsigned not null auto_increment, - seq_region_id int unsigned not null, - seq_region_start int unsigned not null, - seq_region_end int unsigned not null, - seq_region_strand tinyint not null, - analysis_id int, - display_label varchar(255), - - PRIMARY KEY( prediction_transcript_id ), - KEY ( seq_region_id, seq_region_start ), - KEY analysis_idx( analysis_id ) -) COLLATE=latin1_swedish_ci; + prediction_transcript_id INT UNSIGNED NOT NULL AUTO_INCREMENT, + seq_region_id INT UNSIGNED NOT NULL, + seq_region_start INT UNSIGNED NOT NULL, + seq_region_end INT UNSIGNED NOT NULL, + seq_region_strand TINYINT NOT NULL, + analysis_id INT, + display_label VARCHAR(255), + + PRIMARY KEY (prediction_transcript_id), + KEY (seq_region_id, seq_region_start), + KEY analysis_idx (analysis_id) + +) 
COLLATE=latin1_swedish_ci TYPE=MyISAM; ################################################################################ @@ -748,36 +838,37 @@ CREATE TABLE prediction_transcript ( CREATE TABLE meta ( - meta_id INT not null auto_increment, - meta_key varchar( 40 ) not null, - meta_value varchar( 255 ) not null, + meta_id INT NOT NULL AUTO_INCREMENT, + meta_key VARCHAR(40) NOT NULL, + meta_value VARCHAR(255) NOT NULL, - PRIMARY KEY( meta_id ), - KEY meta_key_index ( meta_key ), - KEY meta_value_index ( meta_value ) + PRIMARY KEY (meta_id), + KEY meta_key_index (meta_key), + KEY meta_value_index (meta_value) -) COLLATE=latin1_swedish_ci; +) COLLATE=latin1_swedish_ci TYPE=MyISAM; -# Auto add schema version to database +# Auto add schema version to database INSERT INTO meta (meta_key, meta_value) VALUES ("schema_version", "38"); + ################################################################################ # # Table structure for table 'marker_synonym' CREATE TABLE marker_synonym ( - marker_synonym_id int unsigned not null auto_increment, - marker_id int unsigned not null, # foreign key marker:marker_id - source varchar(20), - name varchar(30), + marker_synonym_id INT UNSIGNED NOT NULL AUTO_INCREMENT, + marker_id INT UNSIGNED NOT NULL, + source VARCHAR(20), + name VARCHAR(30), PRIMARY KEY (marker_synonym_id), KEY marker_synonym_idx (marker_synonym_id, name), KEY marker_idx (marker_id) -) COLLATE=latin1_swedish_ci; +) COLLATE=latin1_swedish_ci TYPE=MyISAM; ################################################################################ @@ -786,19 +877,20 @@ CREATE TABLE marker_synonym ( CREATE TABLE marker ( - marker_id int unsigned not null auto_increment, - display_marker_synonym_id int unsigned, #foreign key marker_synonym:marker_synonym_id - left_primer varchar(100) not null, - right_primer varchar(100) not null, - min_primer_dist int(10) unsigned not null, - max_primer_dist int(10) unsigned not null, - priority int, - type enum('est', 'microsatellite'), + 
marker_id INT UNSIGNED NOT NULL AUTO_INCREMENT, + display_marker_synonym_id INT UNSIGNED, + left_primer VARCHAR(100) NOT NULL, + right_primer VARCHAR(100) NOT NULL, + min_primer_dist INT(10) UNSIGNED NOT NULL, + max_primer_dist INT(10) UNSIGNED NOT NULL, + priority INT, + type ENUM('est', 'microsatellite'), PRIMARY KEY (marker_id), KEY marker_idx (marker_id, priority) -) COLLATE=latin1_swedish_ci; +) COLLATE=latin1_swedish_ci TYPE=MyISAM; + ################################################################################ # @@ -806,37 +898,39 @@ CREATE TABLE marker ( CREATE TABLE marker_feature ( - marker_feature_id int unsigned not null auto_increment, - marker_id int unsigned not null, #foreign key marker:marker_id - seq_region_id int(10) unsigned NOT NULL, #foreign key contig:seq_region_id - seq_region_start int(10) unsigned NOT NULL, - seq_region_end int(10) unsigned NOT NULL, - analysis_id int(10) unsigned NOT NULL, #foreign key analysis:analysis_id - map_weight int(10) unsigned, + marker_feature_id INT UNSIGNED NOT NULL AUTO_INCREMENT, + marker_id INT UNSIGNED NOT NULL, + seq_region_id INT(10) UNSIGNED NOT NULL, + seq_region_start INT(10) UNSIGNED NOT NULL, + seq_region_end INT(10) UNSIGNED NOT NULL, + analysis_id INT(10) UNSIGNED NOT NULL, + map_weight INT(10) UNSIGNED, PRIMARY KEY (marker_feature_id), - KEY seq_region_idx (seq_region_id, seq_region_start ), - KEY analysis_idx( analysis_id ) + KEY seq_region_idx (seq_region_id, seq_region_start), + KEY analysis_idx (analysis_id) -) COLLATE=latin1_swedish_ci; +) COLLATE=latin1_swedish_ci TYPE=MyISAM; + ################################################################################ # # Table structure for table 'marker_map_location' CREATE TABLE marker_map_location ( - marker_id int unsigned not null, #foreign key marker:marker_id - map_id int unsigned not null, #foreign key map:map_id - chromosome_name varchar(15) not null, - marker_synonym_id int unsigned not null, #foreign key 
marker_synonym:marker_synonym_id - position varchar(15) not null, - lod_score double, + marker_id INT UNSIGNED NOT NULL, + map_id INT UNSIGNED NOT NULL, + chromosome_name VARCHAR(15) NOT NULL, + marker_synonym_id INT UNSIGNED NOT NULL, + position VARCHAR(15) NOT NULL, + lod_score DOUBLE, PRIMARY KEY (marker_id, map_id), - KEY map_idx( map_id, chromosome_name, position) + KEY map_idx (map_id, chromosome_name, position) + +) COLLATE=latin1_swedish_ci TYPE=MyISAM; -) COLLATE=latin1_swedish_ci; ################################################################################ # @@ -844,11 +938,13 @@ CREATE TABLE marker_map_location ( CREATE TABLE map ( - map_id int unsigned not null auto_increment, - map_name varchar(30) not null, + map_id INT UNSIGNED NOT NULL AUTO_INCREMENT, + map_name VARCHAR(30) NOT NULL, PRIMARY KEY (map_id) -) COLLATE=latin1_swedish_ci; + +) COLLATE=latin1_swedish_ci TYPE=MyISAM; + ################################################################################ # @@ -857,14 +953,14 @@ CREATE TABLE map ( CREATE TABLE misc_feature ( - misc_feature_id int(10) unsigned NOT NULL auto_increment, - seq_region_id int(10) unsigned NOT NULL default '0', - seq_region_start int(10) unsigned NOT NULL default '0', - seq_region_end int(10) unsigned NOT NULL default '0', - seq_region_strand tinyint(4) NOT NULL default '0', + misc_feature_id INT(10) UNSIGNED NOT NULL AUTO_INCREMENT, + seq_region_id INT(10) UNSIGNED NOT NULL default '0', + seq_region_start INT(10) UNSIGNED NOT NULL default '0', + seq_region_end INT(10) UNSIGNED NOT NULL default '0', + seq_region_strand TINYINT(4) NOT NULL default '0', PRIMARY KEY (misc_feature_id), - KEY seq_region_idx( seq_region_id, seq_region_start ) + KEY seq_region_idx (seq_region_id, seq_region_start) ) COLLATE=latin1_swedish_ci TYPE=MyISAM; @@ -875,26 +971,31 @@ CREATE TABLE misc_feature ( # CREATE TABLE misc_attrib ( - misc_feature_id int(10) unsigned NOT NULL default '0', - attrib_type_id smallint(5) unsigned NOT NULL 
default '0', - value varchar(255) NOT NULL default '', - KEY type_val_idx( attrib_type_id, value ), - KEY misc_feature_idx( misc_feature_id ) + misc_feature_id INT(10) UNSIGNED NOT NULL default '0', + attrib_type_id SMALLINT(5) UNSIGNED NOT NULL default '0', + value VARCHAR(255) NOT NULL default '', + + KEY type_val_idx (attrib_type_id, value), + KEY misc_feature_idx (misc_feature_id) + ) COLLATE=latin1_swedish_ci TYPE=MyISAM; + ################################################################################ # # Table structure for table 'translation_attrib' # CREATE TABLE translation_attrib ( - translation_id int(10) unsigned NOT NULL default '0', - attrib_type_id smallint(5) unsigned NOT NULL default '0', - value varchar(255) NOT NULL default '', - KEY type_val_idx( attrib_type_id, value ), - KEY translation_idx( translation_id ) + translation_id INT(10) UNSIGNED NOT NULL default '0', + attrib_type_id SMALLINT(5) UNSIGNED NOT NULL default '0', + value VARCHAR(255) NOT NULL default '', + + KEY type_val_idx (attrib_type_id, value), + KEY translation_idx (translation_id) + ) COLLATE=latin1_swedish_ci TYPE=MyISAM; ################################################################################ @@ -903,26 +1004,31 @@ CREATE TABLE translation_attrib ( # CREATE TABLE transcript_attrib ( - transcript_id int(10) unsigned NOT NULL default '0', - attrib_type_id smallint(5) unsigned NOT NULL default '0', - value varchar(255) NOT NULL default '', - KEY type_val_idx( attrib_type_id, value ), - KEY transcript_idx( transcript_id ) + transcript_id INT(10) UNSIGNED NOT NULL default '0', + attrib_type_id SMALLINT(5) UNSIGNED NOT NULL default '0', + value VARCHAR(255) NOT NULL default '', + + KEY type_val_idx (attrib_type_id, value), + KEY transcript_idx (transcript_id) + ) COLLATE=latin1_swedish_ci TYPE=MyISAM; + ################################################################################ # # Table structure for table 'gene_attrib' # CREATE TABLE gene_attrib ( - gene_id int(10) 
unsigned NOT NULL default '0', - attrib_type_id smallint(5) unsigned NOT NULL default '0', - value varchar(255) NOT NULL default '', - KEY type_val_idx( attrib_type_id, value ), - KEY gene_idx( gene_id ) + gene_id INT(10) UNSIGNED NOT NULL default '0', + attrib_type_id SMALLINT(5) UNSIGNED NOT NULL default '0', + value VARCHAR(255) NOT NULL default '', + + KEY type_val_idx (attrib_type_id, value), + KEY gene_idx (gene_id) + ) COLLATE=latin1_swedish_ci TYPE=MyISAM; @@ -932,11 +1038,12 @@ CREATE TABLE gene_attrib ( # CREATE TABLE seq_region_attrib ( - seq_region_id int(10) unsigned NOT NULL default '0', - attrib_type_id smallint(5) unsigned NOT NULL default '0', - value varchar(255) NOT NULL default '', - KEY type_val_idx( attrib_type_id, value), + seq_region_id INT(10) UNSIGNED NOT NULL default '0', + attrib_type_id SMALLINT(5) UNSIGNED NOT NULL default '0', + value VARCHAR(255) NOT NULL default '', + + KEY type_val_idx (attrib_type_id, value), KEY seq_region_idx (seq_region_id) ) COLLATE=latin1_swedish_ci TYPE=MyISAM; @@ -949,16 +1056,17 @@ CREATE TABLE seq_region_attrib ( CREATE TABLE attrib_type ( - attrib_type_id smallint(5) unsigned NOT NULL auto_increment, - code varchar(15) NOT NULL default '', - name varchar(255) NOT NULL default '', - description text, + attrib_type_id SMALLINT(5) UNSIGNED NOT NULL AUTO_INCREMENT, + code VARCHAR(15) NOT NULL default '', + name VARCHAR(255) NOT NULL default '', + description TEXT, - PRIMARY KEY ( attrib_type_id), - UNIQUE KEY c(code) + PRIMARY KEY (attrib_type_id), + UNIQUE KEY c (code) ) COLLATE=latin1_swedish_ci TYPE=MyISAM; + ################################################################################ # # Table structure for table 'misc_set' @@ -966,17 +1074,18 @@ CREATE TABLE attrib_type ( CREATE TABLE misc_set ( - misc_set_id smallint(5) unsigned NOT NULL auto_increment, - code varchar(25) NOT NULL default '', - name varchar(255) NOT NULL default '', - description text NOT NULL, - max_length int unsigned not null, + 
misc_set_id SMALLINT(5) UNSIGNED NOT NULL AUTO_INCREMENT, + code VARCHAR(25) NOT NULL default '', + name VARCHAR(255) NOT NULL default '', + description TEXT NOT NULL, + max_length INT UNSIGNED NOT NULL, PRIMARY KEY (misc_set_id), - UNIQUE KEY c(code) + UNIQUE KEY c (code) ) COLLATE=latin1_swedish_ci TYPE=MyISAM; + ################################################################################ # # Table structure for table 'misc_feature_misc_set' @@ -984,19 +1093,14 @@ CREATE TABLE misc_set ( CREATE TABLE misc_feature_misc_set ( - misc_feature_id int(10) unsigned NOT NULL default '0', - misc_set_id smallint(5) unsigned NOT NULL default '0', + misc_feature_id INT(10) UNSIGNED NOT NULL default '0', + misc_set_id SMALLINT(5) UNSIGNED NOT NULL default '0', - PRIMARY KEY ( misc_feature_id, misc_set_id ), - KEY reverse_idx( misc_set_id, misc_feature_id ) + PRIMARY KEY (misc_feature_id, misc_set_id), + KEY reverse_idx (misc_set_id, misc_feature_id) ) COLLATE=latin1_swedish_ci TYPE=MyISAM; -################################################################################ -# -# Tables for QTLs -# -################################################################################ ################################################################################ # @@ -1005,17 +1109,18 @@ CREATE TABLE misc_feature_misc_set ( CREATE TABLE qtl ( - qtl_id int unsigned auto_increment not null, - trait varchar(255) not null, - lod_score float, - flank_marker_id_1 int, - flank_marker_id_2 int, - peak_marker_id int, + qtl_id INT UNSIGNED AUTO_INCREMENT NOT NULL, + trait VARCHAR(255) NOT NULL, + lod_score FLOAT, + flank_marker_id_1 INT, + flank_marker_id_2 INT, + peak_marker_id INT, + + PRIMARY KEY (qtl_id), + KEY trait_idx (trait) - PRIMARY KEY ( qtl_id ), - KEY trait_idx( trait ) +) COLLATE=latin1_swedish_ci TYPE=MyISAM; -) COLLATE=latin1_swedish_ci; ################################################################################ # @@ -1024,15 +1129,16 @@ CREATE TABLE qtl ( CREATE TABLE 
qtl_synonym ( - qtl_synonym_id int unsigned auto_increment not null, - qtl_id int unsigned not null, - source_database enum("rat genome database", "ratmap") not null, - source_primary_id varchar(255) not null, + qtl_synonym_id INT UNSIGNED AUTO_INCREMENT NOT NULL, + qtl_id INT UNSIGNED NOT NULL, + source_database ENUM("rat genome database", "ratmap") NOT NULL, + source_primary_id VARCHAR(255) NOT NULL, PRIMARY KEY (qtl_synonym_id), - KEY qtl_idx(qtl_id) + KEY qtl_idx (qtl_id) + +) COLLATE=latin1_swedish_ci TYPE=MyISAM; -) COLLATE=latin1_swedish_ci; ################################################################################ # @@ -1041,22 +1147,18 @@ CREATE TABLE qtl_synonym ( CREATE TABLE qtl_feature ( - seq_region_id int not null, - seq_region_start int not null, - seq_region_end int not null, - qtl_id int not null, - analysis_id int not null, + seq_region_id INT NOT NULL, + seq_region_start INT NOT NULL, + seq_region_end INT NOT NULL, + qtl_id INT NOT NULL, + analysis_id INT NOT NULL, + + KEY (qtl_id), + KEY loc_idx (seq_region_id, seq_region_start), + KEY analysis_idx (analysis_id) - KEY( qtl_id ), - KEY loc_idx( seq_region_id, seq_region_start ), - KEY analysis_idx( analysis_id ) -) COLLATE=latin1_swedish_ci; +) COLLATE=latin1_swedish_ci TYPE=MyISAM; -################################################################################ -# -# Tables for stable ID mapping tracking -# -################################################################################ ################################################################################ # @@ -1065,15 +1167,20 @@ CREATE TABLE qtl_feature ( CREATE TABLE mapping_session ( - mapping_session_id int(11) NOT NULL auto_increment, - old_db_name varchar(80) NOT NULL default '', - new_db_name varchar(80) NOT NULL default '', - created timestamp(14) NOT NULL, + mapping_session_id INT(11) NOT NULL AUTO_INCREMENT, + old_db_name VARCHAR(80) NOT NULL default '', + new_db_name VARCHAR(80) NOT NULL default '', + old_release 
VARCHAR(5) NOT NULL default '', + new_release VARCHAR(5) NOT NULL default '', + old_assembly VARCHAR(20) NOT NULL default '', + new_assembly VARCHAR(20) NOT NULL default '', + created_date DATETIME NOT NULL, - PRIMARY KEY (mapping_session_id) + PRIMARY KEY (mapping_session_id) ) COLLATE=latin1_swedish_ci TYPE=MyISAM; + ################################################################################ # # Table structure for table 'stable_id_event' @@ -1081,20 +1188,22 @@ CREATE TABLE mapping_session ( CREATE TABLE stable_id_event ( - old_stable_id varchar(128), - old_version smallint, - new_stable_id varchar(128), - new_version smallint, - mapping_session_id int(11) NOT NULL default '0', - type ENUM('gene', 'transcript', 'translation') NOT NULL, + old_stable_id VARCHAR(128), + old_version SMALLINT, + new_stable_id VARCHAR(128), + new_version SMALLINT, + mapping_session_id INT(11) NOT NULL default '0', + type ENUM('gene', 'transcript', 'translation') NOT NULL, - UNIQUE KEY uni_idx (mapping_session_id, old_stable_id, old_version, new_stable_id, new_version, type), + UNIQUE KEY uni_idx (mapping_session_id, old_stable_id, old_version, + new_stable_id, new_version, type), KEY new_idx (new_stable_id), KEY old_idx (old_stable_id) ) COLLATE=latin1_swedish_ci TYPE=MyISAM; + ################################################################################ # # Table structure for table 'gene_archive' @@ -1103,20 +1212,21 @@ CREATE TABLE stable_id_event ( CREATE TABLE gene_archive ( gene_stable_id VARCHAR(128) NOT NULL, - gene_version smallint NOT NULL, + gene_version SMALLINT NOT NULL, transcript_stable_id VARCHAR(128) NOT NULL, - transcript_version smallint NOT NULL, + transcript_version SMALLINT NOT NULL, translation_stable_id VARCHAR(128) NOT NULL, - translation_version smallint NOT NULL, - peptide_archive_id int NOT NULL, - mapping_session_id int NOT NULL, + translation_version SMALLINT NOT NULL, + peptide_archive_id INT NOT NULL, + mapping_session_id INT NOT NULL, - KEY 
gene_idx( gene_stable_id, gene_version ), - KEY transcript_idx( transcript_stable_id, transcript_version ), - KEY translation_idx( translation_stable_id, translation_version ) + KEY gene_idx (gene_stable_id, gene_version), + KEY transcript_idx (transcript_stable_id, transcript_version), + KEY translation_idx (translation_stable_id, translation_version) ) COLLATE=latin1_swedish_ci TYPE=MyISAM; + ################################################################################ # # Table structure for table 'peptide_archive' @@ -1126,13 +1236,14 @@ CREATE TABLE peptide_archive ( peptide_archive_id INT NOT NULL AUTO_INCREMENT, md5_checksum char(32), - peptide_seq mediumtext NOT NULL, + peptide_seq MEDIUMTEXT NOT NULL, - PRIMARY KEY( peptide_archive_id ), - KEY checksum( md5_checksum ) + PRIMARY KEY (peptide_archive_id), + KEY checksum (md5_checksum) ) COLLATE=latin1_swedish_ci TYPE=MyISAM; + ################################################################################ # # Table structure for table 'seq_region' @@ -1145,9 +1256,9 @@ CREATE TABLE seq_region ( coord_system_id INT(10) NOT NULL, length INT(10) NOT NULL, - UNIQUE(coord_system_id, name), + UNIQUE (coord_system_id, name), PRIMARY KEY (seq_region_id), - KEY name_idx(name) + KEY name_idx (name) ) COLLATE=latin1_swedish_ci TYPE=MyISAM; @@ -1170,12 +1281,12 @@ CREATE TABLE assembly_exception ( ori INT NOT NULL, PRIMARY KEY (assembly_exception_id), - KEY sr_idx (seq_region_id, seq_region_start), KEY ex_idx (exc_seq_region_id, exc_seq_region_start) ) COLLATE=latin1_swedish_ci TYPE=MyISAM; + ################################################################################ # # Table structure for table 'coord_system' @@ -1183,18 +1294,19 @@ CREATE TABLE assembly_exception ( CREATE TABLE coord_system ( - coord_system_id INT NOT NULL auto_increment, + coord_system_id INT NOT NULL AUTO_INCREMENT, name VARCHAR(40) NOT NULL, version VARCHAR(40), rank INT NOT NULL, - attrib SET ('default_version', 'sequence_level'), + 
attrib SET('default_version', 'sequence_level'), - UNIQUE(name, version), - UNIQUE(rank), + UNIQUE (name, version), + UNIQUE (rank), PRIMARY KEY (coord_system_id) ) COLLATE=latin1_swedish_ci TYPE=MyISAM; + ################################################################################ # # Table structure for table 'meta_coord' @@ -1211,34 +1323,46 @@ CREATE TABLE meta_coord ( ) COLLATE=latin1_swedish_ci TYPE=MyISAM; +################################################################################ +# +# Table structure for table 'density_feature' +# CREATE TABLE density_feature ( - density_feature_id INT NOT NULL auto_increment, - density_type_id INT NOT NULL, #FK refs density_type - seq_region_id INT NOT NULL, #FK refs seq_region + + density_feature_id INT NOT NULL AUTO_INCREMENT, + density_type_id INT NOT NULL, + seq_region_id INT NOT NULL, seq_region_start INT NOT NULL, seq_region_end INT NOT NULL, density_value FLOAT NOT NULL, - PRIMARY KEY(density_feature_id), + PRIMARY KEY (density_feature_id), KEY seq_region_idx (density_type_id, seq_region_id, seq_region_start), KEY seq_region_id_idx (seq_region_id) ) COLLATE=latin1_swedish_ci TYPE=MyISAM; +################################################################################ +# +# Table structure for table 'density_type' +# CREATE TABLE density_type ( - density_type_id INT NOT NULL auto_increment, - analysis_id INT NOT NULL, #FK refs analysis + + density_type_id INT NOT NULL AUTO_INCREMENT, + analysis_id INT NOT NULL, block_size INT NOT NULL, region_features INT NOT NULL, value_type ENUM('sum','ratio') NOT NULL, - PRIMARY KEY(density_type_id), - UNIQUE(analysis_id, block_size, region_features) + + PRIMARY KEY (density_type_id), + UNIQUE (analysis_id, block_size, region_features) ) COLLATE=latin1_swedish_ci TYPE=MyISAM; + ################################################################################ # # Table structure for table 'regulatory_feature' @@ -1247,22 +1371,22 @@ CREATE TABLE density_type ( CREATE 
TABLE regulatory_feature ( - regulatory_feature_id INT NOT NULL auto_increment, - name VARCHAR(255) NOT NULL, - seq_region_id INT NOT NULL, # FK refs seq_region - seq_region_start INT NOT NULL, - seq_region_end INT NOT NULL, - seq_region_strand TINYINT NOT NULL, - analysis_id INT NOT NULL, # FK refs analysis - regulatory_factor_id INT, # FK refs regulatory_factor - + regulatory_feature_id INT NOT NULL AUTO_INCREMENT, + name VARCHAR(255) NOT NULL, + seq_region_id INT NOT NULL, + seq_region_start INT NOT NULL, + seq_region_end INT NOT NULL, + seq_region_strand TINYINT NOT NULL, + analysis_id INT NOT NULL, + regulatory_factor_id INT, - PRIMARY KEY(regulatory_feature_id), - KEY seq_region_idx(seq_region_id, analysis_id, seq_region_start), - KEY seq_region_idx_2(seq_region_id, seq_region_start) + PRIMARY KEY (regulatory_feature_id), + KEY seq_region_idx (seq_region_id, analysis_id, seq_region_start), + KEY seq_region_idx_2 (seq_region_id, seq_region_start) ) COLLATE=latin1_swedish_ci TYPE=MyISAM; + ################################################################################ # # Table structure for table 'regulatory_factor' @@ -1270,14 +1394,16 @@ CREATE TABLE regulatory_feature ( CREATE TABLE regulatory_factor ( - regulatory_factor_id INT NOT NULL auto_increment, + regulatory_factor_id INT NOT NULL AUTO_INCREMENT, name VARCHAR(255) NOT NULL, - type ENUM('miRNA_target', 'transcription_factor', 'transcription_factor_complex'), + type ENUM('miRNA_target', 'transcription_factor', + 'transcription_factor_complex'), - PRIMARY KEY(regulatory_factor_id) + PRIMARY KEY (regulatory_factor_id) ) COLLATE=latin1_swedish_ci TYPE=MyISAM; + ################################################################################ # # Table structure for table 'regulatory_feature_object' @@ -1286,11 +1412,11 @@ CREATE TABLE regulatory_factor ( CREATE TABLE regulatory_feature_object ( - regulatory_feature_id INT NOT NULL, # FK to regulatory_feature - ensembl_object_type ENUM( 'Transcript', 
'Translation', 'Gene') NOT NULL, - ensembl_object_id INT NOT NULL, # FK to transcript,gene etc - influence ENUM('positive', 'negative', 'mixed', 'unknown'), - evidence VARCHAR(255), + regulatory_feature_id INT NOT NULL, + ensembl_object_type ENUM('Transcript', 'Translation', 'Gene') NOT NULL, + ensembl_object_id INT NOT NULL, # FK to transcript,gene etc + influence ENUM('positive', 'negative', 'mixed', 'unknown'), + evidence VARCHAR(255), KEY regulatory_feature_idx (regulatory_feature_id), KEY ensembl_object_idx (ensembl_object_type, ensembl_object_id) @@ -1306,9 +1432,9 @@ CREATE TABLE regulatory_feature_object ( CREATE TABLE regulatory_factor_coding ( - regulatory_factor_id INT NOT NULL, # FK to regulatory_factor - transcript_id INT, # FK to transcript - gene_id INT, # FK to gene + regulatory_factor_id INT NOT NULL, + transcript_id INT, + gene_id INT, KEY transcript_idx (transcript_id), KEY gene_idx (gene_id), @@ -1316,6 +1442,7 @@ CREATE TABLE regulatory_factor_coding ( ) COLLATE=latin1_swedish_ci TYPE=MyISAM; + ################################################################################ # # Table structure for table 'regulatory_search_region' @@ -1324,24 +1451,26 @@ CREATE TABLE regulatory_factor_coding ( CREATE TABLE regulatory_search_region ( - regulatory_search_region_id INT NOT NULL auto_increment, - name VARCHAR(255) NOT NULL, - seq_region_id INT NOT NULL, # FK refs seq_region - seq_region_start INT NOT NULL, - seq_region_end INT NOT NULL, - seq_region_strand TINYINT NOT NULL, - ensembl_object_type ENUM( 'Transcript', 'Translation', 'Gene') NOT NULL, - ensembl_object_id INT, # FK to gene/transcript/translation - analysis_id INT NOT NULL, # FK to analysis + regulatory_search_region_id INT NOT NULL AUTO_INCREMENT, + name VARCHAR(255) NOT NULL, + seq_region_id INT NOT NULL, + seq_region_start INT NOT NULL, + seq_region_end INT NOT NULL, + seq_region_strand TINYINT NOT NULL, + ensembl_object_type ENUM('Transcript', 'Translation', 'Gene') + NOT NULL, + 
ensembl_object_id INT, # FK to gene/transcript/translation + analysis_id INT NOT NULL, PRIMARY KEY (regulatory_search_region_id), KEY rsr_idx (regulatory_search_region_id), KEY ensembl_object_idx (ensembl_object_type, ensembl_object_id), - KEY seq_region_idx(seq_region_id, seq_region_start), - KEY seq_region_idx_2(seq_region_id, seq_region_start) + KEY seq_region_idx (seq_region_id, seq_region_start), + KEY seq_region_idx_2 (seq_region_id, seq_region_start) ) COLLATE=latin1_swedish_ci TYPE=MyISAM; + ################################################################################ # # Table structure for table 'unmapped_object' @@ -1358,15 +1487,18 @@ CREATE TABLE unmapped_object ( unmapped_reason_id SMALLINT(5) UNSIGNED NOT NULL, query_score DOUBLE, target_score DOUBLE, - ensembl_id INT(10) unsigned default '0', - ensembl_object_type ENUM('RawContig','Transcript','Gene','Translation') collate latin1_bin default 'RawContig', - PRIMARY KEY ( unmapped_object_id ), - KEY id_idx( identifier ), - KEY anal_idx( analysis_id ), - KEY anal_exdb_idx( analysis_id, external_db_id) + ensembl_id INT(10) UNSIGNED default '0', + ensembl_object_type ENUM('RawContig','Transcript','Gene','Translation') + default 'RawContig', + + PRIMARY KEY (unmapped_object_id), + KEY id_idx (identifier), + KEY anal_idx (analysis_id), + KEY anal_exdb_idx (analysis_id, external_db_id) ) COLLATE=latin1_swedish_ci TYPE=MyISAM; + ################################################################################ # # Table structure for table 'unmapped_reason' @@ -1379,6 +1511,7 @@ CREATE TABLE unmapped_reason ( summary_description VARCHAR(255), full_description VARCHAR(255), - PRIMARY KEY ( unmapped_reason_id ) + PRIMARY KEY (unmapped_reason_id) ) COLLATE=latin1_swedish_ci TYPE=MyISAM; + -- GitLab