diff --git a/modules/Bio/EnsEMBL/Biotype.pm b/modules/Bio/EnsEMBL/Biotype.pm new file mode 100644 index 0000000000000000000000000000000000000000..636b6430c0aba3fa188270ab3f6c0669910d7ca4 --- /dev/null +++ b/modules/Bio/EnsEMBL/Biotype.pm @@ -0,0 +1,276 @@ +=head1 LICENSE + +Copyright [1999-2015] Wellcome Trust Sanger Institute and the EMBL-European Bioinformatics Institute +Copyright [2016-2018] EMBL-European Bioinformatics Institute + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. + +=cut + + +=head1 CONTACT + + Please email comments or questions to the public Ensembl + developers list at <http://lists.ensembl.org/mailman/listinfo/dev>. + + Questions may also be sent to the Ensembl help desk at + <http://www.ensembl.org/Help/Contact>. + +=cut + +=head1 NAME + +Bio::EnsEMBL::Biotype + +=head1 SYNOPSIS + + my $biotype = Bio::EnsEMBL::Biotype->new( + -name => 'new_biotype', + -object_type => 'gene', + -biotype_group => 'a_biotype_group', + -so_acc => 'SO:1234567', + -description => 'New biotype' + ); + + my $name = $biotype->name(); + my $biotype_group = $biotype->biotype_group(); + my $so_acc = $biotype->so_acc(); + +=head1 DESCRIPTION + + This is the Biotype object class. + Gene and Transcript objects used to have a biotype() method that returned the biotype name + (the biotype field in the gene and transcript tables). + From e93 a new biotype table was added.
However because of legacy code using direct sql + queries on the biotype column of gene and transcript tables, that column that contains the + biotype name was not replaced by biotype_id containing a foreign key to the new biotype table. + Gene and Transcripts can still link to a Biotype through the key (name, object_type). + +=head1 METHODS + +=cut + + +package Bio::EnsEMBL::Biotype; + +use strict; +use warnings; + +use Bio::EnsEMBL::Storable; +use Bio::EnsEMBL::Utils::Exception qw(throw deprecate warning); +use Bio::EnsEMBL::Utils::Scalar qw(check_ref assert_ref); +use Bio::EnsEMBL::Utils::Argument qw(rearrange); +use Scalar::Util qw(weaken isweak); + +use parent qw(Bio::EnsEMBL::Storable); + +=head2 new + + Arg [-BIOTYPE_ID] : + int - dbID of the biotype + Arg [-NAME] : + string - the name of the biotype (for ensembl) + Arg [-OBJECT_TYPE] : + string - the object type this biotype applies to (gene or transcript) + Arg [-BIOTYPE_GROUP] : + string - the name of the biotype group (for ensembl) + Arg [-SO_ACC] : + string - the Sequence Ontology accession of this biotype + Arg [-DESCRIPTION] : + string - the biotype description + Arg [-DB_TYPE] : + string - the database type for this biotype + Arg [-ATTRIB_TYPE_ID] : + int - attrib_type_id + + Example : $biotype = Bio::EnsEMBL::Biotype->new(...); + Description: Creates a new biotype object + Returntype : Bio::EnsEMBL::Biotype + Exceptions : none + +=cut + +sub new { + my ( $caller, @args ) = @_; + + my $class = ref($caller) || $caller; + + my $self = $class->SUPER::new(); + + my($dbID, $name, $object_type, $biotype_group, $so_acc, $description, $db_type, $attrib_type_id) = + rearrange([qw(BIOTYPE_ID NAME OBJECT_TYPE BIOTYPE_GROUP SO_ACC DESCRIPTION DB_TYPE ATTRIB_TYPE_ID)], @args); + + $self->{'dbID'} = $dbID; + $self->{'name'} = $name; + $self->{'object_type'} = $object_type; + $self->{'biotype_group'} = $biotype_group; + $self->{'so_acc'} = $so_acc; + $self->{'description'} = $description; + $self->{'db_type'} = 
$db_type; + $self->{'attrib_type_id'} = $attrib_type_id; + + return $self; +} + +=head2 new_fast + + Arg [1] : hashref to be blessed + Description: Construct a new Bio::EnsEMBL::Biotype using the hashref. + Exceptions : none + Returntype : Bio::EnsEMBL::Biotype + +=cut + + +sub new_fast { + my ( $class, $hashref ) = @_; + + my $self = bless $hashref, $class; + + if ( !isweak($self->{adaptor}) ) { + weaken($self->{adaptor}) + } + + return $self; +} + +=head2 name + + Arg [1] : (optional) string $name + The name of this biotype according to ensembl. + Example : $name = $biotype->name() + Description: Getter/Setter for the name of this biotype. + Returntype : string + Exceptions : none + +=cut + +sub name { + my ( $self, $name ) = @_; + + if ( defined($name) ) { + $self->{'name'} = $name; + } + + return $self->{'name'}; +} + +=head2 biotype_group + + Arg [1] : (optional) string $biotype_group + Example : $biotype_group = $biotype->biotype_group(); + Description: Getter/Setter for the biotype_group of this biotype. + Biotype groups are used internally in Ensembl pipelines + and consist of a few defined categories. + Returntype : string + Exceptions : none + Caller : general + Status : Stable + +=cut + +sub biotype_group { + my ( $self, $biotype_group ) = @_; + + if ( defined($biotype_group) ) { + $self->{'biotype_group'} = $biotype_group; + } + + return $self->{'biotype_group'}; +} + +=head2 so_acc + + Arg [1] : (optional) string $so_acc + Example : $so_acc = $biotype->so_acc(); + Description: Getter/Setter for the Sequence Ontology accession of this biotype. + It must be a SO like accession. + Returntype : string + Exceptions : thrown if an invalid so_acc argument is passed + +=cut + +sub so_acc { + my ( $self, $so_acc ) = @_; + + if ( defined($so_acc) ) { + # throw an error if setting something that does not look like an SO acc + unless ( $so_acc =~ m/\ASO:\d+\z/x ) { + throw("so_acc must be a Sequence Ontology accession.
'$so_acc' does not look like one.") + } + + $self->{'so_acc'} = $so_acc; + } + + return $self->{'so_acc'}; +} + +=head2 object_type + + Arg [1] : (optional) string $object_type + Example : $object_type = $biotype->object_type(); + Description: Getter/Setter for the object_type of this biotype. + Biotypes can be assigned to either genes or transcripts, + object_type refers to which of them. + Returntype : string + Exceptions : thrown if an invalid object_type argument is passed (not gene or transcript) + +=cut + +sub object_type { + my ( $self, $object_type ) = @_; + + if ( defined($object_type) ) { + $object_type = lc $object_type; + # throw an error if setting something that is neither gene nor transcript + unless ( $object_type eq 'gene' || $object_type eq 'transcript' ) { + throw("object_type must be gene or transcript. Got '$object_type'.") + } + + $self->{'object_type'} = $object_type; + } + + return $self->{'object_type'}; +} + +=for Rationale: + + Gene and Transcript objects used to have a biotype() method that returned the string biotype name. + From e93 those methods were replaced and a Biotype object is now returned. + To maintain legacy code functioning, overload was used to return the string biotype name when + the object is not used as such. + New code should explicitly call biotype->name() if the string biotype name is what is required. + +=cut + +use overload + 'fallback' => 1, + '""' => \&as_string; + +=head2 as_string + + Example : $biotype_str = $biotype->as_string(); + $biotype_str = "$biotype"; + Description : Retrieves a printable string of the biotype object. + At the moment this is the biotype ensembl name. + If the Biotype object is used in string context this is what is returned to maintain compatibility + with legacy biotype() methods in Gene and Transcript objects. + Returns : string for the biotype.
+ +=cut + +sub as_string { + return shift->name; +} + +1; diff --git a/modules/Bio/EnsEMBL/DBSQL/BiotypeAdaptor.pm b/modules/Bio/EnsEMBL/DBSQL/BiotypeAdaptor.pm new file mode 100644 index 0000000000000000000000000000000000000000..66693733fd7fbf46c4b4cb36ef986da83a44158d --- /dev/null +++ b/modules/Bio/EnsEMBL/DBSQL/BiotypeAdaptor.pm @@ -0,0 +1,197 @@ +=head1 LICENSE + +Copyright [1999-2015] Wellcome Trust Sanger Institute and the EMBL-European Bioinformatics Institute +Copyright [2016-2018] EMBL-European Bioinformatics Institute + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. + +=cut + + +=head1 CONTACT + + Please email comments or questions to the public Ensembl + developers list at <http://lists.ensembl.org/mailman/listinfo/dev>. + + Questions may also be sent to the Ensembl help desk at + <http://www.ensembl.org/Help/Contact>. + +=cut + +=head1 NAME + + Bio::EnsEMBL::DBSQL::BiotypeAdaptor - An adaptor which performs database + interaction relating to the storage and retrieval of Biotypes + +=head1 SYNOPSIS + + my $biotype = $db_adaptor->fetch_by_name_object_type('protein_coding', 'gene'); + +=head1 DESCRIPTION + + This adaptor provides a means to retrieve and store information related + to Biotypes. Primarily this involves the retrieval or storage of + Bio::EnsEMBL::Biotype objects from a database. + + See Bio::EnsEMBL::Biotype for details of the Biotype class. 
+ +=head1 METHODS + +=cut + +package Bio::EnsEMBL::DBSQL::BiotypeAdaptor; + +use Bio::EnsEMBL::DBSQL::BaseAdaptor; +use Bio::EnsEMBL::Utils::Exception qw(throw deprecate warning); +use Bio::EnsEMBL::Biotype; + +use strict; +use warnings; + +use parent qw(Bio::EnsEMBL::DBSQL::BaseAdaptor); + +=head2 _tables + + Arg [1] : none + Description: PROTECTED implementation of superclass abstract method. + Returns the names, aliases of the tables to use for queries. + Returntype : list of arrays of strings + Exceptions : none + +=cut + +sub _tables { + my $self = shift; + + return (['biotype', 'b']); +} + +=head2 _columns + + Arg [1] : none + Example : none + Description: PROTECTED implementation of superclass abstract method. + Returns a list of columns to use for queries. + Returntype : list of strings + Exceptions : none + +=cut + +sub _columns { + my $self = shift; + + return ('b.biotype_id', 'b.name', 'b.object_type', 'b.db_type', 'b.attrib_type_id', 'b.description', 'b.biotype_group', 'b.so_acc'); +} + +=head2 _objs_from_sth + + Arg [1] : StatementHandle $sth + Example : none + Description: PROTECTED implementation of abstract superclass method. 
+ responsible for the creation of Biotypes + Returntype : arrayref of Bio::EnsEMBL::Biotype objects + Exceptions : none + +=cut + +sub _objs_from_sth { + my ($self, $sth) = @_; + + my ($dbID, $name, $object_type, $db_type, $attrib_type_id, $description, $biotype_group, $so_acc); + + $sth->bind_columns(\$dbID, \$name, \$object_type, \$db_type, \$attrib_type_id, \$description, \$biotype_group, \$so_acc); + + my @biotypes; + + while($sth->fetch()) { + push( @biotypes, + my $feat = Bio::EnsEMBL::Biotype->new_fast( { + 'dbID' => $dbID, + 'name' => $name, + 'object_type' => $object_type, + 'db_type' => $db_type, + 'attrib_type_id' => $attrib_type_id, + 'description' => $description, + 'biotype_group' => $biotype_group, + 'so_acc' => $so_acc, + } ) + ); + } + + return \@biotypes; +} + + +=head2 fetch_by_name_object_type + + Arg [1] : String $name + The name of the biotype to retrieve + Arg [2] : String $object_type + The object type of the biotype to retrieve (gene or transcript) + Example : $biotype = $biotype_adaptor->fetch_by_name_object_type('protein_coding', 'gene'); + Description: Retrieves a biotype object from the database via its combined key (name, object_type). + If the Biotype requested does not exist in the database, a new Biotype object is + created with the provided name and object_type to be returned. + Returntype : Bio::EnsEMBL::Biotype + Exceptions : none + +=cut + +sub fetch_by_name_object_type { + my ($self, $name, $object_type) = @_; + + my $constraint = "b.name = ?
AND b.object_type = ?"; + $self->bind_param_generic_fetch($name, SQL_VARCHAR); + $self->bind_param_generic_fetch($object_type, SQL_VARCHAR); + my ($biotype) = @{$self->generic_fetch($constraint)}; + + # If request biotype does not exist in the table + # create a new biotype object containing name and object_type only + # this is required by genebuild in pipelines + if (!defined $biotype) { + $biotype = Bio::EnsEMBL::Biotype->new( + -NAME => $name, + -OBJECT_TYPE => $object_type, + ) + } + + return $biotype; +} + +=head2 fetch_all_by_object_type + + Arg [1] : String $object_type + The object_type of the biotypes to retrieve (gene or transcript). + Example : $biotypes = $biotype_adaptor->fetch_all_by_object_type('gene'); + Description: Retrieves an array reference of biotype objects from the database. + Returntype : arrayref of Bio::EnsEMBL::Biotype objects or empty arrayref + Warning : If empty arrayref is to be returned + Exceptions : none + +=cut + +sub fetch_all_by_object_type { + my ($self, $object_type) = @_; + + my $constraint = "b.object_type = ?"; + $self->bind_param_generic_fetch($object_type, SQL_VARCHAR); + my @biotypes = @{$self->generic_fetch($constraint)}; + + if ( !@biotypes ) { + warning("No objects retrieved. 
Check if object_type '$object_type' is correct.") + } + + return \@biotypes; +} + +1; \ No newline at end of file diff --git a/modules/Bio/EnsEMBL/DBSQL/DBAdaptor.pm b/modules/Bio/EnsEMBL/DBSQL/DBAdaptor.pm index 8396036e4b0ce04598967d72ae74773c4fd1b722..62000866518c51392f6c4d5c6c5ce559da7b2193 100755 --- a/modules/Bio/EnsEMBL/DBSQL/DBAdaptor.pm +++ b/modules/Bio/EnsEMBL/DBSQL/DBAdaptor.pm @@ -402,6 +402,7 @@ sub get_available_adaptors { AssemblyMapper => 'Bio::EnsEMBL::DBSQL::AssemblyMapperAdaptor', AssemblySlice => 'Bio::EnsEMBL::DBSQL::AssemblySliceAdaptor', Attribute => 'Bio::EnsEMBL::DBSQL::AttributeAdaptor', + Biotype => 'Bio::EnsEMBL::DBSQL::BiotypeAdaptor', CoordSystem => 'Bio::EnsEMBL::DBSQL::CoordSystemAdaptor', DataFile => 'Bio::EnsEMBL::DBSQL::DataFileAdaptor', DBEntry => 'Bio::EnsEMBL::DBSQL::DBEntryAdaptor', @@ -518,7 +519,7 @@ sub _each_DASFeatureFactory{ features to be obtained from Slices and from RawContigs. The external feature adaptor which is passed to this method - will have its db attribuite set to this DBAdaptor object via + will have its db attribute set to this DBAdaptor object via the db accessor method. ExternalFeatureAdaptors passed to this method are stored @@ -636,7 +637,7 @@ sub add_ExternalFeatureFactory{ Caller : external Status : Medium Risk : please use the Registry method, as at some time this - : may no longer be supprted. + : may no longer be supported. =cut @@ -660,7 +661,7 @@ sub get_adaptor { Caller : external Status : Medium Risk : please use the Registry method, as at some time this - : may no longer be supprted. + : may no longer be supported. 
=cut @@ -1068,7 +1069,7 @@ sub switch_adaptor { Arg [1] : String name of the adaptor type to switch back in Example : $dba->has_switchable_adaptor("sequence"); #explicit switching back - Returntype : Boolean indicating if the given adaptor is being activly switched + Returntype : Boolean indicating if the given adaptor is being actively switched Description : Provides a wrapper around the Registry has_switchable_adaptor() method defaulting both species and group to the current DBAdaptor. This will inform if the specified adaptor is being switched out diff --git a/modules/Bio/EnsEMBL/Gene.pm b/modules/Bio/EnsEMBL/Gene.pm index eba0228adde96299805182534b5d0a071ef92bd6..17045ec1a40f6336a3211c8d5b7a9f44295acf75 100755 --- a/modules/Bio/EnsEMBL/Gene.pm +++ b/modules/Bio/EnsEMBL/Gene.pm @@ -66,12 +66,12 @@ use strict; use POSIX; use Bio::EnsEMBL::Feature; use Bio::EnsEMBL::Intron; +use Bio::EnsEMBL::Biotype; use Bio::EnsEMBL::Utils::Argument qw(rearrange); use Bio::EnsEMBL::Utils::Exception qw(throw warning); use Bio::EnsEMBL::Utils::Scalar qw(assert_ref); -use vars qw(@ISA); -@ISA = qw(Bio::EnsEMBL::Feature); +use parent qw(Bio::EnsEMBL::Feature); =head2 new @@ -171,8 +171,9 @@ sub new { $self->external_status($external_status) if ( defined $external_status ); $self->display_xref($display_xref) if ( defined $display_xref ); - $self->biotype($type) if ( defined $type ); - $self->biotype($biotype) if ( defined $biotype ); + + $self->{'biotype'} = $biotype || $type; + $self->description($description); $self->source($source); @@ -896,27 +897,6 @@ sub _clear_homologues { delete $self->{homologues}; } - -=head2 biotype - - Arg [1] : (optional) String - the biotype to set - Example : $gene->biotype("protein_coding"); - Description: Getter/setter for the attribute biotype - Returntype : String - Exceptions : none - Caller : general - Status : Stable - -=cut - -sub biotype { - my $self = shift; - - $self->{'biotype'} = shift if( @_ ); - return ( $self->{'biotype'} || 
"protein_coding" ); -} - - =head2 add_Transcript Arg [1] : Bio::EnsEMBL::Transcript $trans @@ -1521,6 +1501,50 @@ sub havana_gene { return $ott; } +=head2 biotype -1; + Arg [1] : (optional) String - the biotype to set + Example : my $biotype = $gene->biotype; + my $biotype = $gene->biotype('protein_coding'); + Description: Returns the Biotype object of this gene. + When no biotype exists, defaults to 'protein_coding'. + When used to set to a biotype that does not exist in + the biotype table, a biotype object is created with + the provided argument as name and object_type gene. + Returntype : Bio::EnsEMBL::Biotype + Exceptions : none + +=cut + +sub biotype { + my ( $self, $new_value) = @_; + + # have a biotype object and not setting new one, return it + if ( ref $self->{'biotype'} eq 'Bio::EnsEMBL::Biotype' && !defined $new_value ) { + return $self->{'biotype'}; + } + # biotype is first set as a string retrieved from the gene table + # there is no biotype object in the gene object, retrieve it using the biotype string + # if no string, default to protein_coding.
this is legacy behaviour and should probably be revisited + if ( ref $self->{'biotype'} ne 'Bio::EnsEMBL::Biotype' && !defined $new_value) { + $new_value = $self->{'biotype'} // 'protein_coding'; + } + + # retrieve biotype object from the biotype adaptor + if( defined $self->adaptor() ) { + my $ba = $self->adaptor()->db()->get_BiotypeAdaptor(); + $self->{'biotype'} = $ba->fetch_by_name_object_type( $new_value, 'gene' ); + } + # if $self->adaptor is unavailable, create a new biotype object containing name and object_type only + else { + $self->{'biotype'} = Bio::EnsEMBL::Biotype->new( + -NAME => $new_value, + -OBJECT_TYPE => 'gene', + ) + } + + return $self->{'biotype'} ; +} + +1; diff --git a/modules/Bio/EnsEMBL/Transcript.pm b/modules/Bio/EnsEMBL/Transcript.pm index 31b33e8d06a36c861257199fd400b2e39c15b846..c9fdc5c466d5ae310915a6c6eb725d92260a2524 100755 --- a/modules/Bio/EnsEMBL/Transcript.pm +++ b/modules/Bio/EnsEMBL/Transcript.pm @@ -70,13 +70,12 @@ use Bio::EnsEMBL::ExonTranscript; use Bio::EnsEMBL::CDS; use Bio::EnsEMBL::TranscriptMapper; use Bio::EnsEMBL::SeqEdit; - +use Bio::EnsEMBL::Biotype; use Bio::EnsEMBL::Utils::Argument qw( rearrange ); use Bio::EnsEMBL::Utils::Exception qw(warning throw ); use Bio::EnsEMBL::Utils::Scalar qw( assert_ref ); -use vars qw(@ISA); -@ISA = qw(Bio::EnsEMBL::Feature); +use parent qw(Bio::EnsEMBL::Feature); =head2 new @@ -172,7 +171,9 @@ sub new { $self->edits_enabled(1); $self->description($description); - $self->biotype($biotype); + + $self->{'biotype'} = $biotype; + $self->source($source); # Default version @@ -597,24 +598,6 @@ sub external_name { } } - -=head2 biotype - - Arg [1] : string $biotype - Description: get/set for attribute biotype - Returntype : string - Exceptions : none - Caller : general - Status : Stable - -=cut - -sub biotype { - my $self = shift; - $self->{'biotype'} = shift if( @_ ); - return ( $self->{'biotype'} || "protein_coding" ); -} - =head2 source Arg [1] : (optional) String - the source to set @@ 
-3212,6 +3195,52 @@ sub get_Gene { return $parent_gene; } +=head2 biotype + + Arg [1] : (optional) String - the biotype to set + Example : my $biotype = $transcript->biotype; + my $biotype = $transcript->biotype('protein_coding'); + Description: Returns the Biotype object of this transcript. + When no biotype exists, defaults to 'protein_coding'. + When used to set to a biotype that does not exist in + the biotype table, a biotype object is created with + the provided argument as name and object_type transcript. + Returntype : Bio::EnsEMBL::Biotype + Exceptions : none + +=cut + +sub biotype { + my ( $self, $new_value) = @_; + + # have a biotype object and not setting new one, return it + if ( ref $self->{'biotype'} eq 'Bio::EnsEMBL::Biotype' && !defined $new_value ) { + return $self->{'biotype'}; + } + + # biotype is first set as a string retrieved from the transcript table + # there is no biotype object in the transcript object, retrieve it using the biotype string + # if no string, default to protein_coding.
this is legacy behaviour and should probably be revisited + if ( ref $self->{'biotype'} ne 'Bio::EnsEMBL::Biotype' && !defined $new_value) { + $new_value = $self->{'biotype'} // 'protein_coding'; + } + + # retrieve biotype object from the biotype adaptor + if( defined $self->adaptor() ) { + my $ba = $self->adaptor()->db()->get_BiotypeAdaptor(); + $self->{'biotype'} = $ba->fetch_by_name_object_type( $new_value, 'transcript' ); + } + # if $self->adaptor is unavailable, create a new biotype object containing name and object_type only + else { + $self->{'biotype'} = Bio::EnsEMBL::Biotype->new( + -NAME => $new_value, + -OBJECT_TYPE => 'transcript', + ) + } + + return $self->{'biotype'} ; +} + 1; diff --git a/modules/Bio/EnsEMBL/Utils/SequenceOntologyMapper.pm b/modules/Bio/EnsEMBL/Utils/SequenceOntologyMapper.pm index aa8d1cfc0b7805bd8ce4ae7a9d549fbe499b5fb1..f1dd02faedf526a4b2983845e5e2e59e336b3759 100644 --- a/modules/Bio/EnsEMBL/Utils/SequenceOntologyMapper.pm +++ b/modules/Bio/EnsEMBL/Utils/SequenceOntologyMapper.pm @@ -28,7 +28,7 @@ SequenceOntologyMapper - Translates EnsEMBL objects into Sequence Ontology terms use Bio::EnsEMBL::Utils::SequenceOntologyMapper -# get an Ensembl feature somehow in scalar $feature +# get an Ensembl feature somehow in scalar $feature ... ... @@ -40,9 +40,14 @@ print $mapper->to_name($feature), "\n"; =head1 DESCRIPTION -Basic mapper from Ensembl feature or related objects to Sequence Ontology +Basic mapper from Ensembl feature or related objects to Sequence Ontology (http://www.sequenceontology.org) terms. +Gene and Transcript objects contain a biotype() that will return a Biotype object. +This object contains several methods, including so_acc() that provides the +corresponding SO accession. It is thus recommended to use the Biotype object +if dealing with Gene or Transcript objects. + The interface allows to map to SO accessions and names. 
=cut @@ -55,206 +60,205 @@ use warnings; use Bio::EnsEMBL::Utils::Cache; use Bio::EnsEMBL::Utils::Exception; -my %gene_so_mapping = +my %gene_so_mapping = ( # Protein coding gene biotype - 'protein_coding' => 'SO:0001217', - 'IG_C_gene' => 'SO:0001217', - 'IG_D_gene' => 'SO:0001217', - 'IG_gene' => 'SO:0001217', - 'IG_J_gene' => 'SO:0001217', - 'IG_LV_gene' => 'SO:0001217', - 'IG_M_gene' => 'SO:0001217', - 'IG_V_gene' => 'SO:0001217', - 'IG_Z_gene' => 'SO:0001217', - 'mRNA' => 'SO:0001217', - 'nontranslating_CDS' => 'SO:0001217', - 'polymorphic' => 'SO:0001217', - 'polymorphic_pseudogene' => 'SO:0001217', - 'TR_C_gene' => 'SO:0001217', - 'TR_D_gene' => 'SO:0001217', - 'TR_gene' => 'SO:0001217', - 'TR_J_gene' => 'SO:0001217', - 'TR_V_gene' => 'SO:0001217', - + 'protein_coding' => 'SO:0001217', + 'IG_C_gene' => 'SO:0001217', + 'IG_D_gene' => 'SO:0001217', + 'IG_gene' => 'SO:0001217', + 'IG_J_gene' => 'SO:0001217', + 'IG_LV_gene' => 'SO:0001217', + 'IG_M_gene' => 'SO:0001217', + 'IG_V_gene' => 'SO:0001217', + 'IG_Z_gene' => 'SO:0001217', + 'mRNA' => 'SO:0001217', + 'nontranslating_CDS' => 'SO:0001217', + 'polymorphic' => 'SO:0001217', + 'polymorphic_pseudogene' => 'SO:0001217', + 'TR_C_gene' => 'SO:0001217', + 'TR_D_gene' => 'SO:0001217', + 'TR_gene' => 'SO:0001217', + 'TR_J_gene' => 'SO:0001217', + 'TR_V_gene' => 'SO:0001217', + # Pseudogene biotype - 'IG_C_pseudogene' => 'SO:0000336', - 'IG_D_pseudogene' => 'SO:0000336', - 'IG_J_pseudogene' => 'SO:0000336', - 'IG_pseudogene' => 'SO:0000336', - 'IG_V_pseudogene' => 'SO:0000336', - 'miRNA_pseudogene' => 'SO:0000336', - 'misc_RNA_pseudogene' => 'SO:0000336', - 'Mt_tRNA_pseudogene' => 'SO:0000336', - 'ncbi_pseudogene' => 'SO:0000336', - 'ncRNA_pseudogene' => 'SO:0000336', - 'processed_pseudogene' => 'SO:0000336', - 'pseudogene' => 'SO:0000336', - 'rRNA_pseudogene' => 'SO:0000336', - 'scRNA_pseudogene' => 'SO:0000336', - 'snoRNA_pseudogene' => 'SO:0000336', - 'snRNA_pseudogene' => 'SO:0000336', - 
'transcribed_processed_pseudogene' => 'SO:0000336', - 'transcribed_unitary_pseudogene' => 'SO:0000336', + 'IG_C_pseudogene' => 'SO:0000336', + 'IG_D_pseudogene' => 'SO:0000336', + 'IG_J_pseudogene' => 'SO:0000336', + 'IG_pseudogene' => 'SO:0000336', + 'IG_V_pseudogene' => 'SO:0000336', + 'miRNA_pseudogene' => 'SO:0000336', + 'misc_RNA_pseudogene' => 'SO:0000336', + 'Mt_tRNA_pseudogene' => 'SO:0000336', + 'ncbi_pseudogene' => 'SO:0000336', + 'ncRNA_pseudogene' => 'SO:0000336', + 'processed_pseudogene' => 'SO:0000336', + 'pseudogene' => 'SO:0000336', + 'rRNA_pseudogene' => 'SO:0000336', + 'scRNA_pseudogene' => 'SO:0000336', + 'snoRNA_pseudogene' => 'SO:0000336', + 'snRNA_pseudogene' => 'SO:0000336', + 'transcribed_processed_pseudogene' => 'SO:0000336', + 'transcribed_unitary_pseudogene' => 'SO:0000336', 'transcribed_unprocessed_pseudogene' => 'SO:0000336', - 'translated_processed_pseudogene' => 'SO:0000336', - 'translated_unprocessed_pseudogene' => 'SO:0000336', - 'tRNA_pseudogene' => 'SO:0000336', - 'TR_J_pseudogene' => 'SO:0000336', - 'TR_pseudogene' => 'SO:0000336', - 'TR_V_pseudogene' => 'SO:0000336', - 'unitary_pseudogene' => 'SO:0000336', - 'unprocessed_pseudogene' => 'SO:0000336', + 'translated_processed_pseudogene' => 'SO:0000336', + 'translated_unprocessed_pseudogene' => 'SO:0000336', + 'tRNA_pseudogene' => 'SO:0000336', + 'TR_J_pseudogene' => 'SO:0000336', + 'TR_pseudogene' => 'SO:0000336', + 'TR_V_pseudogene' => 'SO:0000336', + 'unitary_pseudogene' => 'SO:0000336', + 'unprocessed_pseudogene' => 'SO:0000336', # ncRNA gene biotypes - '3prime_overlapping_ncrna' => 'SO:0001263', - 'ambiguous_orf' => 'SO:0001263', - 'antisense' => 'SO:0001263', - 'antisense_RNA' => 'SO:0001263', - 'antitoxin' => 'SO:0001263', - 'bidirectional_promoter_lncrna' => 'SO:0001263', - 'class_II_RNA' => 'SO:0001263', - 'class_I_RNA' => 'SO:0001263', - 'CRISPR' => 'SO:0001263', - 'guide_RNA' => 'SO:0001263', - 'known_ncrna' => 'SO:0001263', - 'lincRNA' => 'SO:0001263', - 'lncRNA' => 
'SO:0001263', - 'macro_lncRNA' => 'SO:0001263', - 'miRNA' => 'SO:0001263', - 'misc_RNA' => 'SO:0001263', - 'Mt_rRNA' => 'SO:0001263', - 'Mt_tRNA' => 'SO:0001263', - 'ncRNA' => 'SO:0001263', - 'ncrna_host' => 'SO:0001263', - 'non_coding' => 'SO:0001263', - 'piRNA' => 'SO:0001263', - 'pre_miRNA' => 'SO:0001263', - 'processed_transcript' => 'SO:0001263', - 'retained_intron' => 'SO:0001263', - 'ribozyme' => 'SO:0001263', - 'RNase_MRP_RNA' => 'SO:0001263', - 'RNase_P_RNA' => 'SO:0001263', - 'rRNA' => 'SO:0001263', - 'scaRNA' => 'SO:0001263', - 'scRNA' => 'SO:0001263', - 'sense_intronic' => 'SO:0001263', - 'sense_overlapping' => 'SO:0001263', - 'snlRNA' => 'SO:0001263', - 'snoRNA' => 'SO:0001263', - 'snRNA' => 'SO:0001263', - 'sRNA' => 'SO:0001263', - 'SRP_RNA' => 'SO:0001263', - 'telomerase_RNA' => 'SO:0001263', - 'tmRNA' => 'SO:0001263', - 'tRNA' => 'SO:0001263', - 'vaultRNA' => 'SO:0001263', - 'Y_RNA' => 'SO:0001263' + '3prime_overlapping_ncrna' => 'SO:0001263', + 'ambiguous_orf' => 'SO:0001263', + 'antisense' => 'SO:0001263', + 'antisense_RNA' => 'SO:0001263', + 'antitoxin' => 'SO:0001263', + 'bidirectional_promoter_lncrna' => 'SO:0001263', + 'class_II_RNA' => 'SO:0001263', + 'class_I_RNA' => 'SO:0001263', + 'CRISPR' => 'SO:0001263', + 'guide_RNA' => 'SO:0001263', + 'known_ncrna' => 'SO:0001263', + 'lincRNA' => 'SO:0001263', + 'lncRNA' => 'SO:0001263', + 'macro_lncRNA' => 'SO:0001263', + 'miRNA' => 'SO:0001263', + 'misc_RNA' => 'SO:0001263', + 'Mt_rRNA' => 'SO:0001263', + 'Mt_tRNA' => 'SO:0001263', + 'ncRNA' => 'SO:0001263', + 'ncrna_host' => 'SO:0001263', + 'non_coding' => 'SO:0001263', + 'piRNA' => 'SO:0001263', + 'pre_miRNA' => 'SO:0001263', + 'processed_transcript' => 'SO:0001263', + 'retained_intron' => 'SO:0001263', + 'ribozyme' => 'SO:0001263', + 'RNase_MRP_RNA' => 'SO:0001263', + 'RNase_P_RNA' => 'SO:0001263', + 'rRNA' => 'SO:0001263', + 'scaRNA' => 'SO:0001263', + 'scRNA' => 'SO:0001263', + 'sense_intronic' => 'SO:0001263', + 'sense_overlapping' => 
'SO:0001263', + 'snlRNA' => 'SO:0001263', + 'snoRNA' => 'SO:0001263', + 'snRNA' => 'SO:0001263', + 'sRNA' => 'SO:0001263', + 'SRP_RNA' => 'SO:0001263', + 'telomerase_RNA' => 'SO:0001263', + 'tmRNA' => 'SO:0001263', + 'tRNA' => 'SO:0001263', + 'vaultRNA' => 'SO:0001263', + 'Y_RNA' => 'SO:0001263' ); -my %transcript_so_mapping = +my %transcript_so_mapping = ( # mRNA biotypes - 'protein_coding' => 'SO:0000234', - 'mRNA' => 'SO:0000234', - 'nonsense_mediated_decay' => 'SO:0000234', - 'nontranslating_CDS' => 'SO:0000234', - 'non_stop_decay' => 'SO:0000234', - 'polymorphic_pseudogene' => 'SO:0000234', - + 'protein_coding' => 'SO:0000234', + 'mRNA' => 'SO:0000234', + 'nonsense_mediated_decay' => 'SO:0000234', + 'nontranslating_CDS' => 'SO:0000234', + 'non_stop_decay' => 'SO:0000234', + 'polymorphic_pseudogene' => 'SO:0000234', + # IG biotypes (SO:3000000 gene_segment) - 'IG_C_gene' => 'SO:0000478', # C_gene_segment - 'TR_C_gene' => 'SO:0000478', # C_gene_segment - 'IG_D_gene' => 'SO:0000458', # D_gene_segment - 'TR_D_gene' => 'SO:0000458', # D_gene_segment - 'IG_gene' => 'SO:3000000', # gene_segment - 'TR_gene' => 'SO:3000000', # gene_segment - 'IG_J_gene' => 'SO:0000470', # J_gene_segment - 'TR_J_gene' => 'SO:0000470', # J_gene_segment - 'IG_LV_gene' => 'SO:3000000', # gene_segment - 'IG_M_gene' => 'SO:3000000', # gene_segment - 'IG_V_gene' => 'SO:0000466', # V_gene_segment - 'TR_V_gene' => 'SO:0000466', # V_gene_segment - 'IG_Z_gene' => 'SO:3000000', # gene_segment + 'IG_C_gene' => 'SO:0000478', # C_gene_segment + 'TR_C_gene' => 'SO:0000478', # C_gene_segment + 'IG_D_gene' => 'SO:0000458', # D_gene_segment + 'TR_D_gene' => 'SO:0000458', # D_gene_segment + 'IG_gene' => 'SO:3000000', # gene_segment + 'TR_gene' => 'SO:3000000', # gene_segment + 'IG_J_gene' => 'SO:0000470', # J_gene_segment + 'TR_J_gene' => 'SO:0000470', # J_gene_segment + 'IG_LV_gene' => 'SO:3000000', # gene_segment + 'IG_M_gene' => 'SO:3000000', # gene_segment + 'IG_V_gene' => 'SO:0000466', # 
V_gene_segment + 'TR_V_gene' => 'SO:0000466', # V_gene_segment + 'IG_Z_gene' => 'SO:3000000', # gene_segment # Pseudogenic_transcript biotypes - 'pseudogene' => 'SO:0000516', - 'disrupted_domain' => 'SO:0000516', - 'IG_C_pseudogene' => 'SO:0000516', - 'IG_D_pseudogene' => 'SO:0000516', - 'IG_J_pseudogene' => 'SO:0000516', - 'IG_pseudogene' => 'SO:0000516', - 'IG_V_pseudogene' => 'SO:0000516', - 'miRNA_pseudogene' => 'SO:0000516', - 'misc_RNA_pseudogene' => 'SO:0000516', - 'Mt_tRNA_pseudogene' => 'SO:0000516', - 'ncbi_pseudogene' => 'SO:0000516', - 'ncRNA_pseudogene' => 'SO:0000516', - 'processed_pseudogene' => 'SO:0000516', - 'rRNA_pseudogene' => 'SO:0000516', - 'scRNA_pseudogene' => 'SO:0000516', - 'snoRNA_pseudogene' => 'SO:0000516', - 'snRNA_pseudogene' => 'SO:0000516', - 'transcribed_processed_pseudogene' => 'SO:0000516', - 'transcribed_unitary_pseudogene' => 'SO:0000516', - 'transcribed_unprocessed_pseudogene' => 'SO:0000516', - 'translated_processed_pseudogene' => 'SO:0000516', - 'translated_unprocessed_pseudogene' => 'SO:0000516', - 'tRNA_pseudogene' => 'SO:0000516', - 'TR_J_pseudogene' => 'SO:0000516', - 'TR_pseudogene' => 'SO:0000516', - 'TR_V_pseudogene' => 'SO:0000516', - 'unitary_pseudogene' => 'SO:0000516', - 'unprocessed_pseudogene' => 'SO:0000516', + 'pseudogene' => 'SO:0000516', + 'disrupted_domain' => 'SO:0000516', + 'IG_C_pseudogene' => 'SO:0000516', + 'IG_D_pseudogene' => 'SO:0000516', + 'IG_J_pseudogene' => 'SO:0000516', + 'IG_pseudogene' => 'SO:0000516', + 'IG_V_pseudogene' => 'SO:0000516', + 'miRNA_pseudogene' => 'SO:0000516', + 'misc_RNA_pseudogene' => 'SO:0000516', + 'Mt_tRNA_pseudogene' => 'SO:0000516', + 'ncbi_pseudogene' => 'SO:0000516', + 'ncRNA_pseudogene' => 'SO:0000516', + 'processed_pseudogene' => 'SO:0000516', + 'rRNA_pseudogene' => 'SO:0000516', + 'scRNA_pseudogene' => 'SO:0000516', + 'snoRNA_pseudogene' => 'SO:0000516', + 'snRNA_pseudogene' => 'SO:0000516', + 'transcribed_processed_pseudogene' => 'SO:0000516', + 
'transcribed_unitary_pseudogene' => 'SO:0000516', + 'transcribed_unprocessed_pseudogene' => 'SO:0000516', + 'translated_processed_pseudogene' => 'SO:0000516', + 'translated_unprocessed_pseudogene' => 'SO:0000516', + 'tRNA_pseudogene' => 'SO:0000516', + 'TR_J_pseudogene' => 'SO:0000516', + 'TR_pseudogene' => 'SO:0000516', + 'TR_V_pseudogene' => 'SO:0000516', + 'unitary_pseudogene' => 'SO:0000516', + 'unprocessed_pseudogene' => 'SO:0000516', # ncRNA transcript biotypes ## Long non coding RNAs - '3prime_overlapping_ncrna' => 'SO:0001877', - 'ambiguous_orf' => 'SO:0001877', - 'antisense' => 'SO:0001877', - 'antisense_RNA' => 'SO:0001877', - 'antitoxin' => 'SO:0001877', - 'bidirectional_promoter_lncrna' => 'SO:0001877', - 'lincRNA' => 'SO:0001877', - 'macro_lncRNA' => 'SO:0001877', - 'ncrna_host' => 'SO:0001877', - 'non_coding' => 'SO:0001877', - 'processed_transcript' => 'SO:0001877', - 'retained_intron' => 'SO:0001877', - 'ribozyme' => 'SO:0001877', - 'sense_intronic' => 'SO:0001877', - 'sense_overlapping' => 'SO:0001877', - + '3prime_overlapping_ncrna' => 'SO:0001877', + 'ambiguous_orf' => 'SO:0001877', + 'antisense' => 'SO:0001877', + 'antisense_RNA' => 'SO:0001877', + 'antitoxin' => 'SO:0001877', + 'bidirectional_promoter_lncrna' => 'SO:0001877', + 'lincRNA' => 'SO:0001877', + 'macro_lncRNA' => 'SO:0001877', + 'ncrna_host' => 'SO:0001877', + 'non_coding' => 'SO:0001877', + 'processed_transcript' => 'SO:0001877', + 'retained_intron' => 'SO:0001877', + 'ribozyme' => 'SO:0001877', + 'sense_intronic' => 'SO:0001877', + 'sense_overlapping' => 'SO:0001877', + ## Short non coding RNAs - 'class_II_RNA' => 'SO:0000989', # class_II_RNA - 'class_I_RNA' => 'SO:0000990', # class_I_RNA - 'guide_RNA' => 'SO:0000602', # guide_RNA - 'miRNA' => 'SO:0000276', # miRNA - 'known_ncRNA' => 'SO:0000655', # ncRNA - 'misc_RNA' => 'SO:0000655', # ncRNA - 'ncRNA' => 'SO:0000655', # ncRNA - 'piRNA' => 'SO:0001035', # piRNA - 'pre_miRNA' => 'SO:0001244', # pre_miRNA - 'RNase_MRP_RNA' => 
'SO:0000385', # RNase_MRP_RNA - 'RNase_P_RNA' => 'SO:0000386', # RNase_P_RNA - 'rRNA' => 'SO:0000252', # rRNA - 'Mt_rRNA' => 'SO:0000252', # rRNA - 'scaRNA' => 'SO:0000013', # scRNA - 'scRNA' => 'SO:0000013', # scRNA - 'snoRNA' => 'SO:0000275', # snoRNA - 'sRNA' => 'SO:0000274', # snRNA - 'snlRNA' => 'SO:0000274', # snRNA - 'snRNA' => 'SO:0000274', # snRNA - 'SRP_RNA' => 'SO:0000590', # SRP_RNA - 'telomerase_RNA' => 'SO:0000390', # telomerase_RNA - 'tmRNA' => 'SO:0000584', # tmRNA - 'tRNA' => 'SO:0000253', # tRNA - 'Mt_tRNA' => 'SO:0000253', # tRNA - 'vaultRNA' => 'SO:0002040', # vaultRNA_primary_transcript - 'vault_RNA' => 'SO:0002040', # vaultRNA_primary_transcript - 'Y_RNA' => 'SO:0000405', # Y_RNA - + 'class_II_RNA' => 'SO:0000989', # class_II_RNA + 'class_I_RNA' => 'SO:0000990', # class_I_RNA + 'guide_RNA' => 'SO:0000602', # guide_RNA + 'miRNA' => 'SO:0000276', # miRNA + 'known_ncRNA' => 'SO:0000655', # ncRNA + 'misc_RNA' => 'SO:0000655', # ncRNA + 'ncRNA' => 'SO:0000655', # ncRNA + 'piRNA' => 'SO:0001035', # piRNA + 'pre_miRNA' => 'SO:0001244', # pre_miRNA + 'RNase_MRP_RNA' => 'SO:0000385', # RNase_MRP_RNA + 'RNase_P_RNA' => 'SO:0000386', # RNase_P_RNA + 'rRNA' => 'SO:0000252', # rRNA + 'Mt_rRNA' => 'SO:0000252', # rRNA + 'scaRNA' => 'SO:0000013', # scRNA + 'scRNA' => 'SO:0000013', # scRNA + 'snoRNA' => 'SO:0000275', # snoRNA + 'sRNA' => 'SO:0000274', # snRNA + 'snlRNA' => 'SO:0000274', # snRNA + 'snRNA' => 'SO:0000274', # snRNA + 'SRP_RNA' => 'SO:0000590', # SRP_RNA + 'telomerase_RNA' => 'SO:0000390', # telomerase_RNA + 'tmRNA' => 'SO:0000584', # tmRNA + 'tRNA' => 'SO:0000253', # tRNA + 'Mt_tRNA' => 'SO:0000253', # tRNA + 'vaultRNA' => 'SO:0002040', # vaultRNA_primary_transcript + 'vault_RNA' => 'SO:0002040', # vaultRNA_primary_transcript + 'Y_RNA' => 'SO:0000405', # Y_RNA ); my %utr_so_mapping = @@ -272,28 +276,28 @@ my %region_so_mapping = 'contig' => 'SO:0000149' # contig ); -my %feature_so_mapping = +my %feature_so_mapping = ( - 'Bio::EnsEMBL::Feature' 
=> 'SO:0000001', # region - 'Bio::EnsEMBL::Gene' => 'SO:0000704', # gene - 'Bio::EnsEMBL::Transcript' => 'SO:0000673', # transcript - 'Bio::EnsEMBL::PredictionTranscript' => 'SO:0000673', # transcript - 'Bio::EnsEMBL::Exon' => 'SO:0000147', # exon - 'Bio::EnsEMBL::PredictionExon' => 'SO:0000147', # exon - 'Bio::EnsEMBL::UTR' => 'SO:0000203', # UTR - 'Bio::EnsEMBL::ExonTranscript' => 'SO:0000147', # Exon - 'Bio::EnsEMBL::CDS' => 'SO:0000316', # CDS - 'Bio::EnsEMBL::Slice' => 'SO:0000001', # region - 'Bio::EnsEMBL::SimpleFeature' => 'SO:0001411', # biological_region - 'Bio::EnsEMBL::MiscFeature' => 'SO:0001411', # biological_region - 'Bio::EnsEMBL::RepeatFeature' => 'SO:0000657', # repeat region - 'Bio::EnsEMBL::Variation::VariationFeature' => 'SO:0001060', # sequence variant + 'Bio::EnsEMBL::Feature' => 'SO:0000001', # region + 'Bio::EnsEMBL::Gene' => 'SO:0000704', # gene + 'Bio::EnsEMBL::Transcript' => 'SO:0000673', # transcript + 'Bio::EnsEMBL::PredictionTranscript' => 'SO:0000673', # transcript + 'Bio::EnsEMBL::Exon' => 'SO:0000147', # exon + 'Bio::EnsEMBL::PredictionExon' => 'SO:0000147', # exon + 'Bio::EnsEMBL::UTR' => 'SO:0000203', # UTR + 'Bio::EnsEMBL::ExonTranscript' => 'SO:0000147', # Exon + 'Bio::EnsEMBL::CDS' => 'SO:0000316', # CDS + 'Bio::EnsEMBL::Slice' => 'SO:0000001', # region + 'Bio::EnsEMBL::SimpleFeature' => 'SO:0001411', # biological_region + 'Bio::EnsEMBL::MiscFeature' => 'SO:0001411', # biological_region + 'Bio::EnsEMBL::RepeatFeature' => 'SO:0000657', # repeat region + 'Bio::EnsEMBL::Variation::VariationFeature' => 'SO:0001060', # sequence variant 'Bio::EnsEMBL::Variation::StructuralVariationFeature' => 'SO:0001537', # structural variant - 'Bio::EnsEMBL::Compara::ConstrainedElement' => 'SO:0001009', #DNA_constraint_sequence ???? 
- 'Bio::EnsEMBL::Funcgen::RegulatoryFeature' => 'SO:0005836', # regulatory_region - 'Bio::EnsEMBL::DnaDnaAlignFeature' => 'SO:0000347', # nucleotide_match - 'Bio::EnsEMBL::DnaPepAlignFeature' => 'SO:0000349', # protein_match - 'Bio::EnsEMBL::KaryotypeBand' => 'SO:0000341', # chromosome_band + 'Bio::EnsEMBL::Compara::ConstrainedElement' => 'SO:0001009', #DNA_constraint_sequence ???? + 'Bio::EnsEMBL::Funcgen::RegulatoryFeature' => 'SO:0005836', # regulatory_region + 'Bio::EnsEMBL::DnaDnaAlignFeature' => 'SO:0000347', # nucleotide_match + 'Bio::EnsEMBL::DnaPepAlignFeature' => 'SO:0000349', # protein_match + 'Bio::EnsEMBL::KaryotypeBand' => 'SO:0000341', # chromosome_band ); @@ -312,8 +316,8 @@ sub new { my ($class, $oa) = @_; defined $oa or throw "No ontology term adaptor specified"; - my $self = - { + my $self = + { ontology_adaptor => $oa, feat_to_acc => \%feature_so_mapping, gene_to_acc => \%gene_so_mapping, @@ -321,8 +325,8 @@ sub new { region_to_acc => \%region_so_mapping, tran_to_acc => \%transcript_so_mapping }; - - $self->{ontology_adaptor}->isa('Bio::EnsEMBL::DBSQL::OntologyTermAdaptor') or + + $self->{ontology_adaptor}->isa('Bio::EnsEMBL::DBSQL::OntologyTermAdaptor') or throw "Argument is not an OntologyTermAdaptor object"; tie my %cache, 'Bio::EnsEMBL::Utils::Cache', 100; @@ -348,15 +352,15 @@ sub to_accession { my $so_accession; my $ref = ref($feature); - - my ($gene_to_acc, $tran_to_acc, $feat_to_acc, $utr_to_acc, $region_to_acc) = + + my ($gene_to_acc, $tran_to_acc, $feat_to_acc, $utr_to_acc, $region_to_acc) = ($self->{gene_to_acc}, $self->{tran_to_acc}, $self->{feat_to_acc}, $self->{utr_to_acc}, $self->{region_to_acc}); - - if ($feature->isa('Bio::EnsEMBL::Gene') and + + if ($feature->isa('Bio::EnsEMBL::Gene') and exists $gene_to_acc->{$feature->biotype}) { $so_accession = $gene_to_acc->{$feature->biotype}; - } elsif ($feature->isa('Bio::EnsEMBL::Transcript') and - exists $tran_to_acc->{$feature->biotype}) { + } elsif 
($feature->isa('Bio::EnsEMBL::Transcript') and + exists $tran_to_acc->{$feature->biotype}) { $so_accession = $tran_to_acc->{$feature->biotype}; } elsif ($feature->isa('Bio::EnsEMBL::UTR') and exists $utr_to_acc->{$feature->type}) { @@ -375,7 +379,7 @@ sub to_accession { throw sprintf "%s: mapping to sequence ontology accession not found", $ref unless $so_accession; - + return $so_accession; } diff --git a/modules/t/biotype.t b/modules/t/biotype.t new file mode 100644 index 0000000000000000000000000000000000000000..2de5367999ea40ddd8e25a3bebc1885f59ddca11 --- /dev/null +++ b/modules/t/biotype.t @@ -0,0 +1,104 @@ +# Copyright [1999-2015] Wellcome Trust Sanger Institute and the EMBL-European Bioinformatics Institute +# Copyright [2016-2018] EMBL-European Bioinformatics Institute +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +use strict; +use warnings; + +use Test::More; +use Test::Warnings qw( warning ); +use Test::Exception; + +use Bio::EnsEMBL::Test::MultiTestDB; +use Bio::EnsEMBL::Test::TestUtils; + +our $verbose = 0; #set to 1 to turn on debug printouts + + +# Get a DBAdaptor to from the test system +my $multi = Bio::EnsEMBL::Test::MultiTestDB->new; +ok($multi, "Test DB loaded successfully"); +my $db = $multi->get_DBAdaptor("core"); +ok($db, "Core DB adaptor loaded successfully"); + +# Should get meaningful type back +debug("get biotype adaptor"); +my $biotype_adaptor = $db->get_BiotypeAdaptor(); +ok($biotype_adaptor->isa("Bio::EnsEMBL::DBSQL::BiotypeAdaptor"), "Biotype adaptor loaded successfully"); + +# fetch a protein_coding gene object +debug("fetch gene"); +my $ga = $db->get_GeneAdaptor(); +my $gene = $ga->fetch_by_stable_id("ENSG00000171456"); +ok($gene, "Gene object loaded successfully"); + +# test gene biotype object +debug("gene biotype"); +is($gene->biotype, 'protein_coding', "Gene biotype is protein_coding"); +my $biotype1 = $gene->biotype; +ok($biotype1->isa("Bio::EnsEMBL::Biotype"), "Biotype object retrieved successfully"); +is($biotype1->object_type, 'gene', 'Biotype is from Gene object'); +is($biotype1->name, 'protein_coding', 'Biotype name is protein_coding'); +is($biotype1->biotype_group, 'coding', 'Biotype group is coding'); +is($biotype1->so_acc, 'SO:0001217', 'Biotype protein_coding refers to SO:0001217'); +throws_ok { $biotype1->so_acc('test') } qr/so_acc must be a Sequence Ontology accession/, 'so_acc() requires a SO acc like string'; +throws_ok { $biotype1->object_type('test') } qr/object_type must be gene or transcript/, 'object_type() must be gene or transcript'; + +# test transcript biotype object +my $transcript = $gene->canonical_transcript; +debug("transcript biotype"); +is($transcript->biotype, 'protein_coding', "Trancript biotype is protein_coding"); +my $biotype2 = $transcript->biotype; +ok($biotype2->isa("Bio::EnsEMBL::Biotype"), "Biotype object 
retrieved successfully"); +is($biotype2->object_type, 'transcript', 'Biotype is from Transcript object'); +is($biotype2->name, 'protein_coding', 'Biotype name is protein_coding'); +is($biotype2->biotype_group, 'coding', 'Biotype group is coding'); +is($biotype2->so_acc, 'SO:0000234', 'Biotype protein_coding refers to SO:0000234'); + +# set biotype with database term +debug("set biotype with db term"); +ok($gene->biotype('tRNA'), "Can successfully set biotype to tRNA"); +my $biotype3 = $gene->biotype; +ok($biotype3->isa("Bio::EnsEMBL::Biotype"), "Biotype object retrieved successfully"); +is($biotype3->object_type, 'gene', 'Biotype is from Gene object'); +is($biotype3->name, 'tRNA', 'Biotype name is tRNA'); +is($biotype3->biotype_group, 'snoncoding', 'Biotype group is snoncoding'); +is($biotype3->so_acc, 'SO:0001263', 'Biotype tRNA refers to SO:0001263'); + +# set biotype with term not in database +debug("set biotype with term not in db"); +ok($gene->biotype('dummy'), "Can successfully set biotype to dummy"); +my $biotype4 = $gene->biotype; +ok($biotype4->isa("Bio::EnsEMBL::Biotype"), "Biotype object retrieved successfully"); +is($biotype4->object_type, 'gene', 'Biotype is from Gene object'); +is($biotype4->name, 'dummy', 'Biotype name is dummy'); +is($biotype4->biotype_group, undef, 'Biotype group is not set'); +is($biotype4->so_acc, undef, 'Biotype SO acc is not set'); + +# test fetch biotypes of object_type gene +debug("fetch biotypes by object_type"); +my $biotypes = $biotype_adaptor->fetch_all_by_object_type('gene'); +is(ref $biotypes, 'ARRAY', 'Got an array'); +is(scalar @{$biotypes}, '2', 'of size 2'); +is_deeply($biotypes, [$biotype1, $biotype3], 'with the correct objects'); +my $warning = warning { $biotypes = $biotype_adaptor->fetch_all_by_object_type('none') }; +like( $warning, + qr/No objects retrieved. 
Check if object_type 'none' is correct./, + "Got a warning from fetch_all_by_object_type('none') ", +) or diag 'Got warning: ', explain($warning); +is(ref $biotypes, 'ARRAY', 'Got an array'); +is(scalar @{$biotypes}, '0', 'of size 0'); +is_deeply($biotypes, [], 'totally empty'); + +done_testing(); diff --git a/modules/t/test-genome-DBs/circ/core/SQLite/table.sql b/modules/t/test-genome-DBs/circ/core/SQLite/table.sql index 4bbbcd81f92d8b5ac820cc51e032063e59e7a08b..17afb48698e6cd27b96e5f881da2a86855d425cd 100644 --- a/modules/t/test-genome-DBs/circ/core/SQLite/table.sql +++ b/modules/t/test-genome-DBs/circ/core/SQLite/table.sql @@ -1,6 +1,6 @@ -- -- Created by SQL::Translator::Producer::SQLite --- Created on Fri Jan 12 13:37:14 2018 +-- Created on Fri Feb 16 15:28:14 2018 -- BEGIN TRANSACTION; @@ -131,6 +131,22 @@ CREATE TABLE attrib_type ( CREATE UNIQUE INDEX code_idx ON attrib_type (code); +-- +-- Table: biotype +-- +CREATE TABLE biotype ( + biotype_id INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL, + name varchar(64) NOT NULL, + object_type enum NOT NULL DEFAULT 'gene', + db_type varchar NOT NULL DEFAULT 'core', + attrib_type_id integer, + description text, + biotype_group enum, + so_acc varchar(64) +); + +CREATE UNIQUE INDEX name_type_idx ON biotype (name, object_type); + -- -- Table: coord_system -- diff --git a/modules/t/test-genome-DBs/circ/core/meta.txt b/modules/t/test-genome-DBs/circ/core/meta.txt index 28cac25569ab9802c89e8ebac825d9e0d7618a80..dffcb8c90f536ec4389b86a24851caaf906f25cf 100644 --- a/modules/t/test-genome-DBs/circ/core/meta.txt +++ b/modules/t/test-genome-DBs/circ/core/meta.txt @@ -117,3 +117,4 @@ 117 \N patch patch_91_92_a.sql|schema_version 118 \N patch patch_91_92_b.sql|add_cigar_line_align_type 119 \N patch patch_92_93_a.sql|schema_version +120 \N patch patch_92_93_b.sql|biotype_table diff --git a/modules/t/test-genome-DBs/circ/core/table.sql b/modules/t/test-genome-DBs/circ/core/table.sql index 
471b8dd62e30de9d4fdbfaad2848de8430a63c7b..3c91030a518346f7c73a37cb94856e4f7b8e544f 100644 --- a/modules/t/test-genome-DBs/circ/core/table.sql +++ b/modules/t/test-genome-DBs/circ/core/table.sql @@ -105,6 +105,19 @@ CREATE TABLE `attrib_type` ( UNIQUE KEY `code_idx` (`code`) ) ENGINE=MyISAM AUTO_INCREMENT=391 DEFAULT CHARSET=latin1 COLLATE=latin1_bin; +CREATE TABLE `biotype` ( + `biotype_id` int(10) unsigned NOT NULL AUTO_INCREMENT, + `name` varchar(64) NOT NULL, + `object_type` enum('gene','transcript') NOT NULL DEFAULT 'gene', + `db_type` set('cdna','core','coreexpressionatlas','coreexpressionest','coreexpressiongnf','funcgen','otherfeatures','rnaseq','variation','vega','presite','sangervega') NOT NULL DEFAULT 'core', + `attrib_type_id` int(11) DEFAULT NULL, + `description` text, + `biotype_group` enum('coding','pseudogene','snoncoding','lnoncoding','mnoncoding','LRG','undefined','no_group') DEFAULT NULL, + `so_acc` varchar(64) DEFAULT NULL, + PRIMARY KEY (`biotype_id`), + UNIQUE KEY `name_type_idx` (`name`,`object_type`) +) ENGINE=MyISAM DEFAULT CHARSET=latin1; + CREATE TABLE `coord_system` ( `coord_system_id` int(10) unsigned NOT NULL AUTO_INCREMENT, `species_id` int(10) unsigned NOT NULL DEFAULT '1', @@ -476,7 +489,7 @@ CREATE TABLE `meta` ( PRIMARY KEY (`meta_id`), UNIQUE KEY `species_key_value_idx` (`species_id`,`meta_key`,`meta_value`), KEY `species_value_idx` (`species_id`,`meta_value`) -) ENGINE=MyISAM AUTO_INCREMENT=120 DEFAULT CHARSET=latin1; +) ENGINE=MyISAM AUTO_INCREMENT=121 DEFAULT CHARSET=latin1; CREATE TABLE `meta_coord` ( `table_name` varchar(40) COLLATE latin1_bin NOT NULL DEFAULT '', diff --git a/modules/t/test-genome-DBs/homo_sapiens/core/SQLite/table.sql b/modules/t/test-genome-DBs/homo_sapiens/core/SQLite/table.sql index c3d9b866d9cb08d1a8767b594db048764a403e07..17fb6115b7009a238f6d17c006b4b3d9c4200b9e 100644 --- a/modules/t/test-genome-DBs/homo_sapiens/core/SQLite/table.sql +++ b/modules/t/test-genome-DBs/homo_sapiens/core/SQLite/table.sql 
@@ -1,6 +1,6 @@ -- -- Created by SQL::Translator::Producer::SQLite --- Created on Fri Jan 12 13:37:26 2018 +-- Created on Fri Feb 16 15:28:17 2018 -- BEGIN TRANSACTION; @@ -131,6 +131,22 @@ CREATE TABLE attrib_type ( CREATE UNIQUE INDEX code_idx ON attrib_type (code); +-- +-- Table: biotype +-- +CREATE TABLE biotype ( + biotype_id INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL, + name varchar(64) NOT NULL, + object_type enum NOT NULL DEFAULT 'gene', + db_type varchar NOT NULL DEFAULT 'core', + attrib_type_id integer, + description text, + biotype_group enum, + so_acc varchar(64) +); + +CREATE UNIQUE INDEX name_type_idx ON biotype (name, object_type); + -- -- Table: coord_system -- diff --git a/modules/t/test-genome-DBs/homo_sapiens/core/biotype.txt b/modules/t/test-genome-DBs/homo_sapiens/core/biotype.txt new file mode 100644 index 0000000000000000000000000000000000000000..b268b7031417fb127ebd3f8d00312e09285b4a40 --- /dev/null +++ b/modules/t/test-genome-DBs/homo_sapiens/core/biotype.txt @@ -0,0 +1,3 @@ +64 protein_coding gene core,otherfeatures,rnaseq,vega,presite \N \N coding SO:0001217 +65 protein_coding transcript core,otherfeatures,rnaseq,vega,presite \N \N coding SO:0000234 +87 tRNA gene core,otherfeatures,presite 76 \N snoncoding SO:0001263 diff --git a/modules/t/test-genome-DBs/homo_sapiens/core/meta.txt b/modules/t/test-genome-DBs/homo_sapiens/core/meta.txt index 0955043ba26b88e88208acf8735d9c3852d3134c..ce0c68d889e0beaca3a9aa2f7941640d9e0e00d1 100644 --- a/modules/t/test-genome-DBs/homo_sapiens/core/meta.txt +++ b/modules/t/test-genome-DBs/homo_sapiens/core/meta.txt @@ -96,3 +96,4 @@ 163 \N patch patch_91_92_a.sql|schema_version 164 \N patch patch_91_92_b.sql|add_cigar_line_align_type 165 \N patch patch_92_93_a.sql|schema_version +166 \N patch patch_92_93_b.sql|biotype_table diff --git a/modules/t/test-genome-DBs/homo_sapiens/core/table.sql b/modules/t/test-genome-DBs/homo_sapiens/core/table.sql index 
17285d829505143bba54e17b39920c05e3a93624..74c697e37c6a1ffdff5e4bf9edcf3a5e04f7455d 100644 --- a/modules/t/test-genome-DBs/homo_sapiens/core/table.sql +++ b/modules/t/test-genome-DBs/homo_sapiens/core/table.sql @@ -105,6 +105,19 @@ CREATE TABLE `attrib_type` ( UNIQUE KEY `code_idx` (`code`) ) ENGINE=MyISAM AUTO_INCREMENT=392 DEFAULT CHARSET=latin1 COLLATE=latin1_bin; +CREATE TABLE `biotype` ( + `biotype_id` int(10) unsigned NOT NULL AUTO_INCREMENT, + `name` varchar(64) NOT NULL, + `object_type` enum('gene','transcript') NOT NULL DEFAULT 'gene', + `db_type` set('cdna','core','coreexpressionatlas','coreexpressionest','coreexpressiongnf','funcgen','otherfeatures','rnaseq','variation','vega','presite','sangervega') NOT NULL DEFAULT 'core', + `attrib_type_id` int(11) DEFAULT NULL, + `description` text, + `biotype_group` enum('coding','pseudogene','snoncoding','lnoncoding','mnoncoding','LRG','undefined','no_group') DEFAULT NULL, + `so_acc` varchar(64) DEFAULT NULL, + PRIMARY KEY (`biotype_id`), + UNIQUE KEY `name_type_idx` (`name`,`object_type`) +) ENGINE=MyISAM DEFAULT CHARSET=latin1; + CREATE TABLE `coord_system` ( `coord_system_id` int(10) unsigned NOT NULL AUTO_INCREMENT, `species_id` int(10) unsigned NOT NULL DEFAULT '1', @@ -476,7 +489,7 @@ CREATE TABLE `meta` ( PRIMARY KEY (`meta_id`), UNIQUE KEY `species_key_value_idx` (`species_id`,`meta_key`,`meta_value`), KEY `species_value_idx` (`species_id`,`meta_value`) -) ENGINE=MyISAM AUTO_INCREMENT=166 DEFAULT CHARSET=latin1; +) ENGINE=MyISAM AUTO_INCREMENT=167 DEFAULT CHARSET=latin1; CREATE TABLE `meta_coord` ( `table_name` varchar(40) COLLATE latin1_bin NOT NULL DEFAULT '', diff --git a/modules/t/test-genome-DBs/homo_sapiens/empty/SQLite/table.sql b/modules/t/test-genome-DBs/homo_sapiens/empty/SQLite/table.sql index 22153c14926c4b10d17e6056bbed845e4867325f..1c4537079819a5f862d39eadd33f28e8fb843a76 100644 --- a/modules/t/test-genome-DBs/homo_sapiens/empty/SQLite/table.sql +++ 
b/modules/t/test-genome-DBs/homo_sapiens/empty/SQLite/table.sql @@ -1,6 +1,6 @@ -- -- Created by SQL::Translator::Producer::SQLite --- Created on Fri Jan 12 13:37:38 2018 +-- Created on Fri Feb 16 15:28:20 2018 -- BEGIN TRANSACTION; @@ -131,6 +131,22 @@ CREATE TABLE attrib_type ( CREATE UNIQUE INDEX code_idx ON attrib_type (code); +-- +-- Table: biotype +-- +CREATE TABLE biotype ( + biotype_id INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL, + name varchar(64) NOT NULL, + object_type enum NOT NULL DEFAULT 'gene', + db_type varchar NOT NULL DEFAULT 'core', + attrib_type_id integer, + description text, + biotype_group enum, + so_acc varchar(64) +); + +CREATE UNIQUE INDEX name_type_idx ON biotype (name, object_type); + -- -- Table: coord_system -- diff --git a/modules/t/test-genome-DBs/homo_sapiens/empty/meta.txt b/modules/t/test-genome-DBs/homo_sapiens/empty/meta.txt index 83f1c3852f28ebfbfe975b0d2d9f1756be9970e8..b278b9a9434745b59dfcaa2e5b70cd2a0ef4f94e 100644 --- a/modules/t/test-genome-DBs/homo_sapiens/empty/meta.txt +++ b/modules/t/test-genome-DBs/homo_sapiens/empty/meta.txt @@ -96,3 +96,4 @@ 145 \N patch patch_91_92_a.sql|schema_version 146 \N patch patch_91_92_b.sql|add_cigar_line_align_type 147 \N patch patch_92_93_a.sql|schema_version +148 \N patch patch_92_93_b.sql|biotype_table diff --git a/modules/t/test-genome-DBs/homo_sapiens/empty/table.sql b/modules/t/test-genome-DBs/homo_sapiens/empty/table.sql index 4cf4d739071929b41c70253ee02b2c7cbecfe6d1..74e6dce2fbc8c25343f73feb4e49fe5c13fcd088 100644 --- a/modules/t/test-genome-DBs/homo_sapiens/empty/table.sql +++ b/modules/t/test-genome-DBs/homo_sapiens/empty/table.sql @@ -105,6 +105,19 @@ CREATE TABLE `attrib_type` ( UNIQUE KEY `code_idx` (`code`) ) ENGINE=MyISAM AUTO_INCREMENT=391 DEFAULT CHARSET=latin1 COLLATE=latin1_bin; +CREATE TABLE `biotype` ( + `biotype_id` int(10) unsigned NOT NULL AUTO_INCREMENT, + `name` varchar(64) NOT NULL, + `object_type` enum('gene','transcript') NOT NULL DEFAULT 'gene', + `db_type` 
set('cdna','core','coreexpressionatlas','coreexpressionest','coreexpressiongnf','funcgen','otherfeatures','rnaseq','variation','vega','presite','sangervega') NOT NULL DEFAULT 'core', + `attrib_type_id` int(11) DEFAULT NULL, + `description` text, + `biotype_group` enum('coding','pseudogene','snoncoding','lnoncoding','mnoncoding','LRG','undefined','no_group') DEFAULT NULL, + `so_acc` varchar(64) DEFAULT NULL, + PRIMARY KEY (`biotype_id`), + UNIQUE KEY `name_type_idx` (`name`,`object_type`) +) ENGINE=MyISAM DEFAULT CHARSET=latin1; + CREATE TABLE `coord_system` ( `coord_system_id` int(10) unsigned NOT NULL AUTO_INCREMENT, `species_id` int(10) unsigned NOT NULL DEFAULT '1', @@ -476,7 +489,7 @@ CREATE TABLE `meta` ( PRIMARY KEY (`meta_id`), UNIQUE KEY `species_key_value_idx` (`species_id`,`meta_key`,`meta_value`), KEY `species_value_idx` (`species_id`,`meta_value`) -) ENGINE=MyISAM AUTO_INCREMENT=148 DEFAULT CHARSET=latin1; +) ENGINE=MyISAM AUTO_INCREMENT=149 DEFAULT CHARSET=latin1; CREATE TABLE `meta_coord` ( `table_name` varchar(40) COLLATE latin1_bin NOT NULL DEFAULT '', diff --git a/modules/t/test-genome-DBs/homo_sapiens/patch/SQLite/table.sql b/modules/t/test-genome-DBs/homo_sapiens/patch/SQLite/table.sql index 349936ad5ad702ad784401e9d829e1128ff807e4..e2a3a8a6962ac58d6997c4e73606ca2c3d6c39b7 100644 --- a/modules/t/test-genome-DBs/homo_sapiens/patch/SQLite/table.sql +++ b/modules/t/test-genome-DBs/homo_sapiens/patch/SQLite/table.sql @@ -1,6 +1,6 @@ -- -- Created by SQL::Translator::Producer::SQLite --- Created on Fri Jan 12 13:37:50 2018 +-- Created on Fri Feb 16 15:28:23 2018 -- BEGIN TRANSACTION; @@ -131,6 +131,22 @@ CREATE TABLE attrib_type ( CREATE UNIQUE INDEX code_idx ON attrib_type (code); +-- +-- Table: biotype +-- +CREATE TABLE biotype ( + biotype_id INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL, + name varchar(64) NOT NULL, + object_type enum NOT NULL DEFAULT 'gene', + db_type varchar NOT NULL DEFAULT 'core', + attrib_type_id integer, + description text, + 
biotype_group enum, + so_acc varchar(64) +); + +CREATE UNIQUE INDEX name_type_idx ON biotype (name, object_type); + -- -- Table: coord_system -- diff --git a/modules/t/test-genome-DBs/homo_sapiens/patch/meta.txt b/modules/t/test-genome-DBs/homo_sapiens/patch/meta.txt index 2469440ef6f48e949ea97c6ea396f679f5691913..38b3c829d04b02a6b3f6875c1c3a36bdad7b21ed 100644 --- a/modules/t/test-genome-DBs/homo_sapiens/patch/meta.txt +++ b/modules/t/test-genome-DBs/homo_sapiens/patch/meta.txt @@ -101,3 +101,4 @@ 2108 \N patch patch_91_92_a.sql|schema_version 2109 \N patch patch_91_92_b.sql|add_cigar_line_align_type 2110 \N patch patch_92_93_a.sql|schema_version +2111 \N patch patch_92_93_b.sql|biotype_table diff --git a/modules/t/test-genome-DBs/homo_sapiens/patch/table.sql b/modules/t/test-genome-DBs/homo_sapiens/patch/table.sql index 1e5f50227fca4970fb8bba8eed0cb4be52bd93ba..296a1fc8d92384c7968dcf09281e9313a3a6082b 100644 --- a/modules/t/test-genome-DBs/homo_sapiens/patch/table.sql +++ b/modules/t/test-genome-DBs/homo_sapiens/patch/table.sql @@ -105,6 +105,19 @@ CREATE TABLE `attrib_type` ( UNIQUE KEY `code_idx` (`code`) ) ENGINE=MyISAM AUTO_INCREMENT=407 DEFAULT CHARSET=latin1 COLLATE=latin1_bin; +CREATE TABLE `biotype` ( + `biotype_id` int(10) unsigned NOT NULL AUTO_INCREMENT, + `name` varchar(64) NOT NULL, + `object_type` enum('gene','transcript') NOT NULL DEFAULT 'gene', + `db_type` set('cdna','core','coreexpressionatlas','coreexpressionest','coreexpressiongnf','funcgen','otherfeatures','rnaseq','variation','vega','presite','sangervega') NOT NULL DEFAULT 'core', + `attrib_type_id` int(11) DEFAULT NULL, + `description` text, + `biotype_group` enum('coding','pseudogene','snoncoding','lnoncoding','mnoncoding','LRG','undefined','no_group') DEFAULT NULL, + `so_acc` varchar(64) DEFAULT NULL, + PRIMARY KEY (`biotype_id`), + UNIQUE KEY `name_type_idx` (`name`,`object_type`) +) ENGINE=MyISAM DEFAULT CHARSET=latin1; + CREATE TABLE `coord_system` ( `coord_system_id` int(10) unsigned 
NOT NULL AUTO_INCREMENT, `species_id` int(10) unsigned NOT NULL DEFAULT '1', @@ -476,7 +489,7 @@ CREATE TABLE `meta` ( PRIMARY KEY (`meta_id`), UNIQUE KEY `species_key_value_idx` (`species_id`,`meta_key`,`meta_value`), KEY `species_value_idx` (`species_id`,`meta_value`) -) ENGINE=MyISAM AUTO_INCREMENT=2111 DEFAULT CHARSET=latin1; +) ENGINE=MyISAM AUTO_INCREMENT=2112 DEFAULT CHARSET=latin1; CREATE TABLE `meta_coord` ( `table_name` varchar(40) COLLATE latin1_bin NOT NULL DEFAULT '', diff --git a/modules/t/test-genome-DBs/mapping/core/SQLite/table.sql b/modules/t/test-genome-DBs/mapping/core/SQLite/table.sql index 0db2d3854365064020270df91d1b4b41e8fc7f8b..cb493e4fb5c42d297903ee27d529e2c31c69c02d 100644 --- a/modules/t/test-genome-DBs/mapping/core/SQLite/table.sql +++ b/modules/t/test-genome-DBs/mapping/core/SQLite/table.sql @@ -1,6 +1,6 @@ -- -- Created by SQL::Translator::Producer::SQLite --- Created on Fri Jan 12 13:38:03 2018 +-- Created on Fri Feb 16 15:28:26 2018 -- BEGIN TRANSACTION; @@ -131,6 +131,22 @@ CREATE TABLE attrib_type ( CREATE UNIQUE INDEX code_idx ON attrib_type (code); +-- +-- Table: biotype +-- +CREATE TABLE biotype ( + biotype_id INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL, + name varchar(64) NOT NULL, + object_type enum NOT NULL DEFAULT 'gene', + db_type varchar NOT NULL DEFAULT 'core', + attrib_type_id integer, + description text, + biotype_group enum, + so_acc varchar(64) +); + +CREATE UNIQUE INDEX name_type_idx ON biotype (name, object_type); + -- -- Table: coord_system -- diff --git a/modules/t/test-genome-DBs/mapping/core/meta.txt b/modules/t/test-genome-DBs/mapping/core/meta.txt index 1d0729ea246dea8469e631201d9b3e67f00192e3..821b22a77d83746d22f863cf0398f8bd786a232a 100644 --- a/modules/t/test-genome-DBs/mapping/core/meta.txt +++ b/modules/t/test-genome-DBs/mapping/core/meta.txt @@ -57,3 +57,4 @@ 150 \N patch patch_91_92_a.sql|schema_version 151 \N patch patch_91_92_b.sql|add_cigar_line_align_type 152 \N patch 
patch_92_93_a.sql|schema_version +153 \N patch patch_92_93_b.sql|biotype_table diff --git a/modules/t/test-genome-DBs/mapping/core/table.sql b/modules/t/test-genome-DBs/mapping/core/table.sql index 908015077cdb646b5ba66b411ed26ff78d15f42f..eb478505b7e8030fa573aa4a88f64a2241ed828d 100644 --- a/modules/t/test-genome-DBs/mapping/core/table.sql +++ b/modules/t/test-genome-DBs/mapping/core/table.sql @@ -105,6 +105,19 @@ CREATE TABLE `attrib_type` ( UNIQUE KEY `code_idx` (`code`) ) ENGINE=MyISAM AUTO_INCREMENT=391 DEFAULT CHARSET=latin1 COLLATE=latin1_bin; +CREATE TABLE `biotype` ( + `biotype_id` int(10) unsigned NOT NULL AUTO_INCREMENT, + `name` varchar(64) NOT NULL, + `object_type` enum('gene','transcript') NOT NULL DEFAULT 'gene', + `db_type` set('cdna','core','coreexpressionatlas','coreexpressionest','coreexpressiongnf','funcgen','otherfeatures','rnaseq','variation','vega','presite','sangervega') NOT NULL DEFAULT 'core', + `attrib_type_id` int(11) DEFAULT NULL, + `description` text, + `biotype_group` enum('coding','pseudogene','snoncoding','lnoncoding','mnoncoding','LRG','undefined','no_group') DEFAULT NULL, + `so_acc` varchar(64) DEFAULT NULL, + PRIMARY KEY (`biotype_id`), + UNIQUE KEY `name_type_idx` (`name`,`object_type`) +) ENGINE=MyISAM DEFAULT CHARSET=latin1; + CREATE TABLE `coord_system` ( `coord_system_id` int(10) unsigned NOT NULL AUTO_INCREMENT, `species_id` int(10) unsigned NOT NULL DEFAULT '1', @@ -476,7 +489,7 @@ CREATE TABLE `meta` ( PRIMARY KEY (`meta_id`), UNIQUE KEY `species_key_value_idx` (`species_id`,`meta_key`,`meta_value`), KEY `species_value_idx` (`species_id`,`meta_value`) -) ENGINE=MyISAM AUTO_INCREMENT=153 DEFAULT CHARSET=latin1; +) ENGINE=MyISAM AUTO_INCREMENT=154 DEFAULT CHARSET=latin1; CREATE TABLE `meta_coord` ( `table_name` varchar(40) COLLATE latin1_bin NOT NULL DEFAULT '', diff --git a/modules/t/test-genome-DBs/multi/compara/SQLite/table.sql b/modules/t/test-genome-DBs/multi/compara/SQLite/table.sql index 
36c17b641cc7887bc1db37dce350a4098b34e26a..ebb012cd1cfbdc5a87d395f6b73bb30c9549c4f3 100644 --- a/modules/t/test-genome-DBs/multi/compara/SQLite/table.sql +++ b/modules/t/test-genome-DBs/multi/compara/SQLite/table.sql @@ -1,6 +1,6 @@ -- -- Created by SQL::Translator::Producer::SQLite --- Created on Fri Jan 12 13:38:11 2018 +-- Created on Fri Feb 16 15:28:28 2018 -- BEGIN TRANSACTION; diff --git a/modules/t/test-genome-DBs/mus_musculus/core/SQLite/table.sql b/modules/t/test-genome-DBs/mus_musculus/core/SQLite/table.sql index 2c76e6cffc8ffa25a4db0801306c6642a75f9ceb..2665152efbd1531ac1937384f49afd5bcabbe5dc 100644 --- a/modules/t/test-genome-DBs/mus_musculus/core/SQLite/table.sql +++ b/modules/t/test-genome-DBs/mus_musculus/core/SQLite/table.sql @@ -1,6 +1,6 @@ -- -- Created by SQL::Translator::Producer::SQLite --- Created on Fri Jan 12 13:38:22 2018 +-- Created on Fri Feb 16 15:28:31 2018 -- BEGIN TRANSACTION; @@ -131,6 +131,22 @@ CREATE TABLE attrib_type ( CREATE UNIQUE INDEX code_idx ON attrib_type (code); +-- +-- Table: biotype +-- +CREATE TABLE biotype ( + biotype_id INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL, + name varchar(64) NOT NULL, + object_type enum NOT NULL DEFAULT 'gene', + db_type varchar NOT NULL DEFAULT 'core', + attrib_type_id integer, + description text, + biotype_group enum, + so_acc varchar(64) +); + +CREATE UNIQUE INDEX name_type_idx ON biotype (name, object_type); + -- -- Table: coord_system -- diff --git a/modules/t/test-genome-DBs/mus_musculus/core/meta.txt b/modules/t/test-genome-DBs/mus_musculus/core/meta.txt index d816a2a67fb8dbdd5983508cec77ca674099f1cc..ff86c58769022661cafa1837043a8746dde06cea 100644 --- a/modules/t/test-genome-DBs/mus_musculus/core/meta.txt +++ b/modules/t/test-genome-DBs/mus_musculus/core/meta.txt @@ -174,3 +174,4 @@ 1686 \N patch patch_91_92_a.sql|schema_version 1687 \N patch patch_91_92_b.sql|add_cigar_line_align_type 1688 \N patch patch_92_93_a.sql|schema_version +1689 \N patch patch_92_93_b.sql|biotype_table diff 
--git a/modules/t/test-genome-DBs/mus_musculus/core/table.sql b/modules/t/test-genome-DBs/mus_musculus/core/table.sql index 2236b585330ebea7636cf605477960cf6541949f..d9f10455113302da903c98078434459e60c2e6cf 100644 --- a/modules/t/test-genome-DBs/mus_musculus/core/table.sql +++ b/modules/t/test-genome-DBs/mus_musculus/core/table.sql @@ -105,6 +105,19 @@ CREATE TABLE `attrib_type` ( UNIQUE KEY `code_idx` (`code`) ) ENGINE=MyISAM AUTO_INCREMENT=508 DEFAULT CHARSET=latin1 COLLATE=latin1_bin; +CREATE TABLE `biotype` ( + `biotype_id` int(10) unsigned NOT NULL AUTO_INCREMENT, + `name` varchar(64) NOT NULL, + `object_type` enum('gene','transcript') NOT NULL DEFAULT 'gene', + `db_type` set('cdna','core','coreexpressionatlas','coreexpressionest','coreexpressiongnf','funcgen','otherfeatures','rnaseq','variation','vega','presite','sangervega') NOT NULL DEFAULT 'core', + `attrib_type_id` int(11) DEFAULT NULL, + `description` text, + `biotype_group` enum('coding','pseudogene','snoncoding','lnoncoding','mnoncoding','LRG','undefined','no_group') DEFAULT NULL, + `so_acc` varchar(64) DEFAULT NULL, + PRIMARY KEY (`biotype_id`), + UNIQUE KEY `name_type_idx` (`name`,`object_type`) +) ENGINE=MyISAM DEFAULT CHARSET=latin1; + CREATE TABLE `coord_system` ( `coord_system_id` int(10) unsigned NOT NULL AUTO_INCREMENT, `species_id` int(10) unsigned NOT NULL DEFAULT '1', @@ -476,7 +489,7 @@ CREATE TABLE `meta` ( PRIMARY KEY (`meta_id`), UNIQUE KEY `species_key_value_idx` (`species_id`,`meta_key`,`meta_value`), KEY `species_value_idx` (`species_id`,`meta_value`) -) ENGINE=MyISAM AUTO_INCREMENT=1689 DEFAULT CHARSET=latin1; +) ENGINE=MyISAM AUTO_INCREMENT=1690 DEFAULT CHARSET=latin1; CREATE TABLE `meta_coord` ( `table_name` varchar(40) COLLATE latin1_bin NOT NULL DEFAULT '', diff --git a/modules/t/test-genome-DBs/nameless/core/SQLite/table.sql b/modules/t/test-genome-DBs/nameless/core/SQLite/table.sql index 892c8e39122c737a6602491c511321982e745517..fd07f4343e34a059108a0b3c168e6ff6ba1153ee 100644 
--- a/modules/t/test-genome-DBs/nameless/core/SQLite/table.sql +++ b/modules/t/test-genome-DBs/nameless/core/SQLite/table.sql @@ -1,6 +1,6 @@ -- -- Created by SQL::Translator::Producer::SQLite --- Created on Fri Jan 12 13:38:33 2018 +-- Created on Fri Feb 16 15:28:34 2018 -- BEGIN TRANSACTION; @@ -131,6 +131,22 @@ CREATE TABLE attrib_type ( CREATE UNIQUE INDEX code_idx ON attrib_type (code); +-- +-- Table: biotype +-- +CREATE TABLE biotype ( + biotype_id INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL, + name varchar(64) NOT NULL, + object_type enum NOT NULL DEFAULT 'gene', + db_type varchar NOT NULL DEFAULT 'core', + attrib_type_id integer, + description text, + biotype_group enum, + so_acc varchar(64) +); + +CREATE UNIQUE INDEX name_type_idx ON biotype (name, object_type); + -- -- Table: coord_system -- diff --git a/modules/t/test-genome-DBs/nameless/core/meta.txt b/modules/t/test-genome-DBs/nameless/core/meta.txt index 9d6ec55e30b071db49964c2f0c69150b77c93832..212944a0daccf81172655ba76b1a656f428ba803 100644 --- a/modules/t/test-genome-DBs/nameless/core/meta.txt +++ b/modules/t/test-genome-DBs/nameless/core/meta.txt @@ -95,3 +95,4 @@ 149 \N patch patch_91_92_a.sql|schema_version 150 \N patch patch_91_92_b.sql|add_cigar_line_align_type 151 \N patch patch_92_93_a.sql|schema_version +152 \N patch patch_92_93_b.sql|biotype_table diff --git a/modules/t/test-genome-DBs/nameless/core/table.sql b/modules/t/test-genome-DBs/nameless/core/table.sql index caa481fe87238dbd267da2872e300a83e49f6176..5205f74b4320519bcb13da92eb61db2f31446ca1 100644 --- a/modules/t/test-genome-DBs/nameless/core/table.sql +++ b/modules/t/test-genome-DBs/nameless/core/table.sql @@ -105,6 +105,19 @@ CREATE TABLE `attrib_type` ( UNIQUE KEY `code_idx` (`code`) ) ENGINE=MyISAM AUTO_INCREMENT=391 DEFAULT CHARSET=latin1 COLLATE=latin1_bin; +CREATE TABLE `biotype` ( + `biotype_id` int(10) unsigned NOT NULL AUTO_INCREMENT, + `name` varchar(64) NOT NULL, + `object_type` enum('gene','transcript') NOT NULL DEFAULT 
'gene', + `db_type` set('cdna','core','coreexpressionatlas','coreexpressionest','coreexpressiongnf','funcgen','otherfeatures','rnaseq','variation','vega','presite','sangervega') NOT NULL DEFAULT 'core', + `attrib_type_id` int(11) DEFAULT NULL, + `description` text, + `biotype_group` enum('coding','pseudogene','snoncoding','lnoncoding','mnoncoding','LRG','undefined','no_group') DEFAULT NULL, + `so_acc` varchar(64) DEFAULT NULL, + PRIMARY KEY (`biotype_id`), + UNIQUE KEY `name_type_idx` (`name`,`object_type`) +) ENGINE=MyISAM DEFAULT CHARSET=latin1; + CREATE TABLE `coord_system` ( `coord_system_id` int(10) unsigned NOT NULL AUTO_INCREMENT, `species_id` int(10) unsigned NOT NULL DEFAULT '1', @@ -466,7 +479,7 @@ CREATE TABLE `meta` ( PRIMARY KEY (`meta_id`), UNIQUE KEY `species_key_value_idx` (`species_id`,`meta_key`,`meta_value`), KEY `species_value_idx` (`species_id`,`meta_value`) -) ENGINE=MyISAM AUTO_INCREMENT=152 DEFAULT CHARSET=latin1; +) ENGINE=MyISAM AUTO_INCREMENT=153 DEFAULT CHARSET=latin1; CREATE TABLE `meta_coord` ( `table_name` varchar(40) COLLATE latin1_bin NOT NULL DEFAULT '', diff --git a/modules/t/test-genome-DBs/ontology/ontology/SQLite/table.sql b/modules/t/test-genome-DBs/ontology/ontology/SQLite/table.sql index 75c3743a884c86a99a0f312280883a7b75e26246..da737bcf2def51421f73b8ab25dfb20520819f98 100644 --- a/modules/t/test-genome-DBs/ontology/ontology/SQLite/table.sql +++ b/modules/t/test-genome-DBs/ontology/ontology/SQLite/table.sql @@ -1,6 +1,6 @@ -- -- Created by SQL::Translator::Producer::SQLite --- Created on Fri Jan 12 13:38:37 2018 +-- Created on Fri Feb 16 15:28:35 2018 -- BEGIN TRANSACTION; diff --git a/modules/t/test-genome-DBs/polyploidy/core/SQLite/table.sql b/modules/t/test-genome-DBs/polyploidy/core/SQLite/table.sql index 48e15c0fcf510d796ee436137c89f6c99d92045e..c0a1547915b024740bde290ea22a64a319bc13e4 100644 --- a/modules/t/test-genome-DBs/polyploidy/core/SQLite/table.sql +++ b/modules/t/test-genome-DBs/polyploidy/core/SQLite/table.sql 
@@ -1,6 +1,6 @@ -- -- Created by SQL::Translator::Producer::SQLite --- Created on Fri Jan 12 13:38:48 2018 +-- Created on Fri Feb 16 15:28:38 2018 -- BEGIN TRANSACTION; @@ -131,6 +131,22 @@ CREATE TABLE attrib_type ( CREATE UNIQUE INDEX code_idx ON attrib_type (code); +-- +-- Table: biotype +-- +CREATE TABLE biotype ( + biotype_id INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL, + name varchar(64) NOT NULL, + object_type enum NOT NULL DEFAULT 'gene', + db_type varchar NOT NULL DEFAULT 'core', + attrib_type_id integer, + description text, + biotype_group enum, + so_acc varchar(64) +); + +CREATE UNIQUE INDEX name_type_idx ON biotype (name, object_type); + -- -- Table: coord_system -- diff --git a/modules/t/test-genome-DBs/polyploidy/core/meta.txt b/modules/t/test-genome-DBs/polyploidy/core/meta.txt index 4bfae9f1880fd70bb529a702734d9fcfeaff3f83..e9a2779ee13cec84a47f0e1f35a277413c433eb2 100644 --- a/modules/t/test-genome-DBs/polyploidy/core/meta.txt +++ b/modules/t/test-genome-DBs/polyploidy/core/meta.txt @@ -150,3 +150,4 @@ 230 \N patch patch_91_92_a.sql|schema_version 231 \N patch patch_91_92_b.sql|add_cigar_line_align_type 232 \N patch patch_92_93_a.sql|schema_version +233 \N patch patch_92_93_b.sql|biotype_table diff --git a/modules/t/test-genome-DBs/polyploidy/core/table.sql b/modules/t/test-genome-DBs/polyploidy/core/table.sql index 60080e68ef3d34f0aac878f1b164d4b5423791f6..026969d58e10b54181d5e68d258a1ad6ddf4b995 100644 --- a/modules/t/test-genome-DBs/polyploidy/core/table.sql +++ b/modules/t/test-genome-DBs/polyploidy/core/table.sql @@ -105,6 +105,19 @@ CREATE TABLE `attrib_type` ( UNIQUE KEY `code_idx` (`code`) ) ENGINE=MyISAM AUTO_INCREMENT=437 DEFAULT CHARSET=latin1 COLLATE=latin1_bin; +CREATE TABLE `biotype` ( + `biotype_id` int(10) unsigned NOT NULL AUTO_INCREMENT, + `name` varchar(64) NOT NULL, + `object_type` enum('gene','transcript') NOT NULL DEFAULT 'gene', + `db_type` 
set('cdna','core','coreexpressionatlas','coreexpressionest','coreexpressiongnf','funcgen','otherfeatures','rnaseq','variation','vega','presite','sangervega') NOT NULL DEFAULT 'core', + `attrib_type_id` int(11) DEFAULT NULL, + `description` text, + `biotype_group` enum('coding','pseudogene','snoncoding','lnoncoding','mnoncoding','LRG','undefined','no_group') DEFAULT NULL, + `so_acc` varchar(64) DEFAULT NULL, + PRIMARY KEY (`biotype_id`), + UNIQUE KEY `name_type_idx` (`name`,`object_type`) +) ENGINE=MyISAM DEFAULT CHARSET=latin1; + CREATE TABLE `coord_system` ( `coord_system_id` int(10) unsigned NOT NULL AUTO_INCREMENT, `species_id` int(10) unsigned NOT NULL DEFAULT '1', @@ -476,7 +489,7 @@ CREATE TABLE `meta` ( PRIMARY KEY (`meta_id`), UNIQUE KEY `species_key_value_idx` (`species_id`,`meta_key`,`meta_value`), KEY `species_value_idx` (`species_id`,`meta_value`) -) ENGINE=MyISAM AUTO_INCREMENT=233 DEFAULT CHARSET=latin1; +) ENGINE=MyISAM AUTO_INCREMENT=234 DEFAULT CHARSET=latin1; CREATE TABLE `meta_coord` ( `table_name` varchar(40) COLLATE latin1_bin NOT NULL DEFAULT '', diff --git a/modules/t/test-genome-DBs/test_collection/core/SQLite/table.sql b/modules/t/test-genome-DBs/test_collection/core/SQLite/table.sql index c778813752fcc507bcdc6a2bb3ace327bb43eaba..30e34243b3d6ed21ca8726af1e0ca13f605dd48c 100644 --- a/modules/t/test-genome-DBs/test_collection/core/SQLite/table.sql +++ b/modules/t/test-genome-DBs/test_collection/core/SQLite/table.sql @@ -1,6 +1,6 @@ -- -- Created by SQL::Translator::Producer::SQLite --- Created on Fri Jan 12 13:38:59 2018 +-- Created on Fri Feb 16 15:28:41 2018 -- BEGIN TRANSACTION; @@ -131,6 +131,22 @@ CREATE TABLE attrib_type ( CREATE UNIQUE INDEX code_idx ON attrib_type (code); +-- +-- Table: biotype +-- +CREATE TABLE biotype ( + biotype_id INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL, + name varchar(64) NOT NULL, + object_type enum NOT NULL DEFAULT 'gene', + db_type varchar NOT NULL DEFAULT 'core', + attrib_type_id integer, + description 
text, + biotype_group enum, + so_acc varchar(64) +); + +CREATE UNIQUE INDEX name_type_idx ON biotype (name, object_type); + -- -- Table: coord_system -- diff --git a/modules/t/test-genome-DBs/test_collection/core/meta.txt b/modules/t/test-genome-DBs/test_collection/core/meta.txt index 08490b7da9f8f8206bce3ff575468ebfebf85103..dfb025ae25824290a13fcebf3914e18f391c05f7 100644 --- a/modules/t/test-genome-DBs/test_collection/core/meta.txt +++ b/modules/t/test-genome-DBs/test_collection/core/meta.txt @@ -170,3 +170,4 @@ 212 \N patch patch_91_92_a.sql|schema_version 213 \N patch patch_91_92_b.sql|add_cigar_line_align_type 214 \N patch patch_92_93_a.sql|schema_version +215 \N patch patch_92_93_b.sql|biotype_table diff --git a/modules/t/test-genome-DBs/test_collection/core/table.sql b/modules/t/test-genome-DBs/test_collection/core/table.sql index a83c8221fad827c8540d83171fd0f582a0a93d3d..4b4bdca6fe5bdd44b61b733822c1cb75849bd0ec 100644 --- a/modules/t/test-genome-DBs/test_collection/core/table.sql +++ b/modules/t/test-genome-DBs/test_collection/core/table.sql @@ -105,6 +105,19 @@ CREATE TABLE `attrib_type` ( UNIQUE KEY `code_idx` (`code`) ) ENGINE=MyISAM AUTO_INCREMENT=391 DEFAULT CHARSET=latin1 COLLATE=latin1_bin; +CREATE TABLE `biotype` ( + `biotype_id` int(10) unsigned NOT NULL AUTO_INCREMENT, + `name` varchar(64) NOT NULL, + `object_type` enum('gene','transcript') NOT NULL DEFAULT 'gene', + `db_type` set('cdna','core','coreexpressionatlas','coreexpressionest','coreexpressiongnf','funcgen','otherfeatures','rnaseq','variation','vega','presite','sangervega') NOT NULL DEFAULT 'core', + `attrib_type_id` int(11) DEFAULT NULL, + `description` text, + `biotype_group` enum('coding','pseudogene','snoncoding','lnoncoding','mnoncoding','LRG','undefined','no_group') DEFAULT NULL, + `so_acc` varchar(64) DEFAULT NULL, + PRIMARY KEY (`biotype_id`), + UNIQUE KEY `name_type_idx` (`name`,`object_type`) +) ENGINE=MyISAM DEFAULT CHARSET=latin1; + CREATE TABLE `coord_system` ( 
`coord_system_id` int(10) unsigned NOT NULL AUTO_INCREMENT, `species_id` int(10) unsigned NOT NULL DEFAULT '1', @@ -466,7 +479,7 @@ CREATE TABLE `meta` ( PRIMARY KEY (`meta_id`), UNIQUE KEY `species_key_value_idx` (`species_id`,`meta_key`,`meta_value`), KEY `species_value_idx` (`species_id`,`meta_value`) -) ENGINE=MyISAM AUTO_INCREMENT=215 DEFAULT CHARSET=latin1; +) ENGINE=MyISAM AUTO_INCREMENT=216 DEFAULT CHARSET=latin1; CREATE TABLE `meta_coord` ( `table_name` varchar(40) COLLATE latin1_bin NOT NULL DEFAULT '', diff --git a/sql/patch_92_93_b.sql b/sql/patch_92_93_b.sql new file mode 100644 index 0000000000000000000000000000000000000000..1fee232a2e1d791383823fac5e89c2e0b6d0a084 --- /dev/null +++ b/sql/patch_92_93_b.sql @@ -0,0 +1,38 @@ +-- Copyright [1999-2015] Wellcome Trust Sanger Institute and the EMBL-European Bioinformatics Institute +-- Copyright [2016-2018] EMBL-European Bioinformatics Institute +-- +-- Licensed under the Apache License, Version 2.0 (the "License"); +-- you may not use this file except in compliance with the License. +-- You may obtain a copy of the License at +-- +-- http://www.apache.org/licenses/LICENSE-2.0 +-- +-- Unless required by applicable law or agreed to in writing, software +-- distributed under the License is distributed on an "AS IS" BASIS, +-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +-- See the License for the specific language governing permissions and +-- limitations under the License. 
+ +# patch_92_93_b.sql +# +# Title: Added biotype table +# +# Description: +# Added new table biotype + +CREATE TABLE biotype ( + biotype_id INTEGER UNSIGNED NOT NULL AUTO_INCREMENT, + name VARCHAR(64) NOT NULL, + object_type ENUM('gene','transcript') NOT NULL DEFAULT 'gene', + db_type set('cdna','core','coreexpressionatlas','coreexpressionest','coreexpressiongnf','funcgen','otherfeatures','rnaseq','variation','vega','presite','sangervega') NOT NULL DEFAULT 'core', + attrib_type_id INTEGER DEFAULT NULL, + description TEXT, + biotype_group ENUM('coding','pseudogene','snoncoding','lnoncoding','mnoncoding','LRG','undefined','no_group') DEFAULT NULL, + so_acc VARCHAR(64), + PRIMARY KEY (biotype_id), + UNIQUE KEY name_type_idx (name, object_type) +) ENGINE=MyISAM DEFAULT CHARSET=latin1; + +# patch identifier +INSERT INTO meta (species_id, meta_key, meta_value) + VALUES (NULL, 'patch', 'patch_92_93_b.sql|biotype_table'); diff --git a/sql/table.sql b/sql/table.sql index 18e6b1f44d9c37dab3e32bd7ca2d2646bca6ba8e..0dfb201ba1b535281f3f4c701ffc50fb07898ea5 100755 --- a/sql/table.sql +++ b/sql/table.sql @@ -310,6 +310,9 @@ INSERT INTO meta (species_id, meta_key, meta_value) VALUES INSERT INTO meta (species_id, meta_key, meta_value) VALUES (NULL, 'patch', 'patch_92_93_a.sql|schema_version'); +INSERT INTO meta (species_id, meta_key, meta_value) + VALUES (NULL, 'patch', 'patch_92_93_b.sql|biotype_table'); + /** @table meta_coord @desc Describes which co-ordinate systems the different feature tables use. @@ -2110,6 +2113,39 @@ CREATE TABLE external_db ( ) COLLATE=latin1_swedish_ci ENGINE=MyISAM; +/** +@table biotype +@desc Stores data about the biotypes and mappings to Sequence Ontology. + +@column biotype_id Primary key, internal identifier. +@column name Ensembl biotype name. +@column object_type Ensembl object type: 'gene' or 'transcript'. +@column db_type Type, e.g. 
'cdna', 'core', 'coreexpressionatlas', 'coreexpressionest', 'coreexpressiongnf', 'funcgen', 'otherfeatures', 'rnaseq', 'variation', 'vega', 'presite', 'sangervega' +@column attrib_type_id Foreign key references to the @link attrib_type table. +@column description Description. +@column biotype_group Group, e.g. 'coding', 'pseudogene', 'snoncoding', 'lnoncoding', 'mnoncoding', 'LRG', 'undefined', 'no_group' +@column so_acc Sequence Ontology accession of the biotype. + + +@see attrib_type + +*/ + + +CREATE TABLE biotype ( + biotype_id INTEGER UNSIGNED NOT NULL AUTO_INCREMENT, + name VARCHAR(64) NOT NULL, + object_type ENUM('gene','transcript') NOT NULL DEFAULT 'gene', + db_type set('cdna','core','coreexpressionatlas','coreexpressionest','coreexpressiongnf','funcgen','otherfeatures','rnaseq','variation','vega','presite','sangervega') NOT NULL DEFAULT 'core', + attrib_type_id INTEGER DEFAULT NULL, + description TEXT, + biotype_group ENUM('coding','pseudogene','snoncoding','lnoncoding','mnoncoding','LRG','undefined','no_group') DEFAULT NULL, + so_acc VARCHAR(64), + PRIMARY KEY (biotype_id), + UNIQUE KEY name_type_idx (name, object_type) +) ENGINE=MyISAM DEFAULT CHARSET=latin1; + + /** @table external_synonym @desc Some xref objects can be referred to by more than one name. This table relates names to xref IDs.