Unverified Commit a5e85187 authored by Marek Szuba's avatar Marek Szuba Committed by GitHub
Browse files

Merge pull request #371 from Ensembl/feature/rna-products

Add support for mature RNA products of transcripts (e.g. MicroRNA) to the API and schema
parents 91f08607 558a113a
...@@ -12,6 +12,7 @@ core Exon ...@@ -12,6 +12,7 @@ core Exon
Translation Translation
Operon Operon
OperonTranscript OperonTranscript
RNAProduct
For Gene, Transcript and Translation, an archive_id_lookup is also created, containing all previously used ids For Gene, Transcript and Translation, an archive_id_lookup is also created, containing all previously used ids
......
...@@ -37,6 +37,7 @@ my %group_objects = ( ...@@ -37,6 +37,7 @@ my %group_objects = (
Transcript => 1, Transcript => 1,
Translation => 1, Translation => 1,
Operon => 1, Operon => 1,
RNAProduct => 1,
# OperonTranscript => 1, # these are in transcript table anyway # OperonTranscript => 1, # these are in transcript table anyway
GeneArchive => 1, GeneArchive => 1,
...@@ -417,7 +418,7 @@ sub load_ids { ...@@ -417,7 +418,7 @@ sub load_ids {
my $rows_inserted = build_insert_sql( $select_sql, $dbh_read, $dbh_write, $is_archive ); my $rows_inserted = build_insert_sql( $select_sql, $dbh_read, $dbh_write, $is_archive );
} }
elsif ( $object_name =~ /Translation/ ) { elsif ( $object_name =~ / RNAProduct | Translation /x ) {
my $sth = $dbh_read->prepare("SELECT COUNT(*) FROM $object"); my $sth = $dbh_read->prepare("SELECT COUNT(*) FROM $object");
$sth->execute(); $sth->execute();
my ($count) = $sth->fetchrow_array; my ($count) = $sth->fetchrow_array;
......
...@@ -291,6 +291,22 @@ sub store_on_Transcript { ...@@ -291,6 +291,22 @@ sub store_on_Transcript {
return; return;
} }
# Store a set of attributes against an RNAProduct.
#   $object     - either an object providing dbID(), or a plain
#                 (non-reference) internal identifier
#   $attributes - passed through untouched to store_on_Object()
# Returns nothing.
sub store_on_RNAProduct {
  my ($self, $object, $attributes) = @_;

  # A reference is asked for its dbID; anything else is taken to be the
  # internal identifier itself.
  my $rnaproduct_id = ref($object) ? $object->dbID() : $object;

  $self->store_on_Object($rnaproduct_id, $attributes, 'rnaproduct');

  return;
}
sub store_on_Translation { sub store_on_Translation {
my ($self, $object, $attributes) = @_; my ($self, $object, $attributes) = @_;
...@@ -417,6 +433,18 @@ sub remove_from_Transcript { ...@@ -417,6 +433,18 @@ sub remove_from_Transcript {
} }
# Remove attributes from an RNAProduct.
#   $object - must pass assert_ref() as a Bio::EnsEMBL::RNAProduct
#   $code   - forwarded unchanged to remove_from_Object()
# Returns nothing.
sub remove_from_RNAProduct {
  my $self = shift;
  my ($object, $code) = @_;

  assert_ref($object, 'Bio::EnsEMBL::RNAProduct');

  # Fetch the identifier into a scalar first so that the argument list
  # below always has exactly three elements.
  my $rnaproduct_id = $object->dbID();
  $self->remove_from_Object($rnaproduct_id, 'rnaproduct', $code);

  return;
}
sub remove_from_Translation { sub remove_from_Translation {
my ($self, $object, $code) = @_; my ($self, $object, $code) = @_;
...@@ -576,6 +604,22 @@ sub fetch_all_by_Translation { ...@@ -576,6 +604,22 @@ sub fetch_all_by_Translation {
} }
# Fetch attributes attached to RNAProducts.
#   $object - optional; when defined it must pass assert_ref() as a
#             Bio::EnsEMBL::RNAProduct and its dbID is used for the lookup.
#             When undefined, an undefined id is handed to
#             fetch_all_by_Object().
#   $code   - forwarded unchanged to fetch_all_by_Object()
# Returns whatever fetch_all_by_Object() yields in scalar context.
sub fetch_all_by_RNAProduct {
  my ($self, $object, $code) = @_;

  my $rnaproduct_id;
  if (defined $object) {
    assert_ref($object, 'Bio::EnsEMBL::RNAProduct');
    $rnaproduct_id = $object->dbID();
  }

  # Force scalar context on the lookup regardless of how we were called.
  return scalar($self->fetch_all_by_Object($rnaproduct_id, 'rnaproduct', $code));
}
sub fetch_all_by_DnaDnaAlignFeature { sub fetch_all_by_DnaDnaAlignFeature {
my ($self, $object, $code) = @_; my ($self, $object, $code) = @_;
......
...@@ -421,6 +421,7 @@ sub get_available_adaptors { ...@@ -421,6 +421,7 @@ sub get_available_adaptors {
PredictionTranscript => 'Bio::EnsEMBL::DBSQL::PredictionTranscriptAdaptor', PredictionTranscript => 'Bio::EnsEMBL::DBSQL::PredictionTranscriptAdaptor',
ProteinAlignFeature => 'Bio::EnsEMBL::DBSQL::ProteinAlignFeatureAdaptor', ProteinAlignFeature => 'Bio::EnsEMBL::DBSQL::ProteinAlignFeatureAdaptor',
ProteinFeature => 'Bio::EnsEMBL::DBSQL::ProteinFeatureAdaptor', ProteinFeature => 'Bio::EnsEMBL::DBSQL::ProteinFeatureAdaptor',
RNAProduct => 'Bio::EnsEMBL::DBSQL::RNAProductAdaptor',
RepeatConsensus => 'Bio::EnsEMBL::DBSQL::RepeatConsensusAdaptor', RepeatConsensus => 'Bio::EnsEMBL::DBSQL::RepeatConsensusAdaptor',
RepeatFeature => 'Bio::EnsEMBL::DBSQL::RepeatFeatureAdaptor', RepeatFeature => 'Bio::EnsEMBL::DBSQL::RepeatFeatureAdaptor',
SeqRegionSynonym => 'Bio::EnsEMBL::DBSQL::SeqRegionSynonymAdaptor', SeqRegionSynonym => 'Bio::EnsEMBL::DBSQL::SeqRegionSynonymAdaptor',
......
...@@ -1329,6 +1329,37 @@ sub fetch_all_by_Translation { ...@@ -1329,6 +1329,37 @@ sub fetch_all_by_Translation {
} }
=head2 fetch_all_by_RNAProduct
Arg [1] : Bio::EnsEMBL::RNAProduct $rp
(The rnaproduct to fetch database entries for)
Arg [2] : optional external database name. SQL wildcards are accepted
Arg [3] : optional externaldb type. SQL wildcards are accepted
Example : @db_entries = @{$db_entry_adptr->fetch_all_by_RNAProduct($rp)};
Description: Retrieves external database entries for an EnsEMBL rnaproduct
Returntype : listref of Bio::EnsEMBL::DBEntries; may be of type IdentityXref if
there is mapping data, or OntologyXref if there is linkage data.
Exceptions : throws if rnaproduct object not passed
Caller : general
Status : Stable
=cut
sub fetch_all_by_RNAProduct {
  my ($self, $rp, $ex_db_reg, $ex_db_type) = @_;

  # Anything that is not a Bio::EnsEMBL::RNAProduct instance is rejected.
  my $is_rnaproduct = ref($rp) && $rp->isa('Bio::EnsEMBL::RNAProduct');
  if (!$is_rnaproduct) {
    throw('Bio::EnsEMBL::RNAProduct argument expected.');
  }

  # An RNAProduct without a (true) dbID cannot have stored xrefs to look
  # up, so warn and hand back an empty list reference.
  unless ($rp->dbID()) {
    warning("Cannot fetch_all_by_RNAProduct without a dbID");
    return [];
  }

  my @lookup_args = ($rp->dbID(), 'RNAProduct', $ex_db_reg, $ex_db_type);
  return $self->_fetch_by_object_type(@lookup_args);
}
=head2 remove_from_object =head2 remove_from_object
...@@ -1850,6 +1881,29 @@ sub list_translation_ids_by_external_db_id { ...@@ -1850,6 +1881,29 @@ sub list_translation_ids_by_external_db_id {
return $self->_type_by_external_db_id( $external_db_id, 'Translation', undef, $linkage_type ), return $self->_type_by_external_db_id( $external_db_id, 'Translation', undef, $linkage_type ),
} }
=head2 list_rnaproduct_ids_by_extids
Arg [1] : string $external_name
Arg [2] : (optional) string $external_db_name
Arg [3] : Boolean override, see _type_by_external_id
Example : @rp_ids = $dbea->list_rnaproduct_ids_by_extids('GO:0004835');
Description: Gets a list of rnaproduct IDs by external display IDs
Returntype : list of Ints
Exceptions : none
Caller : unknown
Status : Stable
=cut
sub list_rnaproduct_ids_by_extids {
  my ($self, $external_name, $external_db_name, $override) = @_;

  # Same lookup as the generic helper, pinned to the 'RNAProduct' object
  # type; the third positional argument is deliberately left undefined.
  my @lookup_args = (
    $external_name,
    'RNAProduct',
    undef,
    $external_db_name,
    $override,
  );

  return $self->_type_by_external_id(@lookup_args);
}
=head2 _type_by_external_id =head2 _type_by_external_id
Arg [1] : string $name - dbprimary_acc Arg [1] : string $name - dbprimary_acc
......
=head1 LICENSE
Copyright [1999-2015] Wellcome Trust Sanger Institute and the EMBL-European Bioinformatics Institute
Copyright [2016-2019] EMBL-European Bioinformatics Institute
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
=cut
=head1 CONTACT
Please email comments or questions to the public Ensembl
developers list at <http://lists.ensembl.org/mailman/listinfo/dev>.
Questions may also be sent to the Ensembl help desk at
<http://www.ensembl.org/Help/Contact>.
=cut
=head1 NAME
Bio::EnsEMBL::DBSQL::RNAProductAdaptor - Provides a means to fetch and store
RNAProduct objects from a database.
=head1 DESCRIPTION
This adaptor provides a means to retrieve and store
Bio::EnsEMBL::RNAProduct objects from/in a database.
RNAProduct objects only truly make sense in the context of their
transcripts so the recommended means to retrieve RNAProducts is
by retrieving the Transcript object first, and then fetching the
RNAProduct.
=head1 SYNOPSIS
use Bio::EnsEMBL::Registry;
Bio::EnsEMBL::Registry->load_registry_from_db(
-host => 'ensembldb.ensembl.org',
-user => 'anonymous'
);
$rnaproduct_adaptor =
Bio::EnsEMBL::Registry->get_adaptor( "human", "core",
"rnaproduct" );
...
=head1 METHODS
=cut
package Bio::EnsEMBL::DBSQL::RNAProductAdaptor;
use strict;
use warnings;
use Bio::EnsEMBL::DBSQL::BaseAdaptor;
use Bio::EnsEMBL::MicroRNA;
use Bio::EnsEMBL::RNAProduct;
use Bio::EnsEMBL::Utils::Exception qw( throw warning );
use Bio::EnsEMBL::Utils::Scalar qw( assert_ref );
use parent qw( Bio::EnsEMBL::DBSQL::BaseAdaptor );
=head2 fetch_all_by_Transcript
Arg [1] : Bio::EnsEMBL::Transcript $transcript
Example : $rps = $rnaproduct_adaptor->fetch_all_by_Transcript($transcript);
Description: Retrieves RNAProducts via their associated transcript.
If no RNAProducts are found, an empty list is returned.
Returntype : arrayref of Bio::EnsEMBL::RNAProducts
Exceptions : throw on incorrect argument
Caller : Transcript
Status : Stable
=cut
sub fetch_all_by_Transcript {
  my ($self, $transcript) = @_;

  assert_ref($transcript, 'Bio::EnsEMBL::Transcript');

  # Constraint triple handed to _fetch_direct_query():
  # column name, value to match, SQL type of the value.
  my @constraint = ('rp.transcript_id', $transcript->dbID(), SQL_INTEGER);

  return $self->_fetch_direct_query(\@constraint);
}
=head2 fetch_all_by_external_name
Arg [1] : String $external_name
An external identifier of the RNAProduct to be obtained
Arg [2] : (optional) String $external_db_name
The name of the external database from which the
identifier originates.
Arg [3] : Boolean override. Force SQL regex matching for users
who really do want to find all 'NM%'
Example : my @rnaproducts =
@{ $rp_a->fetch_all_by_external_name('MIMAT0000416') };
my @more_rnaproducts =
@{ $rp_a->fetch_all_by_external_name('hsa-miR-1-__') };
Description: Retrieves all RNAProducts which are associated with
an external identifier such as a GO term, miRBase
identifier, etc. Usually there will only be a single
RNAProduct returned in the list reference, but not
always. If no RNAProducts with the external identifier
are found, a reference to an empty list is returned.
SQL wildcards % and _ are supported in the $external_name
but their use is somewhat restricted for performance reasons.
Users that really do want % and _ in the first three characters
should use argument 3 to prevent optimisations
Returntype : listref of Bio::EnsEMBL::RNAProduct
Exceptions : none
Caller : general
Status : Stable
=cut
sub fetch_all_by_external_name {
  my ($self, $external_name, $external_db_name, $override) = @_;

  # Resolve the external name to internal RNAProduct identifiers.
  my @rnaproduct_ids =
    $self->db->get_DBEntryAdaptor()->list_rnaproduct_ids_by_extids(
      $external_name, $external_db_name, $override
    );

  my $transcript_adaptor = $self->db()->get_TranscriptAdaptor();

  # Results are returned with RNAProducts whose transcript lies on a
  # reference slice first, followed by all the others.
  my (@on_reference, @off_reference);

  for my $rnaproduct_id (@rnaproduct_ids) {
    my $transcript = $transcript_adaptor->fetch_by_rnaproduct_id($rnaproduct_id);

    # Identifiers for which no parent transcript is found are skipped.
    next if !defined($transcript);

    my $rnaproduct = $self->fetch_by_dbID($rnaproduct_id);

    my $bucket = $transcript->slice()->is_reference()
               ? \@on_reference
               : \@off_reference;
    push @{$bucket}, $rnaproduct;
  }

  return [@on_reference, @off_reference];
}
=head2 fetch_all_by_type
Arg [1] : string $type_code
Example : $rps = $rp_a->fetch_all_by_type('miRNA');
Description: Retrieves RNAProducts via their type (e.g. miRNA, circRNA).
If no matching RNAProducts are found, an empty list is
returned.
Returntype : arrayref of Bio::EnsEMBL::RNAProducts
Exceptions : throws if type code is undefined
Caller : ?
Status : In Development
=cut
sub fetch_all_by_type {
  my ($self, $type_code) = @_;

  # The type code is mandatory.
  throw("type code argument is required") unless defined($type_code);

  # Constraint triple: column name, value to match, SQL type of the value.
  my @constraint = ('pt.code', $type_code, SQL_VARCHAR);

  return $self->_fetch_direct_query(\@constraint);
}
=head2 fetch_by_dbID
Arg [1] : int $dbID
The internal identifier of the RNAProduct to obtain
Example : $rnaproduct = $rnaproduct_adaptor->fetch_by_dbID(1234);
Description: This fetches a RNAProduct object via its internal id.
This is only debatably useful since RNAProducts do
not make much sense outside of the context of their
Transcript. Consider using fetch_all_by_Transcript instead.
Returntype : Bio::EnsEMBL::RNAProduct, or undef if the RNAProduct is not
found.
Caller : ?
Status : Stable
=cut
sub fetch_by_dbID {
  my ($self, $dbID) = @_;

  # The internal identifier is mandatory.
  throw("dbID argument is required") unless defined($dbID);

  # Constraint triple: column name, value to match, SQL type of the value.
  my @constraint = ('rp.rnaproduct_id', $dbID, SQL_INTEGER);

  # Only the first element of the query result is handed back.
  return ($self->_fetch_direct_query(\@constraint))->[0];
}
=head2 fetch_by_stable_id
Arg [1] : string $stable_id
The stable identifier of the RNAProduct to obtain
Example : $rnaproduct = $rnaproduct_adaptor->fetch_by_stable_id("ENSS00001");
Description: This fetches a RNAProduct object via its stable id.
Returntype : Bio::EnsEMBL::RNAProduct, or undef if the RNAProduct is not
found.
Caller : ?
Status : Stable
=cut
sub fetch_by_stable_id {
  my ($self, $stable_id) = @_;

  # The stable identifier is mandatory.
  throw("stable id argument is required") unless defined($stable_id);

  # Constraint triple: column name, value to match, SQL type of the value.
  my @constraint = ('rp.stable_id', $stable_id, SQL_VARCHAR);

  # Only the first element of the query result is handed back.
  return ($self->_fetch_direct_query(\@constraint))->[0];
}
=head2 list_dbIDs
Arg [1] : none
Example : @rnaproduct_ids = @{$rnaproduct_adaptor->list_dbIDs()};
Description: Gets an array of internal ids for all RNAProducts in the current db
Returntype : listref of ints
Exceptions : none
Caller : ?
Status : Stable
=cut
sub list_dbIDs {
  my $self = shift;

  # Delegate to the generic helper, naming the table to list ids from.
  return $self->_list_dbIDs('rnaproduct');
}
=head2 remove
Arg [1] : Bio::EnsEMBL::RNAProduct $rnaproduct
The RNAProduct to be removed from the database
Example : $rp_adaptor->remove($rnaproduct);
Description: Removes a RNAProduct, along with all associated information
from the database.
Returntype : none
Exceptions : throw on incorrect arguments
Caller : ?
Status : Stable
=cut
sub remove {
  my ($self, $rnaproduct) = @_;

  # Only Bio::EnsEMBL::RNAProduct instances can be removed.
  my $is_rnaproduct
    = ref($rnaproduct) && $rnaproduct->isa('Bio::EnsEMBL::RNAProduct');
  if (!$is_rnaproduct) {
    throw("$rnaproduct is not a EnsEMBL rnaproduct");
  }

  my $db = $self->db();

  # An object that is not stored in this database has nothing to delete.
  return if !$rnaproduct->is_stored($db);

  # Step 1: detach every xref linked to this RNAProduct.
  my $xref_adaptor = $db->get_DBEntryAdaptor();
  foreach my $xref (@{ $rnaproduct->get_all_DBEntries() }) {
    $xref_adaptor->remove_from_object($xref, $rnaproduct, 'RNAProduct');
  }

  # Step 2: drop its attributes.
  $db->get_AttributeAdaptor()->remove_from_RNAProduct($rnaproduct);

  # Step 3: delete the rnaproduct row itself.
  my $delete_sth
    = $self->prepare("DELETE FROM rnaproduct WHERE rnaproduct_id = ?");
  $delete_sth->bind_param(1, $rnaproduct->dbID(), SQL_INTEGER);
  $delete_sth->execute();
  $delete_sth->finish();

  # Step 4: the in-memory object no longer corresponds to a database row,
  # so clear its dbID and adaptor.
  $rnaproduct->dbID(undef);
  $rnaproduct->adaptor(undef);

  return;
}
=head2 store
Arg [1] : Bio::EnsEMBL::RNAProduct $rnaproduct
The RNAProduct to be written to the database
Arg [2] : Int $transcript_dbID
The identifier of the transcript that this RNAProduct is
associated with
Example : $rpID = $rp_adaptor->store($rnaproduct, $transcript->dbID());
Description: Stores a RNAProduct in the database and returns the new
internal identifier for the stored RNAProduct.
Returntype : Int
Exceptions : throw on incorrect arguments
Caller : general
Status : Stable
=cut
sub store {
my ($self, $rnaproduct, $transcript_dbID) = @_;
if (!ref($rnaproduct) || !$rnaproduct->isa('Bio::EnsEMBL::RNAProduct')) {
throw("$rnaproduct is not a EnsEMBL rnaproduct - not storing");
}
my $db = $self->db();
# Avoid creating duplicate entries
if ($rnaproduct->is_stored($db)) {
return $rnaproduct->dbID();
}
my ( $start_exon_dbID, $end_exon_dbID );
if ( $rnaproduct->start_Exon() ) {
$start_exon_dbID = $rnaproduct->start_Exon()->dbID();
}
if ( $rnaproduct->end_Exon() ) {
$end_exon_dbID = $rnaproduct->end_Exon()->dbID();
}
# Store rnaproduct
my @columns = qw{ transcript_id seq_start start_exon_id seq_end end_exon_id };
my @canned_columns = ( 'rnaproduct_type_id' );
my @canned_values
= ( '(SELECT rnaproduct_type_id FROM rnaproduct_type WHERE code=?)' );
if (defined($rnaproduct->stable_id())) {
push @columns, 'stable_id', 'version';
my $created = $db->dbc->from_seconds_to_date($rnaproduct->created_date());
if ($created) {
push @canned_columns, 'created_date';
push @canned_values, $created;
}
my $modified = $db->dbc->from_seconds_to_date($rnaproduct->modified_date());
if ($modified) {
push @canned_columns, 'modified_date';
push @canned_values, $modified;
}
}
my $column_string = join(', ', @columns, @canned_columns);
my $value_string = join(', ', (q{?}) x @columns, @canned_values);
my $store_rnaproduct_sql
= "INSERT INTO rnaproduct ( $column_string ) VALUES ( $value_string )";
my $rp_st = $self->prepare($store_rnaproduct_sql);
$rp_st->bind_param( 1, $transcript_dbID, SQL_INTEGER);
$rp_st->bind_param( 2, $rnaproduct->start(), SQL_INTEGER);
$rp_st->bind_param( 3, $start_exon_dbID, SQL_INTEGER);
$rp_st->bind_param( 4, $rnaproduct->end(), SQL_INTEGER);
$rp_st->bind_param( 5, $end_exon_dbID, SQL_INTEGER);
if (defined($rnaproduct->stable_id())) {
$rp_st->bind_param(6, $rnaproduct->stable_id(), SQL_VARCHAR);
$rp_st->bind_param(7, $rnaproduct->version(), SQL_INTEGER);
}
$rp_st->bind_param( 8, $rnaproduct->type_code(), SQL_VARCHAR);
$rp_st->execute();
$rp_st->finish();
# Retrieve the newly assigned dbID
my $rp_dbID = $self->last_insert_id('rnaproduct_id', undef, 'rnaproduct');
# Store attributes
$rnaproduct->synchronise_attributes();
my $attr_adaptor = $db->get_AttributeAdaptor();