diff --git a/modules/Bio/EnsEMBL/DBSQL/ArchiveStableIdAdaptor.pm b/modules/Bio/EnsEMBL/DBSQL/ArchiveStableIdAdaptor.pm index 2ff810f49ec7e329a5aff121a65e6fcdb746cc4c..2ae6606ce9e37a6737cd4bd328905849f397f582 100644 --- a/modules/Bio/EnsEMBL/DBSQL/ArchiveStableIdAdaptor.pm +++ b/modules/Bio/EnsEMBL/DBSQL/ArchiveStableIdAdaptor.pm @@ -319,6 +319,7 @@ sub fetch_by_transcript_archive_id { FROM gene_archive ga, mapping_session m WHERE ga.transcript_stable_id = ? AND ga.transcript_version = ? + AND ga.mapping_session_id = m.mapping_session_id ); my $sth = $self->prepare( $sql ); diff --git a/modules/Bio/EnsEMBL/DBSQL/DBEntryAdaptor.pm b/modules/Bio/EnsEMBL/DBSQL/DBEntryAdaptor.pm index bc5b3e5e799f85e14f1d567175a2551245a43abb..d5ea2cf749ac15e2502a0098e60ebe2aa2503cdb 100644 --- a/modules/Bio/EnsEMBL/DBSQL/DBEntryAdaptor.pm +++ b/modules/Bio/EnsEMBL/DBSQL/DBEntryAdaptor.pm @@ -113,7 +113,6 @@ sub fetch_by_dbID { } - =head2 fetch_by_db_accession Arg [1] : string $dbname - The name of the database which the provided @@ -471,7 +470,6 @@ sub exists { } - =head2 fetch_all_by_Gene Arg [1] : Bio::EnsEMBL::Gene $gene @@ -812,7 +810,7 @@ sub _fetch_by_object_type { sub list_gene_ids_by_extids{ my ($self,$name) = @_; - my %T = map { ($_,1) } + my %T = map { ($_, 1) } $self->_type_by_external_id( $name, 'Translation', 'gene' ), $self->_type_by_external_id( $name, 'Transcript', 'gene' ), $self->_type_by_external_id( $name, 'Gene' ); @@ -820,7 +818,6 @@ sub list_gene_ids_by_extids{ } - =head2 list_transcript_ids_by_extids Arg [1] : string $external_id @@ -839,7 +836,7 @@ sub list_transcript_ids_by_extids{ my ($self,$name) = @_; my @transcripts; - my %T = map { ($_,1) } + my %T = map { ($_, 1) } $self->_type_by_external_id( $name, 'Translation', 'transcript' ), $self->_type_by_external_id( $name, 'Transcript' ); return keys %T; @@ -860,63 +857,82 @@ sub list_transcript_ids_by_extids{ sub list_translation_ids_by_extids{ my ($self,$name) = @_; - return $self->_type_by_external_id( $name, 
'Translation' ); + return $self->_type_by_external_id($name, 'Translation'); } =head2 _type_by_external_id - Arg [1] : string $name - (dbprimary_acc) - Arg [2] : string $ensType - (Object_type) - Arg [3] : string $extraType - (other object type to be returned) - optional - Example : $self->_type_by_external_id( $name, 'Translation' ) + Arg [1] : string $name - dbprimary_acc + Arg [2] : string $ensType - ensembl_object_type + Arg [3] : (optional) string $extraType + other object type to be returned + Example : $self->_type_by_external_id($name, 'Translation'); Description: Gets - Returntype : list of ensembl_IDs + Returntype : list of dbIDs (gene_id, transcript_id, etc.) Exceptions : none Caller : list_translation_ids_by_extids translationids_by_extids - geneids_by_extids + geneids_by_extids Status : Stable =cut sub _type_by_external_id{ - my ($self,$name,$ensType,$extraType) = @_; + my ($self, $name, $ensType, $extraType) = @_; my $from_sql = ''; my $where_sql = ''; my $ID_sql = "oxr.ensembl_id"; - if(defined $extraType) { - if(lc($extraType) eq 'translation') { + + if (defined $extraType) { + if (lc($extraType) eq 'translation') { $ID_sql = "tl.translation_id"; } else { $ID_sql = "t.${extraType}_id"; } - if(lc($ensType) eq 'translation') { - $from_sql = 'transcript as t, translation as tl, '; - $where_sql = 't.transcript_id = tl.transcript_id and ' . - 'tl.translation_id = oxr.ensembl_id and '; + if (lc($ensType) eq 'translation') { + $from_sql = 'transcript t, translation tl, '; + $where_sql = qq( + t.transcript_id = tl.transcript_id AND + tl.translation_id = oxr.ensembl_id AND + t.is_current = 1 AND + ); } else { - $from_sql = 'transcript as t, '; - $where_sql = 't.'.lc($ensType).'_id = oxr.ensembl_id and '; + $from_sql = 'transcript t, '; + $where_sql = 't.'.lc($ensType).'_id = oxr.ensembl_id AND '. 
+ 't.is_current = 1 AND '; } } + + if (lc($ensType) eq 'gene') { + $from_sql = 'gene g, '; + $where_sql = 'g.gene_id = oxr.ensembl_id AND g.is_current = 1 AND '; + } elsif (lc($ensType) eq 'transcript') { + $from_sql = 'transcript t, '; + $where_sql = 't.transcript_id = oxr.ensembl_id AND t.is_current = 1 AND '; + } elsif (lc($ensType) eq 'translation') { + $from_sql = 'transcript t, translation tl, '; + $where_sql = qq( + t.transcript_id = tl.transcript_id AND + tl.translation_id = oxr.ensembl_id AND + t.is_current = 1 AND + ); + } + my @queries = ( - "select $ID_sql - from $from_sql xref, object_xref as oxr - where $where_sql xref.dbprimary_acc = ? and - xref.xref_id = oxr.xref_id and oxr.ensembl_object_type= ?", - "select $ID_sql - from $from_sql xref, object_xref as oxr - where $where_sql xref.display_label = ? and - xref.xref_id = oxr.xref_id and oxr.ensembl_object_type= ?", - "select $ID_sql - from $from_sql object_xref as oxr, external_synonym as syn - where $where_sql syn.synonym = ? and - syn.xref_id = oxr.xref_id and oxr.ensembl_object_type= ?", + "SELECT $ID_sql + FROM $from_sql xref x, object_xref oxr + WHERE $where_sql x.dbprimary_acc = ? AND + x.xref_id = oxr.xref_id AND oxr.ensembl_object_type= ?", + "SELECT $ID_sql + FROM $from_sql xref x, object_xref oxr + WHERE $where_sql x.display_label = ? AND + x.xref_id = oxr.xref_id AND oxr.ensembl_object_type= ?", + "SELECT $ID_sql + FROM $from_sql object_xref oxr, external_synonym syn + WHERE $where_sql syn.synonym = ? 
AND + syn.xref_id = oxr.xref_id AND oxr.ensembl_object_type= ?", ); # Increase speed of query by splitting the OR in query into three separate @@ -929,8 +945,8 @@ sub _type_by_external_id{ foreach( @queries ) { my $sth = $self->prepare( $_ ); - $sth->bind_param(1,"$name",SQL_VARCHAR); - $sth->bind_param(2,$ensType,SQL_VARCHAR); + $sth->bind_param(1, $name, SQL_VARCHAR); + $sth->bind_param(2, $ensType, SQL_VARCHAR); $sth->execute(); while( my $r = $sth->fetchrow_array() ) { if( !exists $hash{$r} ) { @@ -942,6 +958,7 @@ sub _type_by_external_id{ return @result; } + =head2 fetch_all_by_description Arg [1] : string description to search for. Include % etc in this string diff --git a/modules/Bio/EnsEMBL/DBSQL/ExonAdaptor.pm b/modules/Bio/EnsEMBL/DBSQL/ExonAdaptor.pm index a56bc3898d39fbcaa097eaa70c359e026acaa3d0..66c0552ad234cb50af1c8b20e592f878b55b6c3a 100644 --- a/modules/Bio/EnsEMBL/DBSQL/ExonAdaptor.pm +++ b/modules/Bio/EnsEMBL/DBSQL/ExonAdaptor.pm @@ -1,3 +1,5 @@ +package Bio::EnsEMBL::DBSQL::ExonAdaptor; + #EnsEMBL Exon reading writing adaptor for mySQL # # Copyright EMBL-EBI 2001 @@ -33,20 +35,16 @@ Post questions/comments to the Ensembl dev list: ensembl-dev@ebi.ac.uk =cut -package Bio::EnsEMBL::DBSQL::ExonAdaptor; - -use vars qw( @ISA ); use strict; - use Bio::EnsEMBL::DBSQL::BaseFeatureAdaptor; use Bio::EnsEMBL::Exon; use Bio::EnsEMBL::Utils::Exception qw( warning throw deprecate ); +use vars qw( @ISA ); @ISA = qw( Bio::EnsEMBL::DBSQL::BaseFeatureAdaptor ); - #_tables # # Arg [1] : none @@ -82,22 +80,18 @@ sub _columns { my $created_date = $self->db->dbc->from_date_to_seconds("created_date"); my $modified_date = $self->db->dbc->from_date_to_seconds("modified_date"); - return ( 'e.exon_id', 'e.seq_region_id', 'e.seq_region_start' ,'e.seq_region_end', - 'e.seq_region_strand', 'e.phase','e.end_phase', - 'esi.stable_id', 'esi.version',$created_date, - $modified_date ); -# esi.stable_id esi.version UNIX_TIMESTAMP(created_date) -# UNIX_TIMESTAMP(modified_date) ); + 
return ( 'e.exon_id', 'e.seq_region_id', 'e.seq_region_start', + 'e.seq_region_end', 'e.seq_region_strand', 'e.phase','e.end_phase', + 'e.is_current', + 'esi.stable_id', 'esi.version', $created_date, $modified_date ); } - sub _left_join { return ( [ 'exon_stable_id', "esi.exon_id = e.exon_id" ]); } - # _final_clause # # Arg [1] : none @@ -114,14 +108,13 @@ sub _final_clause { } - =head2 fetch_by_stable_id Arg [1] : string $stable_id the stable id of the exon to retrieve Example : $exon = $exon_adaptor->fetch_by_stable_id('ENSE0000988221'); Description: Retrieves an Exon from the database via its stable id - Returntype : Bio::EnsEMBL::Exon in contig coordinates + Returntype : Bio::EnsEMBL::Exon in native coordinates. Exceptions : none Caller : general Status : Stable @@ -129,16 +122,36 @@ sub _final_clause { =cut sub fetch_by_stable_id { - my ( $self, $stable_id ) = @_; + my ($self, $stable_id) = @_; + + my $constraint = "esi.stable_id = '$stable_id' AND e.is_current = 1"; + my ($exon) = @{ $self->generic_fetch($constraint) }; + + return $exon; +} + - my $constraint = "esi.stable_id = \"$stable_id\""; +=head2 fetch_all_versions_by_stable_id - # should be only one :-) - my $exons = $self->SUPER::generic_fetch( $constraint ); + Arg [1] : String $stable_id + The stable ID of the exon to retrieve + Example : my $exon = $exon_adaptor->fetch_all_versions_by_stable_id + ('ENSE00000309301'); + Description : Similar to fetch_by_stable_id, but retrieves all versions of an + exon stored in the database. + Returntype : listref of Bio::EnsEMBL::Exon objects + Exceptions : if we can't get the exon in given coord system + Caller : general + Status : At Risk + +=cut - if( ! 
@$exons ) { return undef } +sub fetch_all_versions_by_stable_id { + my ($self, $stable_id) = @_; - return $exons->[0]; + my $constraint = "esi.stable_id = '$stable_id'"; + + return $self->generic_fetch($constraint); } @@ -175,14 +188,13 @@ sub fetch_all_by_Transcript { $self->{'tables'} = \@tables; $self->{'final_clause'} = "ORDER BY et.transcript_id, et.rank"; - my $constraint = "et.transcript_id = ".$transcript->dbID() . " AND e.exon_id = et.exon_id"; # fetch all of the exons - my $exons = $self->fetch_all_by_Slice_constraint($slice,$constraint); + my $exons = $self->fetch_all_by_Slice_constraint($slice, $constraint); - #un-override the table definition + # un-override the table definition $self->{'tables'} = undef; $self->{'final_clause'} = undef; @@ -199,7 +211,6 @@ sub fetch_all_by_Transcript { } - =head2 store Arg [1] : Bio::EnsEMBL::Exon $exon @@ -216,7 +227,7 @@ sub fetch_all_by_Transcript { =cut sub store { - my ( $self, $exon ) = @_; + my ($self, $exon) = @_; if( ! $exon->isa('Bio::EnsEMBL::Exon') ) { throw("$exon is not a EnsEMBL exon - not storing."); @@ -233,11 +244,15 @@ sub store { throw("Exon does not have all attributes to store"); } + # default to is_current = 1 if this attribute is not set + my $is_current = $exon->is_current; + $is_current = 1 unless (defined($is_current)); + my $exon_sql = q{ INSERT into exon ( seq_region_id, seq_region_start, seq_region_end, seq_region_strand, phase, - end_phase ) - VALUES ( ?, ?, ?, ?, ?, ? ) + end_phase, is_current ) + VALUES ( ?, ?, ?, ?, ?, ?, ? 
) }; my $exonst = $self->prepare($exon_sql); @@ -249,12 +264,13 @@ sub store { ($exon, $seq_region_id) = $self->_pre_store($exon); #store the exon - $exonst->bind_param(1,$seq_region_id, SQL_INTEGER); - $exonst->bind_param(2,$exon->start, SQL_INTEGER); - $exonst->bind_param(3,$exon->end, SQL_INTEGER); - $exonst->bind_param(4,$exon->strand, SQL_TINYINT); - $exonst->bind_param(5,$exon->phase, SQL_TINYINT); - $exonst->bind_param(6,$exon->end_phase, SQL_TINYINT); + $exonst->bind_param(1, $seq_region_id, SQL_INTEGER); + $exonst->bind_param(2, $exon->start, SQL_INTEGER); + $exonst->bind_param(3, $exon->end, SQL_INTEGER); + $exonst->bind_param(4, $exon->strand, SQL_TINYINT); + $exonst->bind_param(5, $exon->phase, SQL_TINYINT); + $exonst->bind_param(6, $exon->end_phase, SQL_TINYINT); + $exonst->bind_param(7, $is_current, SQL_TINYINT); $exonst->execute(); $exonId = $exonst->{'mysql_insertid'}; @@ -268,18 +284,10 @@ sub store { "stable_id = ?, " . "exon_id = ?, "; - $statement .= "created_date = " . $self->db->dbc->from_seconds_to_date($exon->created_date()) . ","; -# if( $exon->created_date() ) { -# $statement .= "created_date = from_unixtime( ".$exon->created_date()."),"; -# } else { -# $statement .= "created_date = \"0000-00-00 00:00:00\","; -# } - $statement .= "modified_date = " . $self->db->dbc->from_seconds_to_date($exon->modified_date()) ; -# if( $exon->modified_date() ) { -# $statement .= "modified_date = from_unixtime( ".$exon->modified_date().")"; -# } else { -# $statement .= "modified_date = \"0000-00-00 00:00:00\""; -# } + $statement .= "created_date = " . + $self->db->dbc->from_seconds_to_date($exon->created_date()) . ","; + $statement .= "modified_date = " . 
+ $self->db->dbc->from_seconds_to_date($exon->modified_date()) ; my $sth = $self->prepare( $statement ); @@ -319,9 +327,9 @@ sub store { next; } - $sf_sth->bind_param(1,$exonId,SQL_INTEGER); - $sf_sth->bind_param(2,$sf->dbID,SQL_INTEGER); - $sf_sth->bind_param(3,$type,SQL_VARCHAR); + $sf_sth->bind_param(1, $exonId, SQL_INTEGER); + $sf_sth->bind_param(2, $sf->dbID, SQL_INTEGER); + $sf_sth->bind_param(3, $type, SQL_VARCHAR); $sf_sth->execute(); } @@ -382,7 +390,7 @@ sub remove { my $sth = $self->prepare("SELECT feature_type, feature_id " . "FROM supporting_feature " . "WHERE exon_id = ?"); - $sth->bind_param(1,$exon->dbID,SQL_INTEGER); + $sth->bind_param(1, $exon->dbID, SQL_INTEGER); $sth->execute(); while(my ($type, $feature_id) = $sth->fetchrow()){ @@ -403,21 +411,21 @@ sub remove { # delete the association to supporting features $sth = $self->prepare("DELETE FROM supporting_feature WHERE exon_id = ?"); - $sth->bind_param(1,$exon->dbID,SQL_INTEGER); + $sth->bind_param(1, $exon->dbID, SQL_INTEGER); $sth->execute(); $sth->finish(); # delete the exon stable identifier $sth = $self->prepare( "DELETE FROM exon_stable_id WHERE exon_id = ?" ); - $sth->bind_param(1,$exon->dbID,SQL_INTEGER); + $sth->bind_param(1, $exon->dbID, SQL_INTEGER); $sth->execute(); $sth->finish(); # delete the exon $sth = $self->prepare( "DELETE FROM exon WHERE exon_id = ?" 
); - $sth->bind_param(1,$exon->dbID,SQL_INTEGER); + $sth->bind_param(1, $exon->dbID, SQL_INTEGER); $sth->execute(); $sth->finish(); @@ -446,6 +454,7 @@ sub list_dbIDs { return $self->_list_dbIDs("exon"); } + =head2 list_stable_ids Arg [1] : none @@ -492,14 +501,13 @@ sub _objs_from_sth { my ( $exon_id, $seq_region_id, $seq_region_start, $seq_region_end, $seq_region_strand, $phase, - $end_phase, $stable_id, $version, $created_date, + $end_phase, $is_current, $stable_id, $version, $created_date, $modified_date ); - $sth->bind_columns( \$exon_id, \$seq_region_id, - \$seq_region_start, - \$seq_region_end, \$seq_region_strand, \$phase, - \$end_phase, \$stable_id, \$version, \$created_date, - \$modified_date ); + $sth->bind_columns( \$exon_id, \$seq_region_id, \$seq_region_start, + \$seq_region_end, \$seq_region_strand, \$phase, + \$end_phase, \$is_current, \$stable_id, \$version, + \$created_date, \$modified_date ); my $asm_cs; my $cmp_cs; @@ -611,8 +619,8 @@ sub _objs_from_sth { } #finally, create the new repeat feature - push @exons, Bio::EnsEMBL::Exon->new - ( '-start' => $seq_region_start, + push @exons, Bio::EnsEMBL::Exon->new( + '-start' => $seq_region_start, '-end' => $seq_region_end, '-strand' => $seq_region_strand, '-adaptor' => $self, @@ -623,7 +631,9 @@ sub _objs_from_sth { '-created_date' => $created_date || undef, '-modified_date' => $modified_date || undef, '-phase' => $phase, - '-end_phase' => $end_phase ) + '-end_phase' => $end_phase, + 'is_current' => $is_current + ); } @@ -663,7 +673,7 @@ sub get_stable_entry_info { FROM exon_stable_id WHERE exon_id = "); - $sth->bind_param(1,$exon->dbID,SQL_INTEGER); + $sth->bind_param(1, $exon->dbID, SQL_INTEGER); $sth->execute(); # my @array = $sth->fetchrow_array(); @@ -677,6 +687,7 @@ sub get_stable_entry_info { return 1; } + =head2 fetch_all_by_gene_id Description: DEPRECATED. 
This method should not be needed - Exons can @@ -695,7 +706,9 @@ sub fetch_all_by_gene_id { if( !$gene_id ) { $self->throw("Gene dbID not defined"); } + $self->{rchash} = {}; + my $query = qq { SELECT STRAIGHT_JOIN @@ -740,3 +753,4 @@ sub fetch_all_by_gene_id { 1; + diff --git a/modules/Bio/EnsEMBL/DBSQL/GeneAdaptor.pm b/modules/Bio/EnsEMBL/DBSQL/GeneAdaptor.pm index a428da13b630988dbec06f692d972f114799cfc1..500556e94b21768748eb4968ca7c0c8d435ff106 100644 --- a/modules/Bio/EnsEMBL/DBSQL/GeneAdaptor.pm +++ b/modules/Bio/EnsEMBL/DBSQL/GeneAdaptor.pm @@ -1,25 +1,17 @@ -# -# Copyright EMBL-EBI 2001 -# -# Author: Arne Stabenau -# based on -# Elia Stupkas Gene_Obj -# -# Date : 20.02.2001 -# +package Bio::EnsEMBL::DBSQL::GeneAdaptor; =head1 NAME -Bio::EnsEMBL::DBSQL::GeneAdaptor - A database aware adaptor responsible for the -retrieval and storage of Gene objects. +Bio::EnsEMBL::DBSQL::GeneAdaptor - Database adaptor for the retrieval and +storage of Gene objects =head1 SYNOPSIS use Bio::EnsEMBL::DBSQL::DBAdaptor; - $db = Bio::EnsEMBL::DBSQL::DBAdaptor->new(...); + $dba = Bio::EnsEMBL::DBSQL::DBAdaptor->new(...); - $gene_adaptor = $db->get_GeneAdaptor(); + $gene_adaptor = $dba->get_GeneAdaptor(); $gene = $gene_adaptor->fetch_by_dbID(1234); @@ -31,24 +23,30 @@ retrieval and storage of Gene objects. $slice = $slice_adaptor->fetch_by_region('chromosome', '1', 1, 1000000); @genes = @{$gene_adaptor->fetch_all_by_Slice($slice)}; +=head1 DESCRIPTION -=head1 CONTACT +This is a database aware adaptor for the retrieval and storage of gene objects. -Contact the EnsEMBL development list for questions or information: -ensembl-dev@ebi.ac.uk +=head1 LICENCE -=head1 METHODS +This code is distributed under an Apache style licence. Please see +http://www.ensembl.org/info/about/code_licence.html for details. 
-=cut +=head1 AUTHOR +Arne Stabenau <stabenau@ebi.ac.uk>, Ensembl core API team +Based on Elia Stupkas Gene_Obj -package Bio::EnsEMBL::DBSQL::GeneAdaptor; +=head1 CONTACT -use strict; +Please post comments/questions to the Ensembl development list +<ensembl-dev@ebi.ac.uk> +=cut -use Bio::EnsEMBL::Utils::Exception qw( deprecate throw warning ); +use strict; +use Bio::EnsEMBL::Utils::Exception qw( deprecate throw warning ); use Bio::EnsEMBL::DBSQL::SliceAdaptor; use Bio::EnsEMBL::DBSQL::BaseFeatureAdaptor; use Bio::EnsEMBL::DBSQL::DBAdaptor; @@ -58,11 +56,10 @@ use vars '@ISA'; @ISA = qw(Bio::EnsEMBL::DBSQL::BaseFeatureAdaptor); - # _tables # Arg [1] : none -# Description: PROTECTED implementation of superclass abstract method -# returns the names, aliases of the tables to use for queries +# Description: PROTECTED implementation of superclass abstract method. +# Returns the names, aliases of the tables to use for queries. # Returntype : list of listrefs of strings # Exceptions : none # Caller : internal @@ -81,8 +78,8 @@ sub _tables { # _columns # Arg [1] : none # Example : none -# Description: PROTECTED implementation of superclass abstract method -# returns a list of columns to use for queries +# Description: PROTECTED implementation of superclass abstract method. +# Returns a list of columns to use for queries. 
# Returntype : list of strings # Exceptions : none # Caller : internal @@ -93,14 +90,15 @@ sub _columns { my $created_date = $self->db->dbc->from_date_to_seconds("gsi.created_date"); my $modified_date = $self->db->dbc->from_date_to_seconds("gsi.modified_date"); - return ( 'g.gene_id', 'g.seq_region_id', 'g.seq_region_start', 'g.seq_region_end', - 'g.seq_region_strand', 'g.analysis_id' ,'g.biotype', 'g.display_xref_id', - 'g.description', 'g.status', 'g.source', - 'gsi.stable_id', 'gsi.version', $created_date, - $modified_date, + + return ( 'g.gene_id', 'g.seq_region_id', 'g.seq_region_start', + 'g.seq_region_end', 'g.seq_region_strand', + 'g.analysis_id' ,'g.biotype', 'g.display_xref_id', + 'g.description', 'g.status', 'g.source', 'g.is_current', + 'gsi.stable_id', 'gsi.version', $created_date, $modified_date, 'x.display_label' ,'x.dbprimary_acc', 'x.description', 'x.version', - 'exdb.db_name', 'exdb.status', 'exdb.db_release' ,'exdb.db_display_name', - 'x.info_type', 'x.info_text'); + 'exdb.db_name', 'exdb.status', 'exdb.db_release', + 'exdb.db_display_name', 'x.info_type', 'x.info_text'); } @@ -111,15 +109,13 @@ sub _left_join { } - =head2 list_dbIDs - Arg [1] : none Example : @gene_ids = @{$gene_adaptor->list_dbIDs()}; Description: Gets an array of internal ids for all genes in the current db - Returntype : list of ints + Returntype : Listref of Ints Exceptions : none - Caller : ? + Caller : general Status : Stable =cut @@ -130,14 +126,14 @@ sub list_dbIDs { return $self->_list_dbIDs("gene"); } + =head2 list_stable_ids - Arg [1] : none Example : @stable_gene_ids = @{$gene_adaptor->list_stable_dbIDs()}; Description: Gets an listref of stable ids for all genes in the current db Returntype : reference to a list of strings Exceptions : none - Caller : ? 
+ Caller : general Status : Stable =cut @@ -151,10 +147,11 @@ sub list_stable_ids { =head2 fetch_by_display_label - Arg [1] : string $label - Example : my $gene = $geneAdaptor->fetch_by_display_label( "BRCA2" ); - Description: returns the gene which has the given display label or undef if - there is none. If there are more than 1, only the first is reported. + Arg [1] : String $label - display label of gene to fetch + Example : my $gene = $geneAdaptor->fetch_by_display_label("BRCA2"); + Description: Returns the gene which has the given display label or undef if + there is none. If there are more than 1, only the first is + reported. Returntype : Bio::EnsEMBL::Gene Exceptions : none Caller : general @@ -166,7 +163,9 @@ sub fetch_by_display_label { my $self = shift; my $label = shift; - my ( $gene ) = @{$self->generic_fetch( "x.display_label = \"$label\"" )}; + my $constraint = "x.display_label = '$label' AND g.is_current = 1"; + my ($gene) = @{ $self->generic_fetch($constraint) }; + return $gene; } @@ -174,16 +173,16 @@ sub fetch_by_display_label { =head2 fetch_by_stable_id - Arg 1 : string $id - The stable id of the gene to retrieve + Arg [1] : String $id + The stable ID of the gene to retrieve Example : $gene = $gene_adaptor->fetch_by_stable_id('ENSG00000148944'); Description: Retrieves a gene object from the database via its stable id. The gene will be retrieved in its native coordinate system (i.e. - in the coordinate system it is stored in the database). It may + in the coordinate system it is stored in the database). It may be converted to a different coordinate system through a call to - transform() or transfer(). If the gene or exon is not found + transform() or transfer(). If the gene or exon is not found undef is returned instead. 
- Returntype : Bio::EnsEMBL::Gene in given coordinate system + Returntype : Bio::EnsEMBL::Gene or undef Exceptions : if we cant get the gene in given coord system Caller : general Status : Stable @@ -191,80 +190,96 @@ sub fetch_by_display_label { =cut sub fetch_by_stable_id { - my ($self,$id) = @_; + my ($self, $stable_id) = @_; - #because of the way this query is constructed (with a left join to the - # gene_stable_id table), it is faster to do 2 queries, getting the gene_id - # in the first query + my $constraint = "gsi.stable_id = '$stable_id' AND g.is_current = 1"; + my ($gene) = @{ $self->generic_fetch($constraint) }; - my $sth = $self->prepare("SELECT gene_id from gene_stable_id " . - "WHERE stable_id = ?"); - $sth->bind_param(1,$id,SQL_VARCHAR); - $sth->execute(); + return $gene; +} - my ($dbID) = $sth->fetchrow_array(); - $sth->finish; +=head2 fetch_all_versions_by_stable_id - return undef if(!$dbID); + Arg [1] : String $stable_id + The stable ID of the gene to retrieve + Example : $gene = $gene_adaptor->fetch_all_versions_by_stable_id + ('ENSG00000148944'); + Description : Similar to fetch_by_stable_id, but retrieves all versions of a + gene stored in the database. + Returntype : listref of Bio::EnsEMBL::Gene + Exceptions : if we cant get the gene in given coord system + Caller : general + Status : At Risk - return $self->fetch_by_dbID($dbID); - } +=cut + +sub fetch_all_versions_by_stable_id { + my ($self, $stable_id) = @_; + + my $constraint = "gsi.stable_id = '$stable_id'"; + + return $self->generic_fetch($constraint); +} =head2 fetch_by_exon_stable_id - Arg [1] : string $id + Arg [1] : String $id The stable id of an exon of the gene to retrieve Example : $gene = $gene_adptr->fetch_by_exon_stable_id('ENSE00000148944'); Description: Retrieves a gene object from the database via an exon stable id. The gene will be retrieved in its native coordinate system (i.e. - in the coordinate system it is stored in the database). 
It may + in the coordinate system it is stored in the database). It may be converted to a different coordinate system through a call to - transform() or transfer(). If the gene or exon is not found + transform() or transfer(). If the gene or exon is not found undef is returned instead. - Returntype : Bio::EnsEMBL::Gene (or undef) + Returntype : Bio::EnsEMBL::Gene or undef Exceptions : none Caller : general Status : Stable =cut -sub fetch_by_exon_stable_id{ - my ( $self, $id ) = @_; - - my $sth = $self->prepare( - "SELECT t.gene_id +sub fetch_by_exon_stable_id { + my ($self, $stable_id, $version) = @_; + + my $sql = qq( + SELECT t.gene_id FROM transcript as t, exon_transcript as et, + exon as e, exon_stable_id as esi WHERE t.transcript_id = et.transcript_id AND et.exon_id = esi.exon_id - AND esi.stable_id = ?"); - $sth->bind_param(1,"$id",SQL_VARCHAR); - $sth->execute(); + AND et.exon_id = e.exon_id + AND esi.stable_id = ? + AND e.is_current = 1 + ); - my ($dbID) = $sth->fetchrow_array(); - - return undef if(!defined($dbID)); + my $sth = $self->prepare($sql); + $sth->bind_param(1, $stable_id, SQL_VARCHAR); + $sth->execute(); - my $gene = $self->fetch_by_dbID( $dbID ); + my ($dbID) = $sth->fetchrow_array(); - return $gene; -} + return undef if(!defined($dbID)); + my $gene = $self->fetch_by_dbID($dbID); + return $gene; +} =head2 fetch_all_by_domain - Arg [1] : string $domain - the domain to fetch genes from + Arg [1] : String $domain + The domain to fetch genes from Example : my @genes = $gene_adaptor->fetch_all_by_domain($domain); - Description: retrieves a listref of genes whose translation contain interpro - domain $domain. The genes are returned in their native coord + Description: Retrieves a listref of genes whose translation contain interpro + domain $domain. The genes are returned in their native coord system (i.e. the coord_system they are stored in). 
If the coord - system needs to be changed, then tranform - or transfer should be called on the individual objects returned. + system needs to be changed, then transform or transfer should be + called on the individual objects returned. Returntype : list of Bio::EnsEMBL::Genes Exceptions : none Caller : domainview @@ -275,22 +290,23 @@ sub fetch_by_exon_stable_id{ sub fetch_all_by_domain { my ($self, $domain) = @_; - unless($domain) { - throw("domain argument is required"); - } - - my $sth = $self->prepare("SELECT tr.gene_id " . - "FROM interpro i, " . - " protein_feature pf, " . - " transcript tr, " . - " translation tl " . - "WHERE i.interpro_ac = ? " . - "AND i.id = pf.hit_id " . - "AND pf.translation_id = tl.translation_id ". - "AND tr.transcript_id = tl.transcript_id " . - "GROUP BY tr.gene_id"); - - $sth->bind_param(1,$domain,SQL_VARCHAR); + throw("domain argument is required") unless ($domain); + + my $sth = $self->prepare(qq( + SELECT tr.gene_id + FROM interpro i, + protein_feature pf, + transcript tr, + translation tl + WHERE i.interpro_ac = ? 
+ AND i.id = pf.hit_id + AND pf.translation_id = tl.translation_id + AND tr.transcript_id = tl.transcript_id + AND tr.is_current = 1 + GROUP BY tr.gene_id + )); + + $sth->bind_param(1, $domain, SQL_VARCHAR); $sth->execute(); my @array = @{$sth->fetchall_arrayref()}; @@ -328,12 +344,13 @@ sub fetch_all_by_Slice { my $self = shift; my $slice = shift; my $logic_name = shift; - my $load_exons = shift; + my $load_transcripts = shift; - my $genes = $self->SUPER::fetch_all_by_Slice($slice, $logic_name); + my $genes = $self->SUPER::fetch_all_by_Slice_constraint($slice, + 'g.is_current = 1', $logic_name); # if there are 0 or 1 genes still do lazy-loading - if(!$load_exons || @$genes < 2) { + if(!$load_transcripts || @$genes < 2) { return $genes; } @@ -390,7 +407,7 @@ sub fetch_all_by_Slice { $sth->finish(); my $ta = $self->db()->get_TranscriptAdaptor(); - my $transcripts = $ta->fetch_all_by_Slice($ext_slice,1); + my $transcripts = $ta->fetch_all_by_Slice($ext_slice, 1); # move transcripts onto gene slice, and add them to genes foreach my $tr (@$transcripts) { @@ -416,119 +433,117 @@ sub fetch_all_by_Slice { } - - - - =head2 fetch_by_transcript_id - Arg [1] : int $transid - unique database identifier for the transcript whose gene should + Arg [1] : Int $trans_id + Unique database identifier for the transcript whose gene should be retrieved. The gene is returned in its native coord system (i.e. the coord_system it is stored in). If the coord system needs to be changed, then tranform or transfer should - be called on the returned object. undef is returned if the + be called on the returned object. undef is returned if the gene or transcript is not found in the database. - Example : $gene = $gene_adaptor->fetch_by_transcript_id( 1241 ); + Example : $gene = $gene_adaptor->fetch_by_transcript_id(1241); Description: Retrieves a gene from the database via the database identifier of one of its transcripts. Returntype : Bio::EnsEMBL::Gene Exceptions : none - Caller : ? 
+ Caller : general Status : Stable =cut sub fetch_by_transcript_id { - my ( $self, $trans_id ) = @_; + my ($self, $trans_id) = @_; - # this is a cheap SQL call - my $sth = $self->prepare("SELECT tr.gene_id " . - "FROM transcript as tr " . - "WHERE tr.transcript_id = ?"); - $sth->bind_param(1,$trans_id,SQL_INTEGER); - $sth->execute(); + # this is a cheap SQL call + my $sth = $self->prepare(qq( + SELECT tr.gene_id + FROM transcript tr + WHERE tr.transcript_id = ? + )); - my ($geneid) = $sth->fetchrow_array(); + $sth->bind_param(1, $trans_id, SQL_INTEGER); + $sth->execute(); - $sth->finish(); + my ($geneid) = $sth->fetchrow_array(); - return undef if( !defined $geneid ); + $sth->finish(); - my $gene = $self->fetch_by_dbID( $geneid ); - return $gene; + return undef if( !defined $geneid ); + + my $gene = $self->fetch_by_dbID($geneid); + return $gene; } =head2 fetch_by_transcript_stable_id - Arg [1] : string $transid - unique database identifier for the transcript whose gene should - be retrieved. - Example : none - Description: Retrieves a gene from the database via the database identifier - of one of its transcripts + Arg [1] : string $trans_stable_id + transcript stable ID whose gene should be retrieved + Example : my $gene = $gene_adaptor->fetch_by_transcript_stable_id + ('ENST0000234'); + Description: Retrieves a gene from the database via the stable ID of one of + its transcripts Returntype : Bio::EnsEMBL::Gene Exceptions : none - Caller : ? + Caller : general Status : Stable =cut sub fetch_by_transcript_stable_id { - my ( $self, $trans_stable_id) = @_; - - # this is a cheap SQL call - my $sth = $self->prepare( - "SELECT tr.gene_id " . - "FROM transcript as tr, transcript_stable_id tcl " . - "WHERE tcl.stable_id = ? " . 
- "AND tr.transcript_id = tcl.transcript_id"); - $sth->bind_param(1,"$trans_stable_id",SQL_VARCHAR); + my ($self, $trans_stable_id) = @_; + + my $sth = $self->prepare(qq( + SELECT tr.gene_id + FROM transcript tr, transcript_stable_id tcl + WHERE tcl.stable_id = ? + AND tr.transcript_id = tcl.transcript_id + AND tr.is_current = 1 + )); + + $sth->bind_param(1, $trans_stable_id, SQL_VARCHAR); $sth->execute(); my ($geneid) = $sth->fetchrow_array(); $sth->finish; - if( !defined $geneid ) { - return undef; - } - my $gene = $self->fetch_by_dbID( $geneid ); + + return undef if (!defined $geneid); + + my $gene = $self->fetch_by_dbID($geneid); return $gene; } - - - =head2 fetch_by_translation_stable_id - Arg [1] : string $translation_stable_id - the stable id of a translation of the gene that should - be obtained - Example : $gene = $gene_adaptor->fetch_by_translation_stable_id - ( 'ENSP00000278194' ); - Description: retrieves a gene via the stable id of one of its translations. + Arg [1] : String $translation_stable_id + The stable id of a translation of the gene to be obtained + Example : my $gene = $gene_adaptor->fetch_by_translation_stable_id + ('ENSP00000278194'); + Description: Retrieves a gene via the stable id of one of its translations. Returntype : Bio::EnsEMBL::Gene Exceptions : none - Caller : geneview + Caller : general Status : Stable =cut sub fetch_by_translation_stable_id { - my ( $self, $translation_stable_id ) = @_; - - # this is a cheap SQL call - my $sth = $self->prepare - ("SELECT tr.gene_id " . - "FROM transcript as tr, " . - " translation as tl, " . - " translation_stable_id as trs " . - "WHERE trs.stable_id = ? " . - "AND trs.translation_id = tl.translation_id " . 
- "AND tr.transcript_id = tl.transcript_id"); - - $sth->bind_param(1,"$translation_stable_id",SQL_VARCHAR); + my ($self, $translation_stable_id) = @_; + + my $sth = $self->prepare(qq( + SELECT tr.gene_id + FROM transcript tr, + translation tl, + translation_stable_id as trs + WHERE trs.stable_id = ? + AND trs.translation_id = tl.translation_id + AND tr.transcript_id = tl.transcript_id + AND tr.is_current = 1 + )); + + $sth->bind_param(1, $translation_stable_id, SQL_VARCHAR); $sth->execute(); my ($geneid) = $sth->fetchrow_array(); @@ -540,32 +555,31 @@ sub fetch_by_translation_stable_id { } - =head2 fetch_all_by_external_name - Arg [1] : string $external_id - the external identifier for the gene to be obtained + Arg [1] : String $external_id + The external identifier for the gene to be obtained Example : @genes = @{$gene_adaptor->fetch_all_by_external_name('BRCA2')} - Description: retrieves a list of genes with an external database - idenitifier $external_id. The genes returned are in their - native coordinate system. I.e. in the coordinate system they - are stored in the database in. If another coordinate system + Description: Retrieves a list of genes with an external database + idenitifier $external_id. The genes returned are in their + native coordinate system. I.e. in the coordinate system they + are stored in the database in. If another coordinate system is required then the Gene::transfer or Gene::transform method can be used. 
Returntype : listref of Bio::EnsEMBL::Genes Exceptions : none - Caller : goview + Caller : goview, general Status : Stable =cut sub fetch_all_by_external_name { - my ( $self, $external_id) = @_; + my ($self, $external_id) = @_; my $entryAdaptor = $self->db->get_DBEntryAdaptor(); - my ( @ids, @result ); - @ids = $entryAdaptor->list_gene_ids_by_extids( $external_id ); + my (@ids, @result); + @ids = $entryAdaptor->list_gene_ids_by_extids($external_id); my $genes = $self->fetch_all_by_dbID_list(\@ids); @@ -576,20 +590,20 @@ sub fetch_all_by_external_name { } - =head2 fetch_all_alt_alleles Arg [1] : Bio::EnsEMBL::Gene $gene - Example : my @alt_genes = @{$gene_adaptor->fetch_all_alt_alleles($gene);} + The gene to fetch alternative alleles for + Example : my @alt_genes = @{ $gene_adaptor->fetch_all_alt_alleles($gene) }; foreach my $alt_gene (@alt_genes) { print "Alternate allele: " . $alt_gene->stable_id() . "\n"; } Description: Retrieves genes which are alternate alleles to a provided gene. Alternate alleles in Ensembl are genes which are similar and are - on an alternative haplotype of the same region. There are not - currently very many of these. This method will return a + on an alternative haplotype of the same region. There are not + currently very many of these. This method will return a reference to an empty list if no alternative alleles are found. - Returntype : reference to a list of genes + Returntype : listref of Bio::EnsEMBL::Genes Exceptions : throw if incorrect arg provided warning if gene arg does not have dbID Caller : Gene::get_all_alt_alleles @@ -618,8 +632,8 @@ sub fetch_all_alt_alleles { "AND aa2.gene_id = ? " . 
"AND aa1.gene_id <> ?"); - $sth->bind_param(1,$gene_id,SQL_INTEGER); - $sth->bind_param(2,$gene_id,SQL_INTEGER); + $sth->bind_param(1, $gene_id, SQL_INTEGER); + $sth->bind_param(2, $gene_id, SQL_INTEGER); $sth->execute(); my @alt_ids; @@ -629,31 +643,28 @@ sub fetch_all_alt_alleles { } $sth->finish(); - if(@alt_ids) { + if (@alt_ids) { return $self->fetch_all_by_dbID_list(\@alt_ids); } return []; - } - - =head2 store_alt_alleles Arg [1] : reference to list of Bio::EnsEMBL::Genes $genes - Example : $gene_adaptor->store_alt_allele([$gene1, $gene2, $gene3]); - Description: This method creates a group of alternative aleles (i.e. locus) - from a set of genes. The genes should be genes from alternate - haplotypes which are similar. The genes must already be stored + Example : $gene_adaptor->store_alt_alleles([$gene1, $gene2, $gene3]); + Description: This method creates a group of alternative alleles (i.e. locus) + from a set of genes. The genes should be genes from alternate + haplotypes which are similar. The genes must already be stored in this database. At least 2 genes must be in the list reference provided. Returntype : none Exceptions : throw on incorrect arguments throw on sql error (e.g. duplicate unique id) - Caller : ? + Caller : general Status : Stable =cut @@ -686,12 +697,12 @@ sub store_alt_alleles { my $gene_id = $gene->dbID(); - if(!$gene_id) { + if (!$gene_id) { throw("Genes must have dbIDs in order to construct alternate alleles."); } my $sth = $self->prepare("INSERT INTO alt_allele (gene_id) VALUES (?)"); - $sth->bind_param(1,$gene->dbID,SQL_INTEGER); + $sth->bind_param(1, $gene->dbID, SQL_INTEGER); $sth->execute(); my $alt_allele_id = $sth->{'mysql_insertid'}; @@ -705,37 +716,37 @@ sub store_alt_alleles { $sth = $self->prepare("INSERT INTO alt_allele (alt_allele_id, gene_id) " . 
"VALUES (?,?)"); - for(my $i = 1; $i < $num_genes; $i++) { + for (my $i = 1; $i < $num_genes; $i++) { my $gene = $genes->[$i]; - if(!ref($gene) || !$gene->isa('Bio::EnsEMBL::Gene')) { + if (!ref($gene) || !$gene->isa('Bio::EnsEMBL::Gene')) { throw("List reference of Bio::EnsEMBL::Gene argument expected"); } $gene_id = $gene->dbID(); - if(!$gene_id) { - #This is an error but we have already inserted into the database - #delete the already inserted entries to restore the state of the - #database + if (!$gene_id) { + # This is an error but we have already inserted into the database + # delete the already inserted entries to restore the state of the + # database $sth->finish(); $sth->prepare("DELETE FROM alt_allele WHERE alt_allele_id = ?"); - $sth->bind_param(1,$alt_allele_id,SQL_INTEGER); + $sth->bind_param(1, $alt_allele_id, SQL_INTEGER); $sth->execute(); $sth->finish(); throw('Genes must have dbIDs in order to construct alternate alleles.'); } - $sth->bind_param(1,$alt_allele_id,SQL_INTEGER); - $sth->bind_param(2,$gene_id,SQL_INTEGER); + $sth->bind_param(1, $alt_allele_id, SQL_INTEGER); + $sth->bind_param(2, $gene_id, SQL_INTEGER); eval { $sth->execute(); }; - if($@) { - #an error occured, revert the db to the previous state + if ($@) { + # an error occured, revert the db to the previous state $sth = $self->prepare("DELETE FROM alt_allele WHERE alt_allele_id = ?"); - $sth->bind_param(1,$alt_allele_id,SQL_INTEGER); + $sth->bind_param(1, $alt_allele_id, SQL_INTEGER); $sth->execute(); $sth->finish(); throw("An SQL error occured inserting alternate alleles:\n$@"); @@ -750,10 +761,11 @@ sub store_alt_alleles { =head2 store - Arg [1] : Bio::EnsEMBL::Gene + Arg [1] : Bio::EnsEMBL::Gene $gene + The gene to store in the database Example : $gene_adaptor->store($gene); - Description: Stores a gene in the database - Returntype : the database identifier of the newly stored gene + Description: Stores a gene in the database. 
+ Returntype : the database identifier (dbID) of the newly stored gene Exceptions : thrown if the $gene is not a Bio::EnsEMBL::Gene or if $gene does not have an analysis object Caller : general @@ -762,15 +774,15 @@ sub store_alt_alleles { =cut sub store { - my ( $self, $gene ) = @_; + my ($self, $gene) = @_; - if(!ref $gene || !$gene->isa('Bio::EnsEMBL::Gene') ) { + if (!ref $gene || !$gene->isa('Bio::EnsEMBL::Gene') ) { throw("Must store a gene object, not a $gene"); } my $db = $self->db(); - if($gene->is_stored($db)) { + if ($gene->is_stored($db)) { return $gene->dbID(); } @@ -781,7 +793,7 @@ sub store { throw("Genes must have an analysis object.") if(!defined($analysis)); my $analysis_id; - if($analysis->is_stored($db)) { + if ($analysis->is_stored($db)) { $analysis_id = $analysis->dbID(); } else { $analysis_id = $db->get_AnalysisAdaptor->store($analysis); @@ -789,34 +801,42 @@ sub store { my $type = $gene->biotype || ""; + # default to is_current = 1 if this attribute is not set + my $is_current = $gene->is_current; + $is_current = 1 unless (defined($is_current)); + my $original = $gene; my $original_transcripts = $gene->get_all_Transcripts(); my $seq_region_id; ($gene, $seq_region_id) = $self->_pre_store($gene); - my $store_gene_sql = - "INSERT INTO gene " . - "SET biotype = ?, " . - "analysis_id = ?, " . - "seq_region_id = ?, " . - "seq_region_start = ?, " . - "seq_region_end = ?, " . - "seq_region_strand = ?, ". - "description = ?, " . - "source = ?, ". - "status = ? "; - # colum status is used from schema version 34 onwards ( before it was confidence) + my $store_gene_sql = qq( + INSERT INTO gene + SET biotype = ?, + analysis_id = ?, + seq_region_id = ?, + seq_region_start = ?, + seq_region_end = ?, + seq_region_strand = ?, + description = ?, + source = ?, + status = ?, + is_current = ? 
+ ); + # colum status is used from schema version 34 onwards (before it was + # confidence) my $sth = $self->prepare( $store_gene_sql ); - $sth->bind_param(1,$type,SQL_VARCHAR); - $sth->bind_param(2,$analysis_id,SQL_INTEGER); - $sth->bind_param(3,$seq_region_id,SQL_INTEGER); - $sth->bind_param(4,$gene->start,SQL_INTEGER); - $sth->bind_param(5,$gene->end,SQL_INTEGER); - $sth->bind_param(6,$gene->strand,SQL_TINYINT); - $sth->bind_param(7,$gene->description,SQL_LONGVARCHAR); - $sth->bind_param(8,$gene->source,SQL_VARCHAR); - $sth->bind_param(9,$gene->status,SQL_VARCHAR); + $sth->bind_param(1, $type, SQL_VARCHAR); + $sth->bind_param(2, $analysis_id, SQL_INTEGER); + $sth->bind_param(3, $seq_region_id, SQL_INTEGER); + $sth->bind_param(4, $gene->start, SQL_INTEGER); + $sth->bind_param(5, $gene->end, SQL_INTEGER); + $sth->bind_param(6, $gene->strand, SQL_TINYINT); + $sth->bind_param(7, $gene->description, SQL_LONGVARCHAR); + $sth->bind_param(8, $gene->source, SQL_VARCHAR); + $sth->bind_param(9, $gene->status, SQL_VARCHAR); + $sth->bind_param(10, $is_current, SQL_TINYINT); $sth->execute(); $sth->finish(); @@ -836,9 +856,9 @@ sub store { $self->db->dbc->from_seconds_to_date($gene->modified_date()); $sth = $self->prepare($statement); - $sth->bind_param(1,$gene_dbID,SQL_INTEGER); - $sth->bind_param(2,$gene->stable_id,SQL_VARCHAR); - $sth->bind_param(3,$gene->version,SQL_INTEGER); + $sth->bind_param(1, $gene_dbID, SQL_INTEGER); + $sth->bind_param(2, $gene->stable_id, SQL_VARCHAR); + $sth->bind_param(3, $gene->version, SQL_INTEGER); $sth->execute(); $sth->finish(); } @@ -899,8 +919,8 @@ sub store { if(defined($dxref_id)) { $sth = $self->prepare ("UPDATE gene SET display_xref_id = ? 
WHERE gene_id = ?"); - $sth->bind_param(1,$dxref_id,SQL_INTEGER); - $sth->bind_param(2,$gene_dbID,SQL_INTEGER); + $sth->bind_param(1, $dxref_id, SQL_INTEGER); + $sth->bind_param(2, $gene_dbID, SQL_INTEGER); $sth->execute(); $sth->finish(); $display_xref->dbID($dxref_id); @@ -929,14 +949,12 @@ sub store { } - - =head2 remove Arg [1] : Bio::EnsEMBL::Gene $gene the gene to remove from the database Example : $gene_adaptor->remove($gene); - Description: Removes a gene completely from the database. All associated + Description: Removes a gene completely from the database. All associated transcripts, exons, stable_identifiers, descriptions, etc. are removed as well. Use with caution! Returntype : none @@ -951,11 +969,11 @@ sub remove { my $self = shift; my $gene = shift; - if(!ref($gene) || !$gene->isa('Bio::EnsEMBL::Gene')) { + if (!ref($gene) || !$gene->isa('Bio::EnsEMBL::Gene')) { throw("Bio::EnsEMBL::Gene argument expected."); } - if( !$gene->is_stored($self->db()) ) { + if ( !$gene->is_stored($self->db()) ) { warning("Cannot remove gene " . $gene->dbID() . ". Is not stored in " . "this database."); return; @@ -970,7 +988,7 @@ sub remove { # remove all alternative allele entries associated with this gene my $sth = $self->prepare("delete from alt_allele where gene_id = ?"); - $sth->bind_param(1,$gene->dbID,SQL_INTEGER); + $sth->bind_param(1, $gene->dbID, SQL_INTEGER); $sth->execute(); $sth->finish(); @@ -988,14 +1006,14 @@ sub remove { # remove the gene stable identifier $sth = $self->prepare( "delete from gene_stable_id where gene_id = ? " ); - $sth->bind_param(1,$gene->dbID,SQL_INTEGER); + $sth->bind_param(1, $gene->dbID, SQL_INTEGER); $sth->execute(); $sth->finish(); # remove this gene from the database $sth = $self->prepare( "delete from gene where gene_id = ? 
" ); - $sth->bind_param(1,$gene->dbID,SQL_INTEGER); + $sth->bind_param(1, $gene->dbID, SQL_INTEGER); $sth->execute(); $sth->finish(); @@ -1008,66 +1026,64 @@ sub remove { } - =head2 get_Interpro_by_geneid - Arg [1] : string $gene - the stable if of the gene to obtain + Arg [1] : String $gene_stable_id + The stable ID of the gene to obtain Example : @i = $gene_adaptor->get_Interpro_by_geneid($gene->stable_id()); - Description: gets interpro accession numbers by gene stable id. - A hack really - we should have a much more structured - system than this - Returntype : listref of strings + Description: Gets interpro accession numbers by gene stable id. A hack really + - we should have a much more structured system than this. + Returntype : listref of strings (Interpro_acc:description) Exceptions : none - Caller : domainview? + Caller : domainview Status : Stable =cut sub get_Interpro_by_geneid { - my ($self,$gene) = @_; - my $sql=" + my ($self, $gene_stable_id) = @_; + + my $sql = qq( SELECT i.interpro_ac, x.description - FROM transcript t, - translation tl, + FROM transcript t, + translation tl, protein_feature pf, interpro i, - xref x, + xref x, gene_stable_id gsi - WHERE gsi.stable_id = '$gene' - AND t.gene_id = gsi.gene_id - AND tl.transcript_id = t.transcript_id - AND tl.translation_id = pf.translation_id - AND i.id = pf.hit_id - AND i.interpro_ac = x.dbprimary_acc"; + WHERE gsi.stable_id = '$gene_stable_id' + AND t.gene_id = gsi.gene_id + AND t.is_current = 1 + AND tl.transcript_id = t.transcript_id + AND tl.translation_id = pf.translation_id + AND i.id = pf.hit_id + AND i.interpro_ac = x.dbprimary_acc + ); - my $sth = $self->prepare($sql); - $sth->execute; - - my @out; - my %h; - while( (my $arr = $sth->fetchrow_arrayref()) ) { - if( $h{$arr->[0]} ) { next; } - $h{$arr->[0]}=1; - my $string = $arr->[0] .":".$arr->[1]; - - push(@out,$string); - } - + my $sth = $self->prepare($sql); + $sth->execute; + + my @out; + my %h; + while( (my $arr = 
$sth->fetchrow_arrayref()) ) { + if( $h{$arr->[0]} ) { next; } + $h{$arr->[0]}=1; + my $string = $arr->[0] .":".$arr->[1]; + push(@out,$string); + } - return \@out; + return \@out; } - - =head2 update - Arg [1] : Bio::EnsEMBL::Gene + Arg [1] : Bio::EnsEMBL::Gene $gene + The gene to update Example : $gene_adaptor->update($gene); - Description: Updates the type, analysis and display_xref of a gene in the - database. + Description: Updates the type, analysis, display_xref, status, is_current and + description of a gene in the database. Returntype : None Exceptions : thrown if the $gene is not a Bio::EnsEMBL::Gene Caller : general @@ -1076,50 +1092,54 @@ sub get_Interpro_by_geneid { =cut sub update { - my ($self,$gene) = @_; - my $update = 0; - - if( !defined $gene || !ref $gene || !$gene->isa('Bio::EnsEMBL::Gene') ) { - throw("Must update a gene object, not a $gene"); - } + my ($self, $gene) = @_; + my $update = 0; - my $update_gene_sql = " - UPDATE gene - SET biotype = ?, - analysis_id = ?, - display_xref_id = ?, - status = ?, - description = ? - WHERE gene_id = ?"; - - my $display_xref = $gene->display_xref(); - my $display_xref_id; + if ( !defined $gene || !ref $gene || !$gene->isa('Bio::EnsEMBL::Gene') ) { + throw("Must update a gene object, not a $gene"); + } - if( $display_xref && $display_xref->dbID() ) { - $display_xref_id = $display_xref->dbID(); - } else { - $display_xref_id = undef; - } + my $update_gene_sql = qq( + UPDATE gene + SET biotype = ?, + analysis_id = ?, + display_xref_id = ?, + status = ?, + description = ?, + is_current = ? + WHERE gene_id = ? 
+ ); + + my $display_xref = $gene->display_xref(); + my $display_xref_id; + + if ( $display_xref && $display_xref->dbID() ) { + $display_xref_id = $display_xref->dbID(); + } else { + $display_xref_id = undef; + } - my $sth = $self->prepare( $update_gene_sql ); + my $sth = $self->prepare( $update_gene_sql ); - $sth->bind_param(1,$gene->biotype,SQL_VARCHAR); - $sth->bind_param(2,$gene->analysis->dbID,SQL_INTEGER); - $sth->bind_param(3,$display_xref_id,SQL_INTEGER); - $sth->bind_param(4,$gene->status,SQL_VARCHAR); - $sth->bind_param(5,$gene->description,SQL_VARCHAR); - $sth->bind_param(6,$gene->dbID,SQL_INTEGER); + $sth->bind_param(1, $gene->biotype, SQL_VARCHAR); + $sth->bind_param(2, $gene->analysis->dbID, SQL_INTEGER); + $sth->bind_param(3, $display_xref_id, SQL_INTEGER); + $sth->bind_param(4, $gene->status, SQL_VARCHAR); + $sth->bind_param(5, $gene->description, SQL_VARCHAR); + $sth->bind_param(6, $gene->is_current, SQL_TINYINT); + $sth->bind_param(7, $gene->dbID, SQL_INTEGER); - $sth->execute(); + $sth->execute(); - # maybe should update stable id ??? + # maybe should update stable id ??? } # _objs_from_sth # Arg [1] : StatementHandle $sth -# Example : none +# Arg [2] : Bio::EnsEMBL::AssemblyMapper $mapper +# Arg [3] : Bio::EnsEMBL::Slice $dest_slice # Description: PROTECTED implementation of abstract superclass method. 
# responsible for the creation of Genes # Returntype : listref of Bio::EnsEMBL::Genes in target coordinate system @@ -1149,17 +1169,19 @@ sub _objs_from_sth { my ( $gene_id, $seq_region_id, $seq_region_start, $seq_region_end, $seq_region_strand, $analysis_id, $biotype, $display_xref_id, $gene_description, $stable_id, $version, $created_date, - $modified_date, $xref_display_id, $status, $source, + $modified_date, $xref_display_id, $status, $source, $is_current, $xref_primary_acc, $xref_desc, $xref_version, $external_name, $external_db, $external_status, $external_release, $external_db_name, $info_type, $info_text); $sth->bind_columns( \$gene_id, \$seq_region_id, \$seq_region_start, - \$seq_region_end, \$seq_region_strand, \$analysis_id, \$biotype, - \$display_xref_id, \$gene_description, \$status, \$source, + \$seq_region_end, \$seq_region_strand, \$analysis_id, + \$biotype, \$display_xref_id, \$gene_description, + \$status, \$source, \$is_current, \$stable_id, \$version, \$created_date, \$modified_date, - \$xref_display_id, \$xref_primary_acc, \$xref_desc, \$xref_version, + \$xref_display_id, \$xref_primary_acc, \$xref_desc, + \$xref_version, \$external_db, \$external_status, \$external_release, \$external_db_name, \$info_type, \$info_text); @@ -1282,8 +1304,8 @@ sub _objs_from_sth { } #finally, create the new gene - push @genes, Bio::EnsEMBL::Gene->new - ( '-analysis' => $analysis, + push @genes, Bio::EnsEMBL::Gene->new( + '-analysis' => $analysis, '-biotype' => $biotype, '-start' => $seq_region_start, '-end' => $seq_region_end, @@ -1299,9 +1321,11 @@ sub _objs_from_sth { '-external_name' => $external_name, '-external_db' => $external_db, '-external_status' => $external_status, - '-display_xref' => $display_xref, - '-status' => $status, - '-source' => $source ); + '-display_xref' => $display_xref, + '-status' => $status, + '-source' => $source, + '-is_current' => $is_current + ); } return \@genes; @@ -1310,27 +1334,27 @@ sub _objs_from_sth { =head2 
cache_gene_seq_mappings - Args : none Example : $gene_adaptor->cache_gene_seq_mappings(); Description: caches all the assembly mappings needed for genes Returntype : None Exceptions : None Caller : general Status : At Risk - : New experimental code. + : New experimental code =cut sub cache_gene_seq_mappings{ my ($self) = @_; - #get the sequence level to map too + # get the sequence level to map too - my $sql = (<<SSQL); - SELECT name - FROM coord_system + my $sql = qq( + SELECT name + FROM coord_system WHERE attrib like "%sequence_level%" -SSQL + ); + my $sth = $self->prepare($sql); $sth->execute(); @@ -1342,144 +1366,148 @@ SSQL my $ama = $self->db->get_AssemblyMapperAdaptor(); my $cs1 = $csa->fetch_by_name($sequence_level); - - #get level to map to two + # get level to map to two my $mcc = $self->db->get_MetaCoordContainerAdaptor(); my $csnew = $mcc->fetch_all_CoordSystems_by_feature_type('gene'); - foreach my $cs2 (@$csnew){ - my $am = $ama->fetch_by_CoordSystems($cs1,$cs2); + foreach my $cs2 (@$csnew) { + my $am = $ama->fetch_by_CoordSystems($cs1, $cs2); $am->register_all(); - }; + } } =head2 fetch_all_by_exon_supporting_evidence - Arg [1] : string hit_name - Arg [2] : string feature type - (one of "dna_align_feature" or "protein_align_feature") + Arg [1] : String $hit_name + Name of supporting feature + Arg [2] : String $feature_type + one of "dna_align_feature" or "protein_align_feature" Arg [3] : (optional) Bio::Ensembl::Analysis - Example : $genes = $gene_adaptor->fetch_all_by_exon_supporting_evidence(); - Description: Gets all the genes with transcripts with exons which have a specified hit on a particular - type of feature. Optionally filter by analysis. + Example : $genes = $gene_adaptor->fetch_all_by_exon_supporting_evidence( + 'XYZ', 'dna_align_feature'); + Description: Gets all the genes with transcripts with exons which have a + specified hit on a particular type of feature. Optionally filter + by analysis. 
Returntype : Listref of Bio::EnsEMBL::Gene Exceptions : If feature_type is not of correct type. - Caller : ? + Caller : general Status : At Risk =cut sub fetch_all_by_exon_supporting_evidence { + my ($self, $hit_name, $feature_type, $analysis) = @_; - my ($self, $hit_name, $feature_type, $analysis) = @_; - - if($feature_type !~ /(dna)|(protein)_align_feature/) { - throw("feature type must be dna_align_feature or protein_align_feature"); - } - - my $anal_from = ", analysis a " if ($analysis); - my $anal_where = "AND a.analysis_id = f.analysis_id AND a.analysis_id=? " if ($analysis); - - my $sql = "SELECT DISTINCT(g.gene_id) - FROM gene g, - transcript t, - exon_transcript et, - supporting_feature sf, - $feature_type f - $anal_from - WHERE g.gene_id=t.gene_id - AND t.transcript_id = et.transcript_id - AND et.exon_id = sf.exon_id - AND sf.feature_id = f.${feature_type}_id - AND sf.feature_type = ? - AND f.hit_name=? - $anal_where"; + if ($feature_type !~ /(dna)|(protein)_align_feature/) { + throw("feature type must be dna_align_feature or protein_align_feature"); + } - my $sth = $self->prepare($sql); + my $anal_from = ", analysis a " if ($analysis); + my $anal_where = "AND a.analysis_id = f.analysis_id AND a.analysis_id=? " if ($analysis); + + my $sql = qq( + SELECT DISTINCT(g.gene_id) + FROM gene g, + transcript t, + exon_transcript et, + supporting_feature sf, + $feature_type f + $anal_from + WHERE g.gene_id = t.gene_id + AND g.is_current = 1 + AND t.transcript_id = et.transcript_id + AND et.exon_id = sf.exon_id + AND sf.feature_id = f.${feature_type}_id + AND sf.feature_type = ? + AND f.hit_name=? 
+ $anal_where + ); - $sth->bind_param(1, $feature_type, SQL_VARCHAR); - $sth->bind_param(2, $hit_name, SQL_VARCHAR); - $sth->bind_param(3, $analysis->dbID(), SQL_INTEGER) if ($analysis); + my $sth = $self->prepare($sql); - $sth->execute(); + $sth->bind_param(1, $feature_type, SQL_VARCHAR); + $sth->bind_param(2, $hit_name, SQL_VARCHAR); + $sth->bind_param(3, $analysis->dbID(), SQL_INTEGER) if ($analysis); - my @genes; + $sth->execute(); - while( my $id = $sth->fetchrow_array ) { - my $gene = $self->fetch_by_dbID( $id ); - push(@genes, $gene) if $gene; - } + my @genes; - return \@genes; + while ( my $id = $sth->fetchrow_array ) { + my $gene = $self->fetch_by_dbID($id); + push(@genes, $gene) if $gene; + } + return \@genes; } =head2 fetch_all_by_transcript_supporting_evidence - Arg [1] : string hit_name - Arg [2] : string feature type - (one of "dna_align_feature" or "protein_align_feature") + Arg [1] : String $hit_name + Name of supporting feature + Arg [2] : String $feature_type + one of "dna_align_feature" or "protein_align_feature" Arg [3] : (optional) Bio::Ensembl::Analysis - Example : $genes = $gene_adaptor->fetch_all_by_transcript_supporting_evidence(); - Description: Gets all the genes with transcripts with evidence for a specified hit on a particular - type of feature. Optionally filter by analysis. + Example : $genes = $gene_adaptor->fetch_all_by_transcript_supporting_evidence('XYZ', 'dna_align_feature'); + Description: Gets all the genes with transcripts with evidence for a + specified hit on a particular type of feature. Optionally filter + by analysis. Returntype : Listref of Bio::EnsEMBL::Gene Exceptions : If feature_type is not of correct type. - Caller : ? 
+ Caller : general Status : At Risk =cut sub fetch_all_by_transcript_supporting_evidence { + my ($self, $hit_name, $feature_type, $analysis) = @_; - my ($self, $hit_name, $feature_type, $analysis) = @_; - - if($feature_type !~ /(dna)|(protein)_align_feature/) { - throw("feature type must be dna_align_feature or protein_align_feature"); - } - - my $anal_from = ", analysis a " if ($analysis); - my $anal_where = "AND a.analysis_id = f.analysis_id AND a.analysis_id=? " if ($analysis); - - my $sql = "SELECT DISTINCT(g.gene_id) - FROM gene g, - transcript t, - transcript_supporting_feature sf, - $feature_type f - $anal_from - WHERE g.gene_id = t.gene_id - AND t.transcript_id = sf.transcript_id - AND sf.feature_id = f.${feature_type}_id - AND sf.feature_type = ? - AND f.hit_name=? - $anal_where"; + if($feature_type !~ /(dna)|(protein)_align_feature/) { + throw("feature type must be dna_align_feature or protein_align_feature"); + } - my $sth = $self->prepare($sql); + my $anal_from = ", analysis a " if ($analysis); + my $anal_where = "AND a.analysis_id = f.analysis_id AND a.analysis_id=? " if ($analysis); + + my $sql = qq( + SELECT DISTINCT(g.gene_id) + FROM gene g, + transcript t, + transcript_supporting_feature sf, + $feature_type f + $anal_from + WHERE g.gene_id = t.gene_id + AND g.is_current = 1 + AND t.transcript_id = sf.transcript_id + AND sf.feature_id = f.${feature_type}_id + AND sf.feature_type = ? + AND f.hit_name=? 
+ $anal_where + ); - $sth->bind_param(1, $feature_type, SQL_VARCHAR); - $sth->bind_param(2, $hit_name, SQL_VARCHAR); - $sth->bind_param(3, $analysis->dbID(), SQL_INTEGER) if ($analysis); + my $sth = $self->prepare($sql); - $sth->execute(); + $sth->bind_param(1, $feature_type, SQL_VARCHAR); + $sth->bind_param(2, $hit_name, SQL_VARCHAR); + $sth->bind_param(3, $analysis->dbID(), SQL_INTEGER) if ($analysis); - my @genes; + $sth->execute(); - while( my $id = $sth->fetchrow_array ) { - my $gene = $self->fetch_by_dbID($id); - push(@genes, $gene) if $gene; - } + my @genes; - return \@genes; + while( my $id = $sth->fetchrow_array ) { + my $gene = $self->fetch_by_dbID($id); + push(@genes, $gene) if $gene; + } + return \@genes; } - - ########################## # # # DEPRECATED METHODS # @@ -1491,25 +1519,24 @@ sub fetch_all_by_transcript_supporting_evidence { Description: DEPRECATED - use fetch_all_by_external_name instead - =cut sub fetch_by_maximum_DBLink { - my ( $self, $external_id ) = @_; + my ($self, $external_id) = @_; deprecate( "use fetch_all_by_external_name instead" ); - my $genes=$self->fetch_all_by_external_name( $external_id); + my $genes=$self->fetch_all_by_external_name($external_id); my $biggest; - my $max=0; - my $size=scalar(@$genes); + my $max = 0; + my $size = scalar(@$genes); if ($size > 0) { foreach my $gene (@$genes) { my $size = scalar(@{$gene->get_all_Exons}); if ($size > $max) { $biggest = $gene; - $max=$size; + $max = $size; } } return $biggest; @@ -1518,7 +1545,6 @@ sub fetch_by_maximum_DBLink { } - =head2 get_display_xref Description: DEPRECATED use $gene->display_xref @@ -1526,39 +1552,40 @@ sub fetch_by_maximum_DBLink { =cut sub get_display_xref { - my ($self, $gene ) = @_; + my ($self, $gene) = @_; deprecate( "display xref should retrieved from Gene object directly" ); - if( !defined $gene ) { - throw("Must call with a Gene object"); + if ( !defined $gene ) { + throw("Must call with a Gene object"); } - my $sth = $self->prepare("SELECT 
e.db_name, - x.display_label, - x.xref_id - FROM gene g, - xref x, - external_db e - WHERE g.gene_id = ? - AND g.display_xref_id = x.xref_id - AND x.external_db_id = e.external_db_id - "); - $sth->bind_param(1,$gene->dbID,SQL_INTEGER); + my $sth = $self->prepare(qq( + SELECT e.db_name, + x.display_label, + x.xref_id + FROM gene g, + xref x, + external_db e + WHERE g.gene_id = ? + AND g.display_xref_id = x.xref_id + AND x.external_db_id = e.external_db_id + )); + + $sth->bind_param(1, $gene->dbID, SQL_INTEGER); $sth->execute(); - - my ($db_name, $display_label, $xref_id ) = $sth->fetchrow_array(); - if( !defined $xref_id ) { + my ($db_name, $display_label, $xref_id) = $sth->fetchrow_array(); + if ( !defined $xref_id ) { return undef; } - my $db_entry = Bio::EnsEMBL::DBEntry->new - ( + + my $db_entry = Bio::EnsEMBL::DBEntry->new( -dbid => $xref_id, -adaptor => $self->db->get_DBEntryAdaptor(), -dbname => $db_name, -display_id => $display_label - ); + ); return $db_entry; } @@ -1575,23 +1602,22 @@ sub get_description { deprecate( "Gene description should be loaded on gene retrieval. Use gene->get_description()" ); - if( !defined $dbID ) { - throw("must call with dbID"); + if ( !defined $dbID ) { + throw("must call with dbID"); } my $sth = $self->prepare("SELECT description FROM gene_description WHERE gene_id = ?"); - $sth->bind_param(1,$dbID,SQL_INTEGER); - + + $sth->bind_param(1, $dbID, SQL_INTEGER); $sth->execute(); + my @array = $sth->fetchrow_array(); return $array[0]; } - - =head2 fetch_by_Peptide_id Description: DEPRECATED, use fetch_by_translation_stable_id() @@ -1599,11 +1625,12 @@ sub get_description { =cut sub fetch_by_Peptide_id { - my ( $self, $translation_stable_id) = @_; + my ( $self, $translation_stable_id) = @_; - deprecate( "Please use better named fetch_by_translation_stable_id \n".caller(2) ); + deprecate( "Please use better named fetch_by_translation_stable_id \n". 
+ caller(2) ); - $self->fetch_by_translation_stable_id($translation_stable_id); + $self->fetch_by_translation_stable_id($translation_stable_id); } @@ -1616,10 +1643,10 @@ sub fetch_by_Peptide_id { sub get_stable_entry_info { my ($self,$gene) = @_; - deprecated( "stable id info is loaded on default, no lazy loading necessary" ); + deprecated("stable id info is loaded on default, no lazy loading necessary"); - if( !defined $gene || !ref $gene || !$gene->isa('Bio::EnsEMBL::Gene') ) { - throw("Needs a gene object, not a $gene"); + if ( !defined $gene || !ref $gene || !$gene->isa('Bio::EnsEMBL::Gene') ) { + throw("Needs a gene object, not a $gene"); } my $created_date = $self->db->dbc->from_date_to_seconds("created_date"); @@ -1630,7 +1657,7 @@ sub get_stable_entry_info { FROM gene_stable_id WHERE gene_id = ?"); - $sth->bind_param(1,$gene->dbID,SQL_INTEGER); + $sth->bind_param(1, $gene->dbID, SQL_INTEGER); $sth->execute(); my @array = $sth->fetchrow_array(); @@ -1643,7 +1670,6 @@ sub get_stable_entry_info { } - =head2 fetch_all_by_DBEntry Description: DEPRECATED - Use fetch_all_by_external_name instead @@ -1652,13 +1678,12 @@ sub get_stable_entry_info { sub fetch_all_by_DBEntry { my $self = shift; - deprecate('This method has been deprecated because there was another.' . - "Method which did exactly the same thing.\n" . 
- 'Use fetch_all_by_external_name instead.'); + + deprecate('Use fetch_all_by_external_name instead.'); + return $self->fetch_all_by_external_name(@_); } - 1; diff --git a/modules/Bio/EnsEMBL/DBSQL/TranscriptAdaptor.pm b/modules/Bio/EnsEMBL/DBSQL/TranscriptAdaptor.pm index 307074c427decaecb2d4a5aeef88d75fa9c90b48..665f9831d7759407f5fc8aa2b59da19a60de1682 100644 --- a/modules/Bio/EnsEMBL/DBSQL/TranscriptAdaptor.pm +++ b/modules/Bio/EnsEMBL/DBSQL/TranscriptAdaptor.pm @@ -1,26 +1,10 @@ -# EnsEMBL Transcript reading writing adaptor for mySQL -# -# Copyright EMBL-EBI 2001 -# -# Author: Arne Stabenau -# based on -# Elia Stupkas Gene_Obj -# -# Date : 20.02.2001 -# +package Bio::EnsEMBL::DBSQL::TranscriptAdaptor; =head1 NAME Bio::EnsEMBL::DBSQL::TranscriptAdaptor - An adaptor which performs database interaction relating to the storage and retrieval of Transcripts -=head1 DESCRIPTION - -This adaptor provides a means to retrieve and store information related to -Transcripts. Primarily this involves the retrieval or storage of -Bio::EnsEMBL::Transcript objects from a database. -See Bio::EnsEMBL::Transcript for details of the Transcript class. - =head1 SYNOPSIS $db = Bio::EnsEMBL::DBSQL::DBAdaptor->new(...); @@ -37,19 +21,31 @@ See Bio::EnsEMBL::Transcript for details of the Transcript class. ($transcript) = @{$transcript_adaptor->fetch_all_by_external_name('BRCA2')}; +=head1 DESCRIPTION -=head1 CONTACT +This adaptor provides a means to retrieve and store information related to +Transcripts. Primarily this involves the retrieval or storage of +Bio::EnsEMBL::Transcript objects from a database. - Post questions/comments to the EnsEMBL development list: - ensembl-dev@ebi.ac.uk +See Bio::EnsEMBL::Transcript for details of the Transcript class. -=head1 METHODS +=head1 LICENCE -=cut +This code is distributed under an Apache style licence. Please see +http://www.ensembl.org/info/about/code_licence.html for details. 
-package Bio::EnsEMBL::DBSQL::TranscriptAdaptor; +=head1 AUTHOR + +Arne Stabenau <stabenau@ebi.ac.uk>, Ensembl core API team +Based on Elia Stupkas Gene_Obj + +=head1 CONTACT + +Please post comments/questions to the Ensembl development list +<ensembl-dev@ebi.ac.uk> + +=cut -use vars qw(@ISA); use strict; use Bio::EnsEMBL::DBSQL::BaseFeatureAdaptor; @@ -57,20 +53,16 @@ use Bio::EnsEMBL::Gene; use Bio::EnsEMBL::Exon; use Bio::EnsEMBL::Transcript; use Bio::EnsEMBL::Translation; - use Bio::EnsEMBL::Utils::Exception qw( deprecate throw warning ); +use vars qw(@ISA); @ISA = qw( Bio::EnsEMBL::DBSQL::BaseFeatureAdaptor ); - - # _tables # -# Arg [1] : none -# Example : none -# Description: PROTECTED implementation of superclass abstract method -# returns the names, aliases of the tables to use for queries +# Description: PROTECTED implementation of superclass abstract method. +# Returns the names, aliases of the tables to use for queries. # Returntype : list of listrefs of strings # Exceptions : none # Caller : internal @@ -86,10 +78,8 @@ sub _tables { #_columns # -# Arg [1] : none -# Example : none -# Description: PROTECTED implementation of superclass abstract method -# returns a list of columns to use for queries +# Description: PROTECTED implementation of superclass abstract method. +# Returns a list of columns to use for queries. 
# Returntype : list of strings # Exceptions : none # Caller : internal @@ -101,11 +91,13 @@ sub _columns { my $created_date = $self->db->dbc->from_date_to_seconds("created_date"); my $modified_date = $self->db->dbc->from_date_to_seconds("modified_date"); - return ( 't.transcript_id', 't.seq_region_id', 't.seq_region_start', 't.seq_region_end', - 't.seq_region_strand', 't.analysis_id', 't.gene_id', + return ( 't.transcript_id', 't.seq_region_id', 't.seq_region_start', + 't.seq_region_end', 't.seq_region_strand', 't.analysis_id', + 't.gene_id', 't.is_current', 't.display_xref_id', 'tsi.stable_id','tsi.version', $created_date, $modified_date, 't.description', 't.biotype', 't.status', - 'x.display_label', 'exdb.db_name' ,'exdb.status', 'exdb.db_display_name', + 'x.display_label', 'exdb.db_name' ,'exdb.status', + 'exdb.db_display_name', 'x.info_type', 'x.info_text'); } @@ -117,13 +109,12 @@ sub _left_join { } - =head2 fetch_by_stable_id - Arg [1] : string $stable_id + Arg [1] : String $stable_id The stable id of the transcript to retrieve - Example : $trans = $trans_adptr->fetch_by_stable_id('ENST00000309301'); - Description: Retrieves a transcript via its stable id + Example : my $tr = $tr_adaptor->fetch_by_stable_id('ENST00000309301'); + Description: Retrieves a transcript via its stable id. Returntype : Bio::EnsEMBL::Transcript Exceptions : none Caller : general @@ -131,33 +122,47 @@ sub _left_join { =cut -sub fetch_by_stable_id{ - my ($self,$id) = @_; +sub fetch_by_stable_id { + my ($self, $stable_id) = @_; - # because of the way this query is constructed (with a left join to the - # transcript_stable_id table), it is faster to do 2 queries, getting the - # transcript_id in the first query - my $sth = $self->prepare("SELECT transcript_id from transcript_stable_id ". 
- "WHERE stable_id = ?"); - $sth->bind_param(1,$id,SQL_VARCHAR); - $sth->execute(); + my $constraint = "tsi.stable_id = '$stable_id' AND t.is_current = 1"; + my ($transcript) = @{ $self->generic_fetch($constraint) }; - my ($dbID) = $sth->fetchrow_array(); + return $transcript; +} - return undef if(!$dbID); - return $self->fetch_by_dbID($dbID); +=head2 fetch_all_versions_by_stable_id -} + Arg [1] : String $stable_id + The stable ID of the transcript to retrieve + Example : my $tr = $tr_adaptor->fetch_all_version_by_stable_id + ('ENST00000309301'); + Description : Similar to fetch_by_stable_id, but retrieves all versions of a + transcript stored in the database. + Returntype : listref of Bio::EnsEMBL::Transcript objects + Exceptions : if we cant get the gene in given coord system + Caller : general + Status : At Risk +=cut + +sub fetch_all_versions_by_stable_id { + my ($self, $stable_id) = @_; + + my $constraint = "tsi.stable_id = '$stable_id'"; + + return $self->generic_fetch($constraint); +} =head2 fetch_by_translation_stable_id - Arg [1] : string $transl_stable_id + Arg [1] : String $transl_stable_id The stable identifier of the translation of the transcript to retrieve - Example : $t = $tadptr->fetch_by_translation_stable_id('ENSP00000311007'); + Example : my $tr = $tr_adaptor->fetch_by_translation_stable_id + ('ENSP00000311007'); Description: Retrieves a Transcript object using the stable identifier of its translation. Returntype : Bio::EnsEMBL::Transcript @@ -170,12 +175,18 @@ sub fetch_by_stable_id{ sub fetch_by_translation_stable_id { my ($self, $transl_stable_id ) = @_; - my $sth = $self->prepare( "SELECT t.transcript_id " . - "FROM translation_stable_id tsi, translation t ". - "WHERE tsi.stable_id = ? " . - "AND t.translation_id = tsi.translation_id"); + my $sth = $self->prepare(qq( + SELECT t.transcript_id + FROM translation_stable_id tsi, + transcript t, + translation tl + WHERE tsi.stable_id = ? 
+ AND tl.translation_id = tsi.translation_id + AND tl.transcript_id = t.transcript_id + AND t.is_current = 1 + )); - $sth->bind_param(1,"$transl_stable_id",SQL_VARCHAR); + $sth->bind_param(1, $transl_stable_id, SQL_VARCHAR); $sth->execute(); my ($id) = $sth->fetchrow_array; @@ -190,10 +201,10 @@ sub fetch_by_translation_stable_id { =head2 fetch_by_translation_id - Arg [1] : int $id + Arg [1] : Int $id The internal identifier of the translation whose transcript - is to be retrieved. - Example : $tr = $tr_adaptor->fetch_by_translation_id($transl->dbID()); + is to be retrieved + Example : my $tr = $tr_adaptor->fetch_by_translation_id($transl->dbID); Description: Given the internal identifier of a translation this method retrieves the transcript associated with that translation. If the transcript cannot be found undef is returned instead. @@ -208,13 +219,13 @@ sub fetch_by_translation_id { my $self = shift; my $id = shift; - throw("id argument is required.") if(!$id); + throw("id argument is required.") unless ($id); my $sth = $self->prepare( "SELECT t.transcript_id " . "FROM translation t ". "WHERE t.translation_id = ?"); - $sth->bind_param(1,$id,SQL_INTEGER); + $sth->bind_param(1, $id, SQL_INTEGER); $sth->execute(); my ($dbID) = $sth->fetchrow_array; @@ -227,21 +238,21 @@ sub fetch_by_translation_id { } - =head2 fetch_all_by_Gene Arg [1] : Bio::EnsEMBL::Gene $gene - Example : none - Description: retrieves Transcript objects for given gene. Puts Genes slice + The gene to fetch transcripts of + Example : my $gene = $gene_adaptor->fetch_by_stable_id('ENSG0000123'); + my @transcripts = $tr_adaptor->fetch_all_by_Gene($gene); + Description: Retrieves Transcript objects for given gene. Puts Genes slice in each Transcript. 
- Returntype : listref Bio::EnsEMBL::Transcript + Returntype : Listref of Bio::EnsEMBL::Transcript objects Exceptions : none Caller : Gene->get_all_Transcripts() Status : Stable =cut - sub fetch_all_by_Gene { my $self = shift; my $gene = shift; @@ -259,19 +270,19 @@ sub fetch_all_by_Gene { my $gslice = $gene->slice(); my $slice; - if(!$gslice) { + if (!$gslice) { throw("Gene must have attached slice to retrieve transcripts."); } - if($gene->start() < 1 || $gene->end() > $gslice->length()) { + if ($gene->start() < 1 || $gene->end() > $gslice->length()) { $slice = $self->db->get_SliceAdaptor->fetch_by_Feature($gene); } else { $slice = $gslice; } - my $transcripts = $self->fetch_all_by_Slice_constraint($slice,$constraint); + my $transcripts = $self->fetch_all_by_Slice_constraint($slice, $constraint); - if($slice != $gslice) { + if ($slice != $gslice) { my @out; foreach my $tr (@$transcripts) { push @out, $tr->transfer($gslice); @@ -279,27 +290,25 @@ sub fetch_all_by_Gene { $transcripts = \@out; } - return $transcripts; } - =head2 fetch_all_by_Slice Arg [1] : Bio::EnsEMBL::Slice $slice - The slice to fetch transcripts on. - Arg [2] : (optional) boolean $load_exons - if true, exons will be loaded immediately rather than - lazy loaded later. - Arg [3] : (optional) string $logic_name - the logic name of the type of features to obtain - Example : @transcripts = @{ $tr_adaptor->fetch_all_by_Slice($slice) }; + The slice to fetch transcripts on + Arg [2] : (optional) Boolean $load_exons + If true, exons will be loaded immediately rather than + lazy loaded later + Arg [3] : (optional) String $logic_name + The logic name of the type of features to obtain + Example : my @transcripts = @{ $tr_adaptor->fetch_all_by_Slice($slice) }; Description: Overrides superclass method to optionally load exons - immediately rather than lazy-loading them later. This + immediately rather than lazy-loading them later. 
This is more efficient when there are a lot of transcripts whose exons are going to be used. - Returntype : reference to list of transcripts + Returntype : Listref of Bio::EnsEMBL::Transcript objects Exceptions : thrown if exon cannot be placed on transcript slice Caller : Slice::get_all_Transcripts Status : Stable @@ -312,10 +321,11 @@ sub fetch_all_by_Slice { my $load_exons = shift; my $logic_name = shift; - my $transcripts = $self->SUPER::fetch_all_by_Slice($slice, $logic_name); + my $transcripts = $self->SUPER::fetch_all_by_Slice_constraint($slice, + 't.is_current = 1', $logic_name); # if there are 0 or 1 transcripts still do lazy-loading - if(!$load_exons || @$transcripts < 2) { + if (!$load_exons || @$transcripts < 2) { return $transcripts; } @@ -325,7 +335,6 @@ sub fetch_all_by_Slice { # first check if the exons are already preloaded return $transcripts if( exists $transcripts->[0]->{'_trans_exon_array'}); - # get extent of region spanned by transcripts my ($min_start, $max_end); foreach my $tr (@$transcripts) { @@ -379,9 +388,9 @@ sub fetch_all_by_Slice { foreach my $ex (@$exons) { my $new_ex; - if($slice != $ext_slice) { + if ($slice != $ext_slice) { $new_ex = $ex->transfer($slice) if($slice != $ext_slice); - if(!$new_ex) { + if (!$new_ex) { throw("Unexpected. Exon could not be transfered onto transcript slice."); } } else { @@ -403,23 +412,23 @@ sub fetch_all_by_Slice { } - =head2 fetch_all_by_external_name - Arg [1] : string $external_id + Arg [1] : String $external_id An external identifier of the transcript to be obtained - Example : @trans = @{$tr_adaptor->fetch_all_by_external_name('ARSE')}; + Example : my @transcripts = @{ $tr_adaptor->fetch_all_by_external_name + ('ARSE') }; Description: Retrieves all transcripts which are associated with an external identifier such as a GO term, HUGO id, Swissprot - identifer, etc. Usually there will only be a single transcript - returned in the listref, but not always. 
Transcripts are - returned in their native coordinate system. That is, the + identifer, etc. Usually there will only be a single transcript + returned in the listref, but not always. Transcripts are + returned in their native coordinate system. That is, the coordinate system in which they are stored in the database. If they are required in another coordinate system the Transcript::transfer or Transcript::transform method can be - used to convert them. If no transcripts with the external + used to convert them. If no transcripts with the external identifier are found, a reference to an empty list is returned. - Returntype : reference to a list of transcripts + Returntype : Listref of Bio::EnsEMBL::Transcript objects Exceptions : none Caller : general Status : Stable @@ -441,10 +450,11 @@ sub fetch_all_by_external_name { =head2 fetch_by_display_label - Arg [1] : string $label - Example : my $transcript = $transcriptAdaptor->fetch_by_display_label( "BRCA2" ); - Description: returns the transcript which has the given display label or undef if - there is none. If there are more than 1, only the first is reported. + Arg [1] : String $label - display label of transcript to fetch + Example : my $tr = $tr_adaptor->fetch_by_display_label("BRCA2"); + Description: Returns the transcript which has the given display label or + undef if there is none. If there are more than 1, only the first + is reported. 
Returntype : Bio::EnsEMBL::Transcript Exceptions : none Caller : general @@ -456,18 +466,21 @@ sub fetch_by_display_label { my $self = shift; my $label = shift; - my ( $transcript ) = @{$self->generic_fetch( "x.display_label = \"$label\"" )}; + my $constraint = "x.display_label = '$label' AND t.is_current = 1"; + my ($transcript) = @{ $self->generic_fetch($constraint) }; + return $transcript; } =head2 fetch_all_by_exon_stable_id - Arg [1] : string $stable_id + Arg [1] : String $stable_id The stable id of an exon in a transcript - Example : $trans = $trans_adptr->fetch_all_by_exon_stable_id('ENSE00000309301'); - Description: Retrieves a list of transcripts via an exon stable id - Returntype : Bio::EnsEMBL::Transcript + Example : my $tr = $tr_adaptor->fetch_all_by_exon_stable_id + ('ENSE00000309301'); + Description: Retrieves a list of transcripts via an exon stable id. + Returntype : Listref of Bio::EnsEMBL::Transcript objects Exceptions : none Caller : general Status : Stable @@ -475,18 +488,24 @@ sub fetch_by_display_label { =cut sub fetch_all_by_exon_stable_id { - my ($self, $stable_id ) = @_; + my ($self, $stable_id) = @_; + my @trans ; - my $sth = $self->prepare( qq( SELECT et.transcript_id - FROM exon_transcript as et, - exon_stable_id as esi - WHERE esi.exon_id = et.exon_id and - esi.stable_id = ? )); - $sth->bind_param(1,"$stable_id",SQL_VARCHAR); + + my $sth = $self->prepare(qq( + SELECT t.transcript_id + FROM exon_transcript et, exon_stable_id esi, transcript t + WHERE esi.exon_id = et.exon_id + AND et.transcript_id = t.transcript_id + AND esi.stable_id = ? 
+ AND t.is_current = 1 + )); + + $sth->bind_param(1, $stable_id, SQL_VARCHAR); $sth->execute(); while( my $id = $sth->fetchrow_array ) { - my $transcript = $self->fetch_by_dbID( $id ); + my $transcript = $self->fetch_by_dbID($id); push(@trans, $transcript) if $transcript; } @@ -502,18 +521,18 @@ sub fetch_all_by_exon_stable_id { Arg [1] : Bio::EnsEMBL::Transcript $transcript The transcript to be written to the database - Arg [2] : int $gene_dbID + Arg [2] : Int $gene_dbID The identifier of the gene that this transcript is associated with - Arg [3] : (optional) int $analysis_id + Arg [3] : DEPRECATED (optional) Int $analysis_id The analysis_id to use when storing this gene. This is for backward compatibility only and used to fall back to the gene analysis_id if no analysis object is attached to the transcript (which you should do for new code). - Example : $transID = $transcriptAdaptor->store($transcript, $gene->dbID); + Example : $transID = $tr_adaptor->store($transcript, $gene->dbID); Description: Stores a transcript in the database and returns the new internal identifier for the stored transcript. 
- Returntype : int + Returntype : Int Exceptions : none Caller : general Status : Stable @@ -536,6 +555,10 @@ sub store { #force lazy-loading of exons and ensure coords are correct $transcript->recalculate_coordinates(); + # default to is_current = 1 if this attribute is not set + my $is_current = $transcript->is_current; + $is_current = 1 unless (defined($is_current)); + # store analysis my $analysis = $transcript->analysis(); my $new_analysis_id; @@ -565,7 +588,7 @@ sub store { my $exons = $transcript->get_all_Exons(); my $exonAdaptor = $db->get_ExonAdaptor(); foreach my $exon ( @{$exons} ) { - $exonAdaptor->store( $exon ); + $exonAdaptor->store($exon); } my $original_translation = $transcript->translation(); @@ -585,19 +608,21 @@ sub store { my $tst = $self->prepare(qq( INSERT INTO transcript (gene_id, analysis_id, seq_region_id, seq_region_start, - seq_region_end, seq_region_strand, biotype, status, description) - VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?) + seq_region_end, seq_region_strand, biotype, status, description, + is_current) + VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?) 
)); - $tst->bind_param(1,$gene_dbID,SQL_INTEGER); - $tst->bind_param(2,$new_analysis_id,SQL_INTEGER); - $tst->bind_param(3,$seq_region_id,SQL_INTEGER); - $tst->bind_param(4,$transcript->start,SQL_INTEGER); - $tst->bind_param(5,$transcript->end,SQL_INTEGER); - $tst->bind_param(6,$transcript->strand,SQL_TINYINT); - $tst->bind_param(7,$transcript->biotype,SQL_VARCHAR); - $tst->bind_param(8,$transcript->status,SQL_VARCHAR); - $tst->bind_param(9,$transcript->description,SQL_LONGVARCHAR); + $tst->bind_param(1, $gene_dbID, SQL_INTEGER); + $tst->bind_param(2, $new_analysis_id, SQL_INTEGER); + $tst->bind_param(3, $seq_region_id, SQL_INTEGER); + $tst->bind_param(4, $transcript->start, SQL_INTEGER); + $tst->bind_param(5, $transcript->end, SQL_INTEGER); + $tst->bind_param(6, $transcript->strand, SQL_TINYINT); + $tst->bind_param(7, $transcript->biotype, SQL_VARCHAR); + $tst->bind_param(8, $transcript->status, SQL_VARCHAR); + $tst->bind_param(9, $transcript->description, SQL_LONGVARCHAR); + $tst->bind_param(10, $is_current, SQL_INTEGER); $tst->execute(); $tst->finish(); @@ -679,8 +704,8 @@ sub store { if(defined($dxref_id)) { my $sth = $self->prepare( "update transcript set display_xref_id = ?". " where transcript_id = ?"); - $sth->bind_param(1,$dxref_id,SQL_INTEGER); - $sth->bind_param(2,$transc_dbID,SQL_INTEGER); + $sth->bind_param(1, $dxref_id, SQL_INTEGER); + $sth->bind_param(2, $transc_dbID, SQL_INTEGER); $sth->execute(); $dxref->dbID($dxref_id); $dxref->adaptor($dbEntryAdaptor); @@ -702,9 +727,9 @@ sub store { ." values (?,?,?)"); my $rank = 1; foreach my $exon ( @{$transcript->get_all_Exons} ) { - $etst->bind_param(1,$exon->dbID,SQL_INTEGER); - $etst->bind_param(2,$transc_dbID,SQL_INTEGER); - $etst->bind_param(3,$rank,SQL_INTEGER); + $etst->bind_param(1, $exon->dbID, SQL_INTEGER); + $etst->bind_param(2, $transc_dbID, SQL_INTEGER); + $etst->bind_param(3, $rank, SQL_INTEGER); $etst->execute(); $rank++; } @@ -726,20 +751,11 @@ sub store { " stable_id = ?, ". 
"version = ?, "; - $statement .= "created_date = " . $self->db->dbc->from_seconds_to_date($transcript->created_date()) . ","; - -# if( $transcript->created_date() ) { -# $statement .= "created_date = from_unixtime( ".$transcript->created_date()."),"; -# } else { -# $statement .= "created_date = \"0000-00-00 00:00:00\","; -# } - $statement .= "modified_date = " . $self->db->dbc->from_seconds_to_date($transcript->modified_date()) ; + $statement .= "created_date = " . + $self->db->dbc->from_seconds_to_date($transcript->created_date()) . ","; -# if( $transcript->modified_date() ) { -# $statement .= "modified_date = from_unixtime( ".$transcript->modified_date().")"; -# } else { -# $statement .= "modified_date = \"0000-00-00 00:00:00\""; -# } + $statement .= "modified_date = " . + $self->db->dbc->from_seconds_to_date($transcript->modified_date()) ; my $sth = $self->prepare($statement); $sth->bind_param(1,$transc_dbID,SQL_INTEGER); @@ -749,11 +765,10 @@ sub store { $sth->finish(); } - - # Now the supporting evidence # should be stored from featureAdaptor - my $sql = "insert into transcript_supporting_feature (transcript_id, feature_id, feature_type) + my $sql = "insert into transcript_supporting_feature + (transcript_id, feature_id, feature_type) values(?, ?, ?)"; my $sf_sth = $self->prepare($sql); @@ -780,9 +795,9 @@ sub store { next; } - $sf_sth->bind_param(1,$transc_dbID,SQL_INTEGER); - $sf_sth->bind_param(2,$sf->dbID,SQL_INTEGER); - $sf_sth->bind_param(3,$type,SQL_VARCHAR); + $sf_sth->bind_param(1, $transc_dbID, SQL_INTEGER); + $sf_sth->bind_param(2, $sf->dbID, SQL_INTEGER); + $sf_sth->bind_param(3, $type, SQL_VARCHAR); $sf_sth->execute(); } @@ -802,13 +817,13 @@ sub store { =head2 get_Interpro_by_transid - Arg [1] : string $trans - the stable if of the trans to obtain - Example : @i = $trans_adaptor->get_Interpro_by_transid($trans->stable_id()); - Description: gets interpro accession numbers by transcript stable id. 
+ Arg [1] : String $trans_stable_id + The stable if of the transcript to obtain + Example : @i = $tr_adaptor->get_Interpro_by_transid($trans->stable_id()); + Description: Gets interpro accession numbers by transcript stable id. A hack really - we should have a much more structured - system than this - Returntype : listref of strings + system than this. + Returntype : listref of strings (Interpro_acc:description) Exceptions : none Caller : domainview? , GeneView Status : Stable @@ -816,48 +831,51 @@ sub store { =cut sub get_Interpro_by_transid { - my ($self,$transid) = @_; - - my $sth = $self->prepare - ("SELECT STRAIGHT_JOIN i.interpro_ac, x.description " . - "FROM transcript_stable_id tsi, ". - "translation tl, ". - "protein_feature pf, ". - "interpro i, " . - "xref x " . - "WHERE tsi.stable_id = ? " . - "AND tl.transcript_id = tsi.transcript_id " . - "AND tl.translation_id = pf.translation_id " . - "AND i.id = pf.hit_id " . - "AND i.interpro_ac = x.dbprimary_acc"); - - $sth->bind_param(1,$transid,SQL_VARCHAR); - $sth->execute(); - - my @out; - my %h; - while( (my $arr = $sth->fetchrow_arrayref()) ) { - if( $h{$arr->[0]} ) { next; } - $h{$arr->[0]}=1; - my $string = $arr->[0] .":".$arr->[1]; - push(@out,$string); - } - - return \@out; -} + my ($self,$trans_stable_id) = @_; + + my $sth = $self->prepare(qq( + SELECT STRAIGHT_JOIN i.interpro_ac, x.description + FROM transcript_stable_id tsi, + transcript t, + translation tl, + protein_feature pf, + interpro i, + xref x + WHERE tsi.stable_id = ? 
+ AND tl.transcript_id = tsi.transcript_id + AND tl.translation_id = pf.translation_id + AND i.id = pf.hit_id + AND i.interpro_ac = x.dbprimary_acc + AND tsi.transcript_id = t.transcript_id + AND t.is_current = 1 + )); + $sth->bind_param(1, $trans_stable_id, SQL_VARCHAR); + $sth->execute(); + my @out; + my %h; + while( (my $arr = $sth->fetchrow_arrayref()) ) { + if( $h{$arr->[0]} ) { next; } + $h{$arr->[0]}=1; + my $string = $arr->[0] .":".$arr->[1]; + push(@out,$string); + } + + return \@out; +} =head2 remove Arg [1] : Bio::EnsEMBL::Transcript $transcript - Example : $transcript_adaptor->remove($transcript); + The transcript to remove from the database + Example : $tr_adaptor->remove($transcript); Description: Removes a transcript completely from the database, and all associated information. This method is usually called by the GeneAdaptor::remove method because this method will not preform the removal of genes - which are associated with this transcript. Do not call this + which are associated with this transcript. Do not call this method directly unless you know there are no genes associated with the transcript! Returntype : none @@ -883,7 +901,7 @@ sub remove { "not PredictionTranscripts"); } - if( !$transcript->is_stored($self->db()) ) { + if ( !$transcript->is_stored($self->db()) ) { warning("Cannot remove transcript ". $transcript->dbID .". Is not stored ". "in this database."); return; @@ -897,7 +915,7 @@ sub remove { my $sfsth = $self->prepare("SELECT feature_type, feature_id " . "FROM transcript_supporting_feature " . 
"WHERE transcript_id = ?"); - $sfsth->bind_param(1,$transcript->dbID,SQL_INTEGER); + $sfsth->bind_param(1, $transcript->dbID, SQL_INTEGER); $sfsth->execute(); while(my ($type, $feature_id) = $sfsth->fetchrow()){ if($type eq 'protein_align_feature'){ @@ -917,7 +935,7 @@ sub remove { # delete the association to supporting features $sfsth = $self->prepare("DELETE FROM transcript_supporting_feature WHERE transcript_id = ?"); - $sfsth->bind_param(1,$transcript->dbID,SQL_INTEGER); + $sfsth->bind_param(1, $transcript->dbID, SQL_INTEGER); $sfsth->execute(); $sfsth->finish(); @@ -950,7 +968,7 @@ sub remove { my $sth = $self->prepare( "SELECT count(*) FROM exon_transcript WHERE exon_id = ?" ); - $sth->bind_param(1,$exon->dbID,SQL_INTEGER); + $sth->bind_param(1, $exon->dbID, SQL_INTEGER); $sth->execute(); my ($count) = $sth->fetchrow_array(); $sth->finish(); @@ -962,18 +980,18 @@ sub remove { my $sth = $self->prepare( "DELETE FROM exon_transcript WHERE transcript_id = ?" ); - $sth->bind_param(1,$transcript->dbID,SQL_INTEGER); + $sth->bind_param(1, $transcript->dbID, SQL_INTEGER); $sth->execute(); $sth = $self->prepare( "DELETE FROM transcript_stable_id WHERE transcript_id = ?" ); - $sth->bind_param(1,$transcript->dbID,SQL_INTEGER); + $sth->bind_param(1, $transcript->dbID, SQL_INTEGER); $sth->execute(); $sth->finish(); $sth = $self->prepare( "DELETE FROM transcript WHERE transcript_id = ?" ); - $sth->bind_param(1,$transcript->dbID,SQL_INTEGER); + $sth->bind_param(1, $transcript->dbID, SQL_INTEGER); $sth->execute(); $sth->finish(); @@ -984,18 +1002,18 @@ sub remove { } - =head2 update - Arg [1] : Bio::EnsEMBL::Transcript - Example : $transcript_adaptor->update($transcript); - Description: Updates a transcript in the database + Arg [1] : Bio::EnsEMBL::Transcript $transcript + The transcript to update + Example : $tr_adaptor->update($transcript); + Description: Updates a transcript in the database. 
Returntype : None - Exceptions : thrown if the $transcript is not a Bio::EnsEMBL::Transcript - warn if trying to update the number of attached exons. This - is a far more complex process and is not yet implemented. + Exceptions : thrown if the $transcript is not a Bio::EnsEMBL::Transcript. warn if the method is called on a transcript that does not exist in the database. + Should warn if trying to update the number of attached exons, but + this is a far more complex process and is not yet implemented. Caller : general Status : Stable @@ -1003,6 +1021,7 @@ sub remove { sub update { my ($self, $transcript) = @_; + my $update = 0; if( !defined $transcript || !ref $transcript || @@ -1016,7 +1035,8 @@ sub update { display_xref_id = ?, description = ?, biotype = ?, - status = ? + status = ?, + is_current = ? WHERE transcript_id = ? ); @@ -1031,24 +1051,25 @@ sub update { my $sth = $self->prepare( $update_transcript_sql ); - $sth->bind_param(1,$transcript->analysis->dbID,SQL_INTEGER); - $sth->bind_param(2,$display_xref_id,SQL_INTEGER); - $sth->bind_param(3,$transcript->description,SQL_LONGVARCHAR); - $sth->bind_param(4,$transcript->biotype,SQL_VARCHAR); - $sth->bind_param(5,$transcript->status,SQL_VARCHAR); - $sth->bind_param(6,$transcript->dbID,SQL_INTEGER); + $sth->bind_param(1, $transcript->analysis->dbID, SQL_INTEGER); + $sth->bind_param(2, $display_xref_id, SQL_INTEGER); + $sth->bind_param(3, $transcript->description, SQL_LONGVARCHAR); + $sth->bind_param(4, $transcript->biotype, SQL_VARCHAR); + $sth->bind_param(5, $transcript->status, SQL_VARCHAR); + $sth->bind_param(6, $transcript->is_current, SQL_INTEGER); + $sth->bind_param(7, $transcript->dbID, SQL_INTEGER); $sth->execute(); } + =head2 list_dbIDs - Arg [1] : none - Example : @transcript_ids = @{$transcript_adaptor->list_dbIDs()}; - Description: Gets an array of internal ids for all transcripts in the current db - Returntype : list of ints + Example : @transcript_ids = @{ $t_adaptor->list_dbIDs }; + Description: 
Gets a list of internal ids for all transcripts in the db. + Returntype : Listref of Ints Exceptions : none - Caller : ? + Caller : general Status : Stable =cut @@ -1059,15 +1080,15 @@ sub list_dbIDs { return $self->_list_dbIDs("transcript"); } -=head2 list_stable_dbIDs - Arg [1] : none - Example : @stable_trans_ids = @{$transcript_adaptor->list_stable_dbIDs()}; - Description: Gets an array of stable ids for all transcripts in the current +=head2 list_stable_ids + + Example : @stable_trans_ids = @{ $transcript_adaptor->list_stable_ids }; + Description: Gets a list of stable ids for all transcripts in the current database. - Returntype : listref of ints + Returntype : Listref of Strings Exceptions : none - Caller : ? + Caller : general Status : Stable =cut @@ -1082,10 +1103,11 @@ sub list_stable_ids { #_objs_from_sth # Arg [1] : StatementHandle $sth -# Example : none +# Arg [2] : Bio::EnsEMBL::AssemblyMapper $mapper +# Arg [3] : Bio::EnsEMBL::Slice $dest_slice # Description: PROTECTED implementation of abstract superclass method. -# responsible for the creation of Transcripts -# Returntype : listref of Bio::EnsEMBL::Transcripts in target coord system +# Responsible for the creation of Transcripts. 
+# Returntype : Listref of Bio::EnsEMBL::Transcripts in target coord system # Exceptions : none # Caller : internal # Status : Stable @@ -1110,7 +1132,7 @@ sub _objs_from_sth { my %sr_cs_hash; my ( $transcript_id, $seq_region_id, $seq_region_start, $seq_region_end, - $seq_region_strand, $analysis_id, $gene_id, + $seq_region_strand, $analysis_id, $gene_id, $is_current, $display_xref_id, $stable_id, $version, $created_date, $modified_date, $description, $biotype, $status, $external_name, $external_db, $external_status, $external_db_name, @@ -1118,10 +1140,11 @@ sub _objs_from_sth { $sth->bind_columns( \$transcript_id, \$seq_region_id, \$seq_region_start, \$seq_region_end, \$seq_region_strand, \$analysis_id, - \$gene_id, \$display_xref_id, \$stable_id, \$version, - \$created_date, \$modified_date, + \$gene_id, \$is_current, \$display_xref_id, \$stable_id, + \$version, \$created_date, \$modified_date, \$description, \$biotype, \$status, - \$external_name, \$external_db, \$external_status, \$external_db_name, + \$external_name, \$external_db, \$external_status, + \$external_db_name, \$info_type, \$info_text); my $asm_cs; @@ -1243,18 +1266,19 @@ sub _objs_from_sth { my $display_xref; if( $display_xref_id ) { - $display_xref = Bio::EnsEMBL::DBEntry->new_fast - ({ 'dbID' => $display_xref_id, + $display_xref = Bio::EnsEMBL::DBEntry->new_fast({ + 'dbID' => $display_xref_id, 'adaptor' => $dbEntryAdaptor, 'display_id' => $external_name, 'db_display_name' => $external_db_name, - 'dbname' => $external_db}); + 'dbname' => $external_db + }); } #finally, create the new transcript - push @transcripts, Bio::EnsEMBL::Transcript->new - ( '-analysis' => $analysis, + push @transcripts, Bio::EnsEMBL::Transcript->new( + '-analysis' => $analysis, '-start' => $seq_region_start, '-end' => $seq_region_end, '-strand' => $seq_region_strand, @@ -1272,7 +1296,9 @@ sub _objs_from_sth { '-display_xref' => $display_xref, '-description' => $description, '-biotype' => $biotype, - '-status' => $status 
); + '-status' => $status, + '-is_current' => $is_current + ); } return \@transcripts; @@ -1281,209 +1307,223 @@ sub _objs_from_sth { =head2 fetch_all_by_exon_supporting_evidence - Arg [1] : string hit_name - Arg [2] : string feature type - (one of "dna_align_feature" or "protein_align_feature") + Arg [1] : String $hit_name + Name of supporting feature + Arg [2] : String $feature_type + one of "dna_align_feature" or "protein_align_feature" Arg [3] : (optional) Bio::Ensembl::Analysis - Example : $transcripts = $transcript_adaptor->fetch_all_by_exon_supporting_evidence(); - Description: Gets all the transcripts with exons which have a specified hit on a particular - type of feature. Optionally filter by analysis. - Returntype : Listref of Bio::EnsEMBL::Transcript + Example : $tr = $tr_adaptor->fetch_all_by_exon_supporting_evidence + ('XYZ', 'dna_align_feature'); + Description: Gets all the transcripts with exons which have a specified hit + on a particular type of feature. Optionally filter by analysis. + Returntype : Listref of Bio::EnsEMBL::Transcript objects Exceptions : If feature_type is not of correct type. - Caller : ? + Caller : general Status : At Risk =cut sub fetch_all_by_exon_supporting_evidence { + my ($self, $hit_name, $feature_type, $analysis) = @_; - my ($self, $hit_name, $feature_type, $analysis) = @_; - - if($feature_type !~ /(dna)|(protein)_align_feature/) { - throw("feature type must be dna_align_feature or protein_align_feature"); - } - - my $anal_from = ", analysis a " if ($analysis); - my $anal_where = "AND a.analysis_id = f.analysis_id AND a.analysis_id=? " if ($analysis); - - my $sql = "SELECT DISTINCT(t.transcript_id) - FROM transcript t, - exon_transcript et, - supporting_feature sf, - $feature_type f - $anal_from - WHERE t.transcript_id = et.transcript_id - AND et.exon_id = sf.exon_id - AND sf.feature_id = f.${feature_type}_id - AND sf.feature_type = ? - AND f.hit_name=? 
- $anal_where"; + if($feature_type !~ /(dna)|(protein)_align_feature/) { + throw("feature type must be dna_align_feature or protein_align_feature"); + } - my $sth = $self->prepare($sql); + my $anal_from = ", analysis a " if ($analysis); + my $anal_where = "AND a.analysis_id = f.analysis_id AND a.analysis_id=? " + if ($analysis); + + my $sql = qq( + SELECT DISTINCT(t.transcript_id) + FROM transcript t, + exon_transcript et, + supporting_feature sf, + $feature_type f + $anal_from + WHERE t.transcript_id = et.transcript_id + AND t.is_current = 1 + AND et.exon_id = sf.exon_id + AND sf.feature_id = f.${feature_type}_id + AND sf.feature_type = ? + AND f.hit_name=? + $anal_where + ); - $sth->bind_param(1, $feature_type, SQL_VARCHAR); - $sth->bind_param(2, $hit_name, SQL_VARCHAR); - $sth->bind_param(3, $analysis->dbID(), SQL_INTEGER) if ($analysis); + my $sth = $self->prepare($sql); - $sth->execute(); + $sth->bind_param(1, $feature_type, SQL_VARCHAR); + $sth->bind_param(2, $hit_name, SQL_VARCHAR); + $sth->bind_param(3, $analysis->dbID(), SQL_INTEGER) if ($analysis); - my @transcripts; + $sth->execute(); - while( my $id = $sth->fetchrow_array ) { - my $transcript = $self->fetch_by_dbID( $id ); - push(@transcripts, $transcript) if $transcript; - } + my @transcripts; - return \@transcripts; + while( my $id = $sth->fetchrow_array ) { + my $transcript = $self->fetch_by_dbID( $id ); + push(@transcripts, $transcript) if $transcript; + } + return \@transcripts; } =head2 fetch_all_by_transcript_supporting_evidence - Arg [1] : string hit_name - Arg [2] : string feature type - (one of "dna_align_feature" or "protein_align_feature") + Arg [1] : String $hit_name + Name of supporting feature + Arg [2] : String $feature_type + one of "dna_align_feature" or "protein_align_feature" Arg [3] : (optional) Bio::Ensembl::Analysis - Example : $transcripts = $transcript_adaptor->fetch_all_by_transcript_supporting_evidence(); - Description: Gets all the transcripts with evidence for a specified 
hit on a particular - type of feature. Optionally filter by analysis. - Returntype : Listref of Bio::EnsEMBL::Transcript + Example : $transcripts = $transcript_adaptor->fetch_all_by_transcript_supporting_evidence('XYZ', 'dna_align_feature'); + Description: Gets all the transcripts with evidence for a specified hit on a + particular type of feature. Optionally filter by analysis. + Returntype : Listref of Bio::EnsEMBL::Transcript objects Exceptions : If feature_type is not of correct type. - Caller : ? + Caller : general Status : At Risk =cut sub fetch_all_by_transcript_supporting_evidence { + + my ($self, $hit_name, $feature_type, $analysis) = @_; - my ($self, $hit_name, $feature_type, $analysis) = @_; - - if($feature_type !~ /(dna)|(protein)_align_feature/) { - throw("feature type must be dna_align_feature or protein_align_feature"); - } + if($feature_type !~ /\A(?:dna|protein)_align_feature\z/) { + throw("feature type must be dna_align_feature or protein_align_feature"); + } - my $anal_from = ", analysis a " if ($analysis); - my $anal_where = "AND a.analysis_id = f.analysis_id AND a.analysis_id=? " if ($analysis); + my $anal_from = $analysis ? ", analysis a " : ''; + my $anal_where = + $analysis ? "AND a.analysis_id = f.analysis_id AND a.analysis_id=? " : ''; + + my $sql = qq( + SELECT DISTINCT(t.transcript_id) + FROM transcript t, + transcript_supporting_feature sf, + $feature_type f + $anal_from + WHERE t.transcript_id = sf.transcript_id + AND t.is_current = 1 + AND sf.feature_id = f.${feature_type}_id + AND sf.feature_type = ? + AND f.hit_name=? + $anal_where + ); - my $sql = "SELECT DISTINCT(t.transcript_id) - FROM transcript t, - transcript_supporting_feature sf, - $feature_type f - $anal_from - WHERE t.transcript_id = sf.transcript_id - AND sf.feature_id = f.${feature_type}_id - AND sf.feature_type = ? - AND f.hit_name=? 
- $anal_where"; + my $sth = $self->prepare($sql); - my $sth = $self->prepare($sql); + $sth->bind_param(1, $feature_type, SQL_VARCHAR); + $sth->bind_param(2, $hit_name, SQL_VARCHAR); + $sth->bind_param(3, $analysis->dbID(), SQL_INTEGER) if ($analysis); - $sth->bind_param(1, $feature_type, SQL_VARCHAR); - $sth->bind_param(2, $hit_name, SQL_VARCHAR); - $sth->bind_param(3, $analysis->dbID(), SQL_INTEGER) if ($analysis); + $sth->execute(); - $sth->execute(); + my @transcripts; - my @transcripts; + while( my $id = $sth->fetchrow_array ) { + my $transcript = $self->fetch_by_dbID( $id ); + push(@transcripts, $transcript) if $transcript; + } - while( my $id = $sth->fetchrow_array ) { - my $transcript = $self->fetch_by_dbID( $id ); - push(@transcripts, $transcript) if $transcript; - } + return \@transcripts; +} - return \@transcripts; -} +########################## +# # +# DEPRECATED METHODS # +# # +########################## =head2 get_display_xref - Description: DEPRECATED use $transcript->display_xref() + Description: DEPRECATED. Use $transcript->display_xref() instead. =cut sub get_display_xref { my ($self, $transcript) = @_; - deprecate( "display_xref should be retreived from Transcript object directly." ); + deprecate("display_xref should be retreived from Transcript object directly."); - if( !defined $transcript ) { - throw("Must call with a Transcript object"); + if ( !defined $transcript ) { + throw("Must call with a Transcript object"); } - my $sth = $self->prepare("SELECT e.db_name, - x.display_label, - e.db_external_name, - x.xref_id - FROM transcript t, - xref x, - external_db e - WHERE t.transcript_id = ? - AND t.display_xref_id = x.xref_id - AND x.external_db_id = e.external_db_id - "); - $sth->bind_param(1,$transcript->dbID,SQL_INTEGER); + my $sth = $self->prepare(qq( + SELECT e.db_name, + x.display_label, + e.db_external_name, + x.xref_id + FROM transcript t, + xref x, + external_db e + WHERE t.transcript_id = ? 
+ AND t.display_xref_id = x.xref_id + AND x.external_db_id = e.external_db_id + )); + + $sth->bind_param(1, $transcript->dbID, SQL_INTEGER); $sth->execute(); - - my ($db_name, $display_label, $xref_id, $display_db_name ) = $sth->fetchrow_array(); - if( !defined $xref_id ) { + my ($db_name, $display_label, $display_db_name, $xref_id ) = + $sth->fetchrow_array(); + + if ( !defined $xref_id ) { return undef; } - my $db_entry = Bio::EnsEMBL::DBEntry->new - ( + my $db_entry = Bio::EnsEMBL::DBEntry->new( -dbid => $xref_id, -adaptor => $self->db->get_DBEntryAdaptor(), -dbname => $db_name, - -display_id => $display_label + -display_id => $display_label, -db_display_name => $display_db_name - ); + ); return $db_entry; } - - =head2 get_stable_entry_info - Description: DEPRECATED Use $transcript->stable_id() + Description: DEPRECATED. Use $transcript->stable_id() instead. =cut sub get_stable_entry_info { - my ($self,$transcript) = @_; + my ($self, $transcript) = @_; - deprecate( "Stable ids should be loaded directly now" ); + deprecate("Stable ids should be loaded directly now"); - unless( defined $transcript && ref $transcript && + unless ( defined $transcript && ref $transcript && $transcript->isa('Bio::EnsEMBL::Transcript') ) { throw("Needs a Transcript object, not a $transcript"); } - my $sth = $self->prepare("SELECT stable_id, version - FROM transcript_stable_id - WHERE transcript_id = ?"); - $sth->bind_param(1,$transcript->dbID,SQL_INTEGER); + my $sth = $self->prepare(qq( + SELECT stable_id, version + FROM transcript_stable_id + WHERE transcript_id = ? + )); + + $sth->bind_param(1, $transcript->dbID, SQL_INTEGER); $sth->execute(); my @array = $sth->fetchrow_array(); $transcript->{'_stable_id'} = $array[0]; $transcript->{'_version'} = $array[1]; - return 1; } - - =head2 fetch_all_b_DBEntry - Description: DEPRECATED this method has been renamed - fetch_all_by_external_name + Description: DEPRECATED. Use fetch_all_by_external_name() instead. =cut