Commit ed534385 authored by Andy Yates's avatar Andy Yates
Browse files

ENSCORESW-165. Committing the second attempt at the IntronSupportingEvidence...

ENSCORESW-165. Committing the second attempt at the IntronSupportingEvidence API for genebuild RNASeq data.
parent 3f6c618b
......@@ -342,7 +342,7 @@ sub get_available_adaptors {
SplicingEvent SplicingEventFeature SplicingTranscriptPair
Operon OperonTranscript
DataFile AssemblyInfo
IntronSupportingEvidence
) ),
# Those whose adaptors are in Map::DBSQL
map( { $_ => "Bio::EnsEMBL::Map::DBSQL::${_}Adaptor" } qw(
......
......@@ -1069,6 +1069,14 @@ sub store {
$attr_adaptor->store_on_Transcript( $transc_dbID,
$transcript->get_all_Attributes() );
# store the IntronSupportingEvidence features
my $ise_adaptor = $db->get_IntronSupportingEvidenceAdaptor();
my $intron_supporting_evidence = $transcript->get_all_IntronSupportingEvidence();
foreach my $ise (@{$intron_supporting_evidence}) {
$ise_adaptor->store($ise);
$ise_adaptor->store_transcript_linkage($ise, $transcript, $transc_dbID);
}
# Update the original transcript object - not the transfered copy that
# we might have created.
$original->dbID($transc_dbID);
......@@ -1251,6 +1259,16 @@ sub remove {
$sfsth->bind_param(1, $transcript->dbID, SQL_INTEGER);
$sfsth->execute();
$sfsth->finish();
# delete the associated IntronSupportingEvidence and if the ISE had no more
# linked transcripts remove it
my $ise_adaptor = $self->db->get_IntronSupportingEvidenceAdaptor();
foreach my $ise (@{$transcript->get_all_IntronSupportingEvidence()}) {
$ise_adaptor->remove_transcript_linkage($ise, $transcript);
if(! $ise->has_linked_transcripts()) {
$ise_adaptor->remove($ise);
}
}
# remove all xref linkages to this transcript
......
......@@ -22,30 +22,26 @@
=head1 SYNOPSIS
$ex = new Bio::EnsEMBL::Intron( exon1, exon2 );
$intron = Bio::EnsEMBL::Intron->new( exon1, exon2, $analysis );
=cut
package Bio::EnsEMBL::Intron;
use vars qw(@ISA);
use strict;
use warnings;
use Bio::EnsEMBL::Utils::Exception qw( warning throw );
use Bio::EnsEMBL::Feature;
use Bio::Seq; # introns have to have sequences...
use Bio::EnsEMBL::Utils::Exception qw( warning throw deprecate );
use Bio::EnsEMBL::Utils::Argument qw( rearrange );
@ISA = qw(Bio::EnsEMBL::Feature);
use base qw(Bio::EnsEMBL::Feature);
=head2 new
Args : exon1, exon2. The two exons to build the Intron from.
Arg [1] : Bio::EnsEMBL::Exon The 5' exon for the intron; required
Arg [2] : Bio::EnsEMBL::Exon The 3' exon for the intron; required
Arg [3] : Bio::EnsEMBL::Analysis Analysis to link to this Intron
Example : $intron = Bio::EnsEMBL::Intron->new($exon1, $exon2)
Description: create an Intron object from two exons.
Description: Create an Intron object from two exons and an optional analysis
Returntype : Bio::EnsEMBL::Intron
Exceptions : exons not on the same strand or slice.
Caller : general
......@@ -54,7 +50,7 @@ use Bio::EnsEMBL::Utils::Argument qw( rearrange );
=cut
sub new {
my ( $proto, $e1, $e2 ) = @_;
my ( $proto, $e1, $e2, $analysis ) = @_;
my $class = ref $proto || $proto;
......@@ -82,12 +78,15 @@ sub new {
{
throw("Exons on different slices. Not allowed");
} else {
warn( "Exons have different slice references "
. "to the same seq_region\n" );
warning("Exons have different slice references to the same seq_region");
}
} else {
$self->{'slice'} = $e1->slice();
}
if($analysis) {
$self->analysis($analysis);
}
$self->{'prev'} = $e1;
$self->{'next'} = $e2;
......@@ -154,21 +153,6 @@ sub next_Exon {
return $self->{'next'};
}
=head2 get_IntronSupportingEvidence
Example : my $evidence = $intron->get_IntronSupportingEvidence();
Description : Returns the evidence used to support this Intron
Returntype : Bio::EnsEMBL::IntronSupportingEvidence
Exceptions : None
=cut
sub get_IntronSupportingEvidence {
  # Returns whatever the IntronSupportingEvidenceAdaptor's fetch_by_Intron()
  # yields for this Intron. The adaptor is reached through this object's own
  # adaptor and its db(); there is no guard for a missing adaptor.
  my $self = shift;
  my $db = $self->adaptor()->db();
  return $db->get_IntronSupportingEvidenceAdaptor()->fetch_by_Intron($self);
}
1;
......@@ -26,7 +26,16 @@ Bio::EnsEMBL::IntronSupportingEvidence
=head1 DESCRIPTION
Used to represent evidence used to declare the Intron
Formalises an Intron with information about why it is believed to be an Intron. This
serves as a parallel object to Bio::EnsEMBL::Intron, which you can use
to populate the values of this object. They are different objects though,
because an Intron does not exist as a DB data structure.
=head1 SYNOPSIS
# Example sets up an ISE from the first two Exons
my ($five_prime_exon, $three_prime_exon) = @{$transcript->get_all_Exons()}[0..1];
my $intron = Bio::EnsEMBL::Intron->new($five_prime_exon, $three_prime_exon);
=head1 METHODS
......@@ -35,65 +44,285 @@ Used to represent evidence used to delcare the Intron
use strict;
use warnings;
use base qw/Bio::EnsEMBL::Storable/;
use base qw/Bio::EnsEMBL::Feature/;
use Bio::EnsEMBL::Intron;
use Bio::EnsEMBL::Utils::Argument qw/rearrange/;
use Bio::EnsEMBL::Utils::Exception qw/throw/;
use Bio::EnsEMBL::Utils::Scalar qw/assert_ref/;
our %SUPPORTED_TYPES = map { $_ => 1 } qw/NONE DEPTH/;
=head2 new
Arg [-ADAPTOR] : Bio::EnsEMBL::DBSQL::IntronSupportingEvidenceAdaptor
Arg [-DBID] : Integer $dbID
Arg [-INTRON] : Bio::EnsEMBL::Intron $intron
Arg [-ANALYSIS] : Bio::EnsEMBL::Analysis The analysis this intron is linked to
Arg [-START] : int - start position of the IntronSupportingEvidence
Arg [-END] : int - end position of the IntronSupportingEvidence
Arg [-STRAND] : int - strand the IntronSupportingEvidence is on
Arg [-SLICE] : Bio::EnsEMBL::Slice - the slice the IntronSupportingEvidence is on
Arg [-INTRON] : Bio::EnsEMBL::Intron Intron the evidence is based
on. Useful if you are not specifying the location
parameters as we will take them from this
Arg [-HIT_NAME] : String The name of the hit
Arg [-SCORE] : Double The score associated with the supporting evidence
Arg [-SCORE_TYPE] : String The type of score we are representing
Example : Bio::EnsEMBL::IntronSupportingEvidence->new();
Description : Returns a new instance of this object
Returntype : Bio::EnsEMBL::IntronSupportingEvidence
Exceptions : Thrown if data is not as expected
Example : Bio::EnsEMBL::IntronSupportingEvidence->new(
-ANALYSIS => $analysis, -INTRON => $intron,
-SCORE => 100, -SCORE_TYPE => 'DEPTH');
Description : Returns a new instance of this object
Returntype : Bio::EnsEMBL::IntronSupportingEvidence
Exceptions : Thrown if data is not as requested
=cut
sub new {
# Constructor. All arguments are forwarded to the parent class constructor;
# the ISE-specific named arguments are then pulled out of @args with
# rearrange() and applied through this class's own setters.
my ($class, @args) = @_;
my $self = $class->SUPER::new(@args);
# NOTE(review): this listing shows two versions of the argument handling back
# to back - the four-argument form with unconditional setters directly below,
# and the five-argument form with conditional setters after it. Presumably
# only the second form is meant to remain - confirm against the repository.
my ($intron, $hit_name, $score, $score_type) =
rearrange([qw/intron hit_name score score_type/], @args);
$self->intron($intron);
$self->hit_name($hit_name);
$self->score($score);
$self->score_type($score_type);
my ($intron, $hit_name, $score, $score_type, $is_splice_canonical) =
rearrange([qw/intron hit_name score score_type is_splice_canonical/], @args);
# When an Intron is supplied its values are copied onto this object via
# set_values_from_Intron().
if($intron) {
$self->set_values_from_Intron($intron);
}
# Each optional field is applied only when its value is true, so false values
# such as 0 or the empty string (e.g. -SCORE => 0) are skipped here.
$self->hit_name($hit_name) if $hit_name;
$self->score($score) if $score;
$self->score_type($score_type) if $score_type;
$self->is_splice_canonical($is_splice_canonical) if $is_splice_canonical;
return $self;
}
sub intron {
=head2 set_values_from_Intron
Arg [1] : Bio::EnsEMBL::Intron The intron to base this object on
Example : $ise->set_values_from_Intron($intron);
Description : Sets the start, end, strand and slice of this ISE instance
using values from the given Intron object.
Returntype : None
Exceptions : Thrown if data is not as requested
=cut
sub set_values_from_Intron {
my ($self, $intron) = @_;
if(defined $intron) {
assert_ref($intron, 'Bio::EnsEMBL::Intron', 'intron');
$self->{'intron'} = $intron;
}
return $self->{'intron'};
assert_ref($intron, 'Bio::EnsEMBL::Intron', 'intron');
$self->start($intron->start());
$self->end($intron->end());
$self->strand($intron->strand());
$self->slice($intron->slice());
return;
}
=head2 is_splice_canonical
Arg [1] : Boolean
Example : $ise->is_splice_canonical(1);
Description : Getter/setter for is_splice_canonical. Splice canonical
indicates those Introns which have a splice junction which
is structured as expected
Returntype : Boolean
Exceptions :
=cut
sub is_splice_canonical {
  # Combined getter/setter for the 'is_splice_canonical' field. A defined
  # argument (including 0) overwrites the stored value; undef or no argument
  # leaves it alone. The current value is always returned.
  my $self = shift;
  my $flag = shift;
  if(defined $flag) {
    $self->{'is_splice_canonical'} = $flag;
  }
  return $self->{'is_splice_canonical'};
}
=head2 get_Intron
Arg [1] : Bio::EnsEMBL::Transcript
Example : my $intron = $ise->get_Intron($transcript);
Description : Provides access to an Intron object by using a given transcript
object and its associated array of Exons.
Returntype : Bio::EnsEMBL::Intron
Exceptions : None
=cut
sub get_Intron {
  # Builds a Bio::EnsEMBL::Intron from the two exons of the given transcript
  # that find_previous_Exon() and find_next_Exon() report for this evidence.
  my $self = shift;
  my $transcript = shift;
  assert_ref($transcript, 'Bio::EnsEMBL::Transcript', 'transcript');
  # Both lookups are done in scalar context on purpose: they use a bare
  # 'return' when nothing matches, which would otherwise drop the argument.
  my $upstream_exon = $self->find_previous_Exon($transcript);
  my $downstream_exon = $self->find_next_Exon($transcript);
  return Bio::EnsEMBL::Intron->new($upstream_exon, $downstream_exon);
}
=head2 hit_name
Arg [1] : String name of the hit
Example : $ise->hit_name('hit');
Description : Getter/setter for hit name i.e. an identifier for the alignments
Returntype : String
Exceptions : None
=cut
sub hit_name {
  # Combined getter/setter for the 'hit_name' field. Only a defined argument
  # replaces the stored value; the current value is always returned.
  my $self = shift;
  my $new_name = shift;
  if(defined $new_name) {
    $self->{'hit_name'} = $new_name;
  }
  return $self->{'hit_name'};
}
=head2 score
Arg [1] : Number; the score associated with this feature
Example : $ise->score(100);
Description : Getter/setter for score
Returntype : Number
Exceptions : None
=cut
sub score {
  # Combined getter/setter for the 'score' field. Only a defined argument
  # (0 included) replaces the stored value; the current value is always
  # returned.
  my $self = shift;
  my $new_score = shift;
  if(defined $new_score) {
    $self->{'score'} = $new_score;
  }
  return $self->{'score'};
}
=head2 score_type
Arg [1] : String the enum type. Currently only allowed NONE or DEPTH
Example : $ise->score_type('DEPTH');
Description : Gets and sets the type of score this instance represents
Returntype : String
Exceptions : Thrown if given an unsupported type of data
=cut
sub score_type {
  # Combined getter/setter for the 'score_type' field.
  #
  # A defined argument must be a key of %SUPPORTED_TYPES with a true value,
  # otherwise an exception is thrown and the stored value is left untouched.
  # An undefined or missing argument makes this a plain getter.
  my ($self, $score_type) = @_;
  if(defined $score_type) {
    if(! $SUPPORTED_TYPES{$score_type}) {
      # Sort the keys so the message is identical on every run; hash key
      # order is not stable between processes.
      my $values = join(q{, }, sort keys %SUPPORTED_TYPES);
      throw "The score_type '$score_type' is not allowed. Allowed values are [${values}]";
    }
    $self->{'score_type'} = $score_type;
  }
  return $self->{'score_type'};
}
=head2 has_linked_transcripts
Example : $ise->has_linked_transcripts();
Description : Returns true if we have transcripts linked to this ISE
Returntype : Boolean
Exceptions : Thrown if we do not have an attached adaptor
=cut
sub has_linked_transcripts {
  # Returns 1 when the adaptor lists at least one transcript id linked to
  # this object and 0 otherwise. Throws when no adaptor is attached.
  my $self = shift;
  if(! $self->adaptor()) {
    throw("No attached adaptor. Cannot find linked Transcripts unless this is a persisted object");
  }
  my $linked_ids = $self->adaptor()->list_linked_transcript_ids($self);
  return 1 if scalar(@{$linked_ids});
  return 0;
}
=head2 equals
Arg [1] : Bio::EnsEMBL::IntronSupportingEvidence Object to compare to
Example : $ise->equals($another_ise);
Description : Returns true if the given IntronSupportingEvidence instance is equal to this one
Returntype : Boolean
Exceptions : None
=cut
sub equals {
  # Returns 1 when $other is equal to this object and 0 otherwise.
  #
  # The parent class's equals() is consulted first; if it reports a
  # difference we stop there. Otherwise hit_name, score_type and score must
  # all match.
  my ($self, $other) = @_;
  return 0 if ! $self->SUPER::equals($other);

  # Every field may legitimately be unset. Defaulting undef to the value it
  # would have compared as anyway ('' for strings, 0 for numbers) keeps the
  # result identical while avoiding "uninitialized value" warnings, and makes
  # all three comparisons consistent with the existing hit_name handling.
  my $same_hit_name   = ($self->hit_name()   || q{}) eq ($other->hit_name()   || q{});
  my $same_score_type = ($self->score_type() || q{}) eq ($other->score_type() || q{});
  my $same_score      = ($self->score()      || 0)   == ($other->score()      || 0);

  return ($same_hit_name && $same_score_type && $same_score) ? 1 : 0;
}
=head2 find_previous_Exon
Arg [1] : Bio::EnsEMBL::Transcript Transcript to search for the Exons from
Example : $ise->find_previous_Exon($transcript);
Description : Loops through those Exons available from the Transcript and
attempts to find one which was the 5' flanking exon. If the
object has already been persisted we will use dbIDs to
find the Exons
Returntype : Bio::EnsEMBL::Exon
Exceptions : None
=cut
sub find_previous_Exon {
  # Finds the exon of $transcript that sits on the 5' side of this feature.
  # Returns the exon, or nothing when no exon qualifies.
  my ($self, $transcript) = @_;

  # With an adaptor attached, the first flanking exon id it reports is used.
  my $flank_id;
  if($self->adaptor()) {
    my @flank_ids = $self->adaptor()->fetch_flanking_exon_ids($self, $transcript);
    $flank_id = $flank_ids[0] if @flank_ids;
  }

  my $exons = $transcript->get_all_Exons();
  my $start = $self->start();
  my $end = $self->end();

  # A usable id means exons are matched on dbID only; coordinates are never
  # consulted in that case.
  if($flank_id) {
    foreach my $candidate (@{$exons}) {
      return $candidate if $candidate->dbID() == $flank_id;
    }
    return;
  }

  # Otherwise look for the exon that directly abuts this feature: ending one
  # base before our start on strand 1, starting one base after our end on
  # any other strand.
  foreach my $candidate (@{$exons}) {
    my $abuts = ($self->strand() == 1)
      ? ($candidate->end() == $start-1)
      : ($candidate->start() == $end+1);
    return $candidate if $abuts;
  }
  return;
}
=head2 find_next_Exon
Arg [1] : Bio::EnsEMBL::Transcript Transcript to search for the Exons from
Example : $ise->find_next_Exon($transcript);
Description : Loops through those Exons available from the Transcript and
attempts to find one which was the 3' flanking exon. If the
object has already been persisted we will use dbIDs to
find the Exons
Returntype : Bio::EnsEMBL::Exon
Exceptions : None
=cut
sub find_next_Exon {
  # Finds the exon of $transcript that sits on the 3' side of this feature.
  # Returns the exon, or nothing when no exon qualifies.
  my ($self, $transcript) = @_;

  # With an adaptor attached, the second flanking exon id it reports is used.
  my $flank_id;
  if($self->adaptor()) {
    my @flank_ids = $self->adaptor()->fetch_flanking_exon_ids($self, $transcript);
    $flank_id = $flank_ids[1] if @flank_ids;
  }

  my $exons = $transcript->get_all_Exons();
  my $start = $self->start();
  my $end = $self->end();

  # A usable id means exons are matched on dbID only; coordinates are never
  # consulted in that case.
  if($flank_id) {
    foreach my $candidate (@{$exons}) {
      return $candidate if $candidate->dbID() == $flank_id;
    }
    return;
  }

  # Otherwise look for the exon that directly abuts this feature: starting
  # one base after our end on strand 1, ending one base before our start on
  # any other strand.
  foreach my $candidate (@{$exons}) {
    my $abuts = ($self->strand() == 1)
      ? ($candidate->start() == $end+1)
      : ($candidate->end() == $start-1);
    return $candidate if $abuts;
  }
  return;
}
1;
......@@ -438,7 +438,7 @@ sub add_supporting_features {
}
if ((defined $self->slice() && defined $feature->slice())&&
( $self->slice()->name() ne $feature->slice()->name())){
( $self->slice()->name() ne $feature->slice()->name())){
throw("Supporting feat not in same coord system as exon\n" .
"exon is attached to [".$self->slice()->name()."]\n" .
"feat is attached to [".$feature->slice()->name()."]");
......@@ -447,8 +447,8 @@ sub add_supporting_features {
foreach my $added_feature ( @{ $self->{_supporting_evidence} } ){
# compare objects
if ( $feature == $added_feature ){
#this feature has already been added
next FEATURE;
#this feature has already been added
next FEATURE;
}
}
......@@ -1060,7 +1060,7 @@ sub coding_region_start {
if( defined $value ) {
$self->{'coding_region_start'} = $value;
} elsif(!defined $self->{'coding_region_start'} &&
defined $self->translation) {
defined $self->translation) {
#calculate the coding start from the translation
my $start;
my $strand = $self->translation()->start_Exon->strand();
......@@ -1108,7 +1108,7 @@ sub coding_region_end {
if( defined $value ) {
$self->{'coding_region_end'} = $value;
} elsif( ! defined $self->{'coding_region_end'}
&& defined $self->translation() ) {
&& defined $self->translation() ) {
$strand = $self->translation()->start_Exon->strand();
if( $strand == 1 ) {
$end = $self->translation()->end_Exon->start();
......@@ -1470,6 +1470,55 @@ sub get_all_constitutive_Exons {
return $self->get_all_Exons( '-constitutive' => 1 );
}
=head2 get_all_IntronSupportingEvidence
Example : $transcript->get_all_IntronSupportingEvidence();
Description : Fetches all ISE instances linked to this Transcript
Returntype : ArrayRef[Bio::EnsEMBL::IntronSupportingEvidence] retrieved from
the DB or from those added via C<add_IntronSupportingEvidence>
Exceptions : None
=cut
sub get_all_IntronSupportingEvidence {
  # Returns an array reference of the IntronSupportingEvidence objects held
  # by this transcript.
  #
  # If nothing is cached yet and an adaptor is attached, the list is fetched
  # once through the IntronSupportingEvidenceAdaptor and cached in
  # $self->{_ise_array}.
  my ($self) = @_;
  if(! defined $self->{_ise_array} && defined $self->adaptor()) {
    my $isea = $self->adaptor()->db()->get_IntronSupportingEvidenceAdaptor();
    $self->{_ise_array} = $isea->fetch_all_by_Transcript($self);
  }
  # Always hand back an array reference so callers can dereference the result
  # directly. The empty fallback is deliberately not cached, so a transcript
  # that gains an adaptor later can still load its evidence lazily.
  return $self->{_ise_array} || [];
}
=head2 add_IntronSupportingEvidence
Arg [1] : Bio::EnsEMBL::IntronSupportingEvidence Object to add
Example : $transcript->add_IntronSupportingEvidence($ise);
Description : Adds the IntronSupportingEvidence instance to this Transcript. The
code checks to see if it is a unique ISE instance
Returntype : Boolean; true means it was added. False means it was not
as this ISE was already attached
Exceptions : None
=cut
sub add_IntronSupportingEvidence {
  # Appends $ise to this transcript's evidence list unless an entry already
  # held in $self->{_ise_array} reports itself equal to it.
  # Returns 1 when the object was added and 0 when it was a duplicate.
  my $self = shift;
  my $ise = shift;
  assert_ref($ise, 'Bio::EnsEMBL::IntronSupportingEvidence', 'IntronSupportingEvidence');

  # Only the in-memory list is checked; nothing is fetched from the database.
  foreach my $existing (@{$self->{_ise_array}}) {
    return 0 if $ise->equals($existing);
  }

  push(@{$self->{_ise_array}}, $ise);
  return 1;
}
=head2 get_all_Introns
Arg [1] : none
......@@ -1560,6 +1609,22 @@ sub flush_Exons {
$self->{'_trans_exon_array'} = [];
}
=head2 flush_IntronSupportingEvidence
Example : $transcript->flush_IntronSupportingEvidence();
Description: Removes all IntronSupportingEvidence from this transcript
Returntype : none
Exceptions : none
Caller : general
Status : Stable
=cut
sub flush_IntronSupportingEvidence {
  # Replaces this transcript's evidence list with a fresh empty array.
  # Nothing is returned.
  my $self = shift;
  $self->{_ise_array} = [];
  return;
}
=head2 five_prime_utr
......@@ -1679,7 +1744,7 @@ sub get_all_translateable_Exons {
if ($ex == $start_exon ) {
if ($t_start < 1 or $t_start > $length) {
warning("WARN: Translation start '$t_start' is outside exon $ex length=$length");
return [];
return [];
}
$adjust_start = $t_start - 1;
}
......@@ -2204,8 +2269,8 @@ sub transform {
# ordering. This assumes 5->3 order. No complaints on transsplicing.
my ( $last_new_start, $last_old_strand,
$last_new_strand, $start_exon, $end_exon,
$last_seq_region_name );
$last_new_strand, $start_exon, $end_exon,
$last_seq_region_name );
my $first = 1;
my $ignore_order = 0;
my $order_broken = 0;
......@@ -2214,48 +2279,48 @@ sub transform {
my $new_exon = $old_exon->transform( @_ );
return undef if( !defined $new_exon );
if( ! defined $new_transcript ) {
if( !$first ) {
if( $old_exon->strand() != $last_old_strand ) {
# transsplicing, ignore ordering
$ignore_order = 1;
}
if( $new_exon->slice()->seq_region_name() ne
$last_seq_region_name ) {
return undef;
}
if( $last_new_strand == 1 and
$new_exon->start() < $last_new_start ) {