Exon.pm 44.6 KB
Newer Older
1 2
=head1 LICENSE

3
  Copyright (c) 1999-2013 The European Bioinformatics Institute and
4
  Genome Research Limited.  All rights reserved.
5

6 7
  This software is distributed under a modified Apache license.
  For license details, please see
Ewan Birney's avatar
Ewan Birney committed
8

9 10 11
    http://www.ensembl.org/info/about/code_licence.html

=head1 CONTACT
Ewan Birney's avatar
Ewan Birney committed
12

13
  Please email comments or questions to the public Ensembl
14
  developers list at <dev@ensembl.org>.
15 16 17 18 19

  Questions may also be sent to the Ensembl help desk at
  <helpdesk@ensembl.org>.

=cut
20

21 22 23
=head1 NAME

Bio::EnsEMBL::Exon - A class representing an Exon
Ewan Birney's avatar
Ewan Birney committed
24 25 26

=head1 SYNOPSIS

27
    $exon = Bio::EnsEMBL::Exon->new(
28 29 30 31 32 33 34 35 36
      -START     => 100,
      -END       => 200,
      -STRAND    => 1,
      -SLICE     => $slice,
      -DBID      => $dbID,
      -ANALYSIS  => $analysis,
      -STABLE_ID => 'ENSE000000123',
      -VERSION   => 2
    );
37

38 39
  # seq() returns a Bio::Seq
  my $seq = $exon->seq->seq();
40

41 42
  # Peptide only makes sense within transcript context
  my $pep = $exon->peptide($transcript)->seq();
43

44 45 46 47
  # Normal feature operations can be performed:
  $exon = $exon->transform('clone');
  $exon->move( $new_start, $new_end, $new_strand );
  print $exon->slice->seq_region_name();
Ewan Birney's avatar
Ewan Birney committed
48 49 50

=head1 DESCRIPTION

51 52
This is a class which represents an exon which is part of a transcript.
See Bio::EnsEMBL::Transcript
Ewan Birney's avatar
Ewan Birney committed
53

Graham McVicker's avatar
Graham McVicker committed
54
=head1 METHODS
Ewan Birney's avatar
Ewan Birney committed
55 56 57

=cut

58 59
package Bio::EnsEMBL::Exon;

Ewan Birney's avatar
Ewan Birney committed
60 61
use strict;

62
use Bio::EnsEMBL::Feature;
Ewan Birney's avatar
Ewan Birney committed
63
use Bio::Seq; # exons have to have sequences...
Ewan Birney's avatar
Ewan Birney committed
64

65 66
use Bio::EnsEMBL::Utils::Exception qw( warning throw deprecate );
use Bio::EnsEMBL::Utils::Argument qw( rearrange );
67
use Bio::EnsEMBL::Utils::Scalar qw( assert_ref );
Bronwen Aken's avatar
Bronwen Aken committed
68
use Bio::EnsEMBL::DBSQL::SupportingFeatureAdaptor;
Ewan Birney's avatar
Ewan Birney committed
69

70
use vars qw(@ISA);
71
@ISA = qw(Bio::EnsEMBL::Feature);
Graham McVicker's avatar
Graham McVicker committed
72 73


74 75
=head2 new

76 77 78 79 80 81 82 83 84 85
  Arg [-SLICE]: Bio::EnsEMBL::Slice - Represents the sequence that this
                feature is on. The coordinates of the created feature are
                relative to the start of the slice.
  Arg [-START]: The start coordinate of this feature relative to the start
                of the slice it is sitting on.  Coordinates start at 1 and
                are inclusive.
  Arg [-END]  : The end coordinate of this feature relative to the start of
                the slice it is sitting on.  Coordinates start at 1 and are
                inclusive.
  Arg [-STRAND]: The orientation of this feature.  Valid values are 1,-1,0.
86 87
  Arg [-SEQNAME] : (optional) A seqname to be used instead of the default name
                of the slice.  Useful for features that do not have an
88 89 90
                attached slice such as protein features.
  Arg [-dbID]   : (optional) internal database id
  Arg [-ADAPTOR]: (optional) Bio::EnsEMBL::DBSQL::BaseAdaptor
91 92 93 94 95 96
  Arg [-PHASE]    : the phase. 
  Arg [-END_PHASE]: the end phase
  Arg [-STABLE_ID]: (optional) the stable id of the exon
  Arg [-VERSION]  : (optional) the version
  Arg [-CREATED_DATE] : (optional) the created date
  Arg [-MODIFIED_DATE]: (optional) the last modified date
97

98 99
  Example    : none
  Description: create an Exon object
Graham McVicker's avatar
Graham McVicker committed
100
  Returntype : Bio::EnsEMBL::Exon
101
  Exceptions : thrown if phase is not valid (i.e. not one of 0, 1, 2 or -1)
102
  Caller     : general
103
  Status     : Stable
104 105

=cut
Ewan Birney's avatar
Ewan Birney committed
106

107
sub new {
  my ( $caller, @args ) = @_;

  # Accept either a class name or an existing object as the invocant.
  my $class = ref($caller) || $caller;

  # The generic feature arguments are handled by the superclass
  # constructor; only the exon-specific ones are picked out below.
  my $self = $class->SUPER::new(@args);

  my @exon_arg_names = (
    "PHASE",        "END_PHASE",
    "STABLE_ID",    "VERSION",
    "CREATED_DATE", "MODIFIED_DATE",
    "IS_CURRENT",   "IS_CONSTITUTIVE"
  );

  my ( $phase, $end_phase, $stable_id, $version, $created_date,
       $modified_date, $is_current, $is_constitutive )
    = rearrange( \@exon_arg_names, @args );

  # Go through the accessor so that an invalid phase is rejected.
  $self->phase($phase) if defined($phase);

  # These attributes are stored as given, without validation.
  $self->{'end_phase'}     = $end_phase;
  $self->{'stable_id'}     = $stable_id;
  $self->{'version'}       = $version;
  $self->{'created_date'}  = $created_date;
  $self->{'modified_date'} = $modified_date;

  # is_current defaults to 1 and is_constitutive defaults to 0 when the
  # caller does not supply them.
  $self->{'is_current'} = defined($is_current) ? $is_current : 1;
  $self->{'is_constitutive'} =
    defined($is_constitutive) ? $is_constitutive : 0;

  return $self;
}

Graham McVicker's avatar
Graham McVicker committed
146

147 148
=head2 end_phase

Graham McVicker's avatar
Graham McVicker committed
149 150 151 152 153 154 155 156 157 158 159 160
  Arg [1]    : (optional) int $end_phase
  Example    : $end_phase = $feat->end_phase;
  Description: Gets/Sets the end phase of the exon.
               end_phase = number of bases from the last incomplete codon of 
               this exon.
               Usually, end_phase = (phase + exon_length)%3
               but end_phase could be -1 if the exon is half-coding and its 3 
               prime end is UTR.
  Returntype : int
  Exceptions : warning if end_phase is called without an argument and the
               value is not set.
  Caller     : general
161
  Status     : Stable
162 163 164 165

=cut

sub end_phase {
  my ( $self, @new_value ) = @_;

  # Setter: store whatever was passed (even undef) and hand it back.
  if (@new_value) {
    $self->{'end_phase'} = $new_value[0];
    return $self->{'end_phase'};
  }

  # Getter: reading an end phase that was never set is reported with a
  # warning, but the (undefined) value is still returned.
  unless ( defined( $self->{'end_phase'} ) ) {
    warning("No end phase set in Exon. You must set it explicitly.");
  }

  return $self->{'end_phase'};
}
177

178

179 180
=head2 phase

181 182 183 184 185 186 187 188 189
  Arg [1]    : (optional) int $phase
  Example    :  my $phase = $exon->phase;
                $exon->phase(2);
  Description: Gets/Sets the phase of the exon.
  Returntype : int
  Exceptions : throws if phase is not (0, 1 2 or -1).
  Caller     : general
  Status     : Stable

190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208

Get or set the phase of the Exon, which tells the
translation machinery, which makes a peptide from
the DNA, where to start.

The Ensembl phase convention can be thought of as
"the number of bases of the first codon which are
on the previous exon".  It is therefore 0, 1 or 2
(or -1 if the exon is non-coding).  In ascii art,
with alternate codons represented by B<###> and
B<+++>:

       Previous Exon   Intron   This Exon
    ...-------------            -------------...

    5'                    Phase                3'
    ...#+++###+++###          0 +++###+++###+...
    ...+++###+++###+          1 ++###+++###++...
    ...++###+++###++          2 +###+++###+++...
209

210 211 212 213 214 215 216 217
Here is another explanation from Ewan:

Phase means the place where the intron lands
inside the codon - 0 between  codons, 1 between
the 1st and second base, 2 between the second and
3rd  base. Exons therefore have a start phase and
a end phase, but introns have just one phase.

218 219 220 221 222 223
=cut

sub phase {
  my ( $self, $value ) = @_;

  # Acts as a getter when no (or an undefined) value is passed.
  if ( defined($value) ) {

    # The value must be exactly 0, 1, 2, or -1 for non-coding.  \A and \z
    # are used instead of ^ and $ because $ also matches before a
    # trailing newline, which would let a value such as "1\n" through.
    if ( $value =~ /\A(?:-1|0|1|2)\z/ ) {
      $self->{'phase'} = $value;
    }
    else {
      throw( "Bad value ($value) for exon phase. Should only be"
           . " -1,0,1,2\n" );
    }
  }

  return $self->{'phase'};
}

Graham McVicker's avatar
Graham McVicker committed
236

Michele Clamp's avatar
Michele Clamp committed
237 238
=head2 frame

Graham McVicker's avatar
Graham McVicker committed
239 240 241 242 243 244 245
  Arg [1]    : none
  Example    : $frame = $exon->frame
  Description: Gets the frame of this exon
  Returntype : int
  Exceptions : thrown if an arg is passed
               thrown if frame cannot be calculated due to a bad phase value
  Caller     : general
246
  Status     : Stable
Michele Clamp's avatar
Michele Clamp committed
247 248 249 250 251

=cut

sub frame {
  my ( $self, $value ) = @_;

  # The frame is derived data; refuse any attempt to assign it.
  throw("Cannot set frame. Deduced from seq_start and phase")
    if defined $value;

  my $phase = $self->phase;

  # GFF convention: '.' means that no frame information is available.
  return '.' if $phase == -1;

  # The frame is the translation point modulo 3.  Each entry pairs a
  # phase with the offset added to the exon start before the modulo.
  my @offset_for_phase = ( [ 0, 0 ], [ 1, 2 ], [ 2, 1 ] );

  for my $pair (@offset_for_phase) {
    my ( $candidate, $offset ) = @{$pair};
    return ( $self->start + $offset ) % 3 if $phase == $candidate;
  }

  throw( "bad phase in exon " . $phase );
}


=head2 start

  Arg [1]    : int $start (optional)
  Example    : $start = $exon->start();
  Description: Getter/Setter for the start of this exon.  The superclass
               implementation is overridden to flush the internal sequence
               cache if this value is altered
  Returntype : int
  Exceptions : none
  Caller     : general
289
  Status     : Stable
290 291 292 293 294

=cut

sub start {
  my ( $self, @args ) = @_;

  # Passing any argument means the coordinate is being set, so the
  # cached sequence is discarded before delegating to the superclass.
  if (@args) {
    delete $self->{'_seq_cache'};
  }

  return $self->SUPER::start(@args);
}


=head2 end

  Arg [1]    : int $end (optional)
  Example    : $end = $exon->end();
  Description: Getter/Setter for the end of this exon.  The superclass
               implementation is overridden to flush the internal sequence
               cache if this value is altered
  Returntype : int
  Exceptions : none
  Caller     : general
311
  Status     : Stable
312 313 314 315 316

=cut

sub end {
  my ( $self, @args ) = @_;

  # Passing any argument means the coordinate is being set, so the
  # cached sequence is discarded before delegating to the superclass.
  if (@args) {
    delete $self->{'_seq_cache'};
  }

  return $self->SUPER::end(@args);
}


=head2 strand

  Arg [1]    : int $strand (optional)
  Example    : $strand = $exon->strand();
  Description: Getter/Setter for the strand of this exon.  The superclass
               implementation is overridden to flush the internal sequence
               cache if this value is altered
  Returntype : int
  Exceptions : none
  Caller     : general
333
  Status     : Stable
334 335 336 337 338

=cut

sub strand {
  my ( $self, @args ) = @_;

  # Passing any argument means the strand is being set, so the cached
  # sequence is discarded before delegating to the superclass.
  if (@args) {
    delete $self->{'_seq_cache'};
  }

  return $self->SUPER::strand(@args);
}

344 345
=head2 cdna_start

346 347 348
    Arg [1]     : Bio::EnsEMBL::Transcript $transcript
                  The transcript for which cDNA coordinates should be
                  relative to.
349 350 351
    Example     : $cdna_start = $exon->cdna_start($transcript);
    Description : Returns the start position of the exon in cDNA
                  coordinates.
352 353 354
                  Since an exon may be part of one or more transcripts,
                  the relevant transcript must be given as argument to
                  this method.
355 356 357 358 359
    Return type : Integer
    Exceptions  : Throws if the given argument is not a transcript.
                  Throws if the first part of the exon maps into a gap.
                  Throws if the exon can not be mapped at all.
    Caller      : General
360
    Status      : Stable
361 362 363 364

=cut

sub cdna_start {
  my ( $self, $transcript ) = @_;
  assert_ref( $transcript, 'Bio::EnsEMBL::Transcript', 'transcript' );

  # Results are cached per transcript dbID.  A transcript without a
  # dbID is never cached and is recomputed on every call.
  my $transcript_id = $transcript->dbID();
  my $cacheable     = defined $transcript_id;

  if ( $cacheable && exists $self->{cdna_start}->{$transcript_id} ) {
    return $self->{cdna_start}->{$transcript_id};
  }

  my @mapped =
    $transcript->genomic2cdna( $self->start(), $self->end(),
                               $self->strand() );

  if ( !@mapped ) {
    throw("Can not map exon");
  }

  # Only the first mapped piece determines the cDNA start.
  my $first_piece = $mapped[0];
  if ( $first_piece->isa('Bio::EnsEMBL::Mapper::Gap') ) {
    throw("First part of exon maps into gap");
  }

  my $cdna_start = $first_piece->start();

  if ($cacheable) {
    $self->{cdna_start}->{$transcript_id} = $cdna_start;
  }

  return $cdna_start;
} ## end sub cdna_start

=head2 cdna_end

395 396 397
    Arg [1]     : Bio::EnsEMBL::Transcript $transcript
                  The transcript for which cDNA coordinates should be
                  relative to.
398 399 400
    Example     : $cdna_end = $exon->cdna_end($transcript);
    Description : Returns the end position of the exon in cDNA
                  coordinates.
401 402 403
                  Since an exon may be part of one or more transcripts,
                  the relevant transcript must be given as argument to
                  this method.
404 405 406 407 408
    Return type : Integer
    Exceptions  : Throws if the given argument is not a transcript.
                  Throws if the last part of the exon maps into a gap.
                  Throws if the exon can not be mapped at all.
    Caller      : General
409
    Status      : Stable
410 411 412 413

=cut

sub cdna_end {
  my ( $self, $transcript ) = @_;
  assert_ref( $transcript, 'Bio::EnsEMBL::Transcript', 'transcript' );

  # Results are cached per transcript dbID.  A transcript without a
  # dbID is never cached and is recomputed on every call.
  my $transcript_id = $transcript->dbID();
  my $cacheable     = defined $transcript_id;

  if ( $cacheable && exists $self->{cdna_end}->{$transcript_id} ) {
    return $self->{cdna_end}->{$transcript_id};
  }

  my @mapped =
    $transcript->genomic2cdna( $self->start(), $self->end(),
                               $self->strand() );

  if ( !@mapped ) {
    throw("Can not map exon");
  }

  # Only the last mapped piece determines the cDNA end.
  my $last_piece = $mapped[-1];
  if ( $last_piece->isa('Bio::EnsEMBL::Mapper::Gap') ) {
    throw("Last part of exon maps into gap");
  }

  my $cdna_end = $last_piece->end();

  if ($cacheable) {
    $self->{cdna_end}->{$transcript_id} = $cdna_end;
  }

  return $cdna_end;
} ## end sub cdna_end

=head2 cdna_coding_start

444 445 446
    Arg [1]     : Bio::EnsEMBL::Transcript $transcript
                  The transcript for which cDNA coordinates should be
                  relative to.
447 448 449 450
    Example     : $cdna_coding_start = $exon->cdna_coding_start($transcript);
    Description : Returns the start position of the coding region of the
                  exon in cDNA coordinates.  Returns undef if the whole
                  exon is non-coding.
451 452 453
                  Since an exon may be part of one or more transcripts,
                  the relevant transcript must be given as argument to
                  this method.
454 455 456
    Return type : Integer or undef
    Exceptions  : Throws if the given argument is not a transcript.
    Caller      : General
457
    Status      : Stable
458 459 460 461

=cut

sub cdna_coding_start {
  my ( $self, $transcript ) = @_;
  assert_ref( $transcript, 'Bio::EnsEMBL::Transcript', 'transcript' );

  my $transcript_id = $transcript->dbID();

  # Serve a previously computed answer (which may be undef) from the
  # per-transcript cache.
  if ( defined $transcript_id
       && exists $self->{cdna_coding_start}->{$transcript_id} )
  {
    return $self->{cdna_coding_start}->{$transcript_id};
  }

  # Stays undefined unless part of the coding region lies in this exon.
  my $coding_start;

  my $transcript_coding_start = $transcript->cdna_coding_start();

  if ( defined $transcript_coding_start ) {
    my $exon_cdna_start = $self->cdna_start($transcript);

    if ( $transcript_coding_start >= $exon_cdna_start ) {
      # Coding region starts within or downstream of this exon; it only
      # counts when it starts no later than the exon's last base.
      if ( $transcript_coding_start <= $self->cdna_end($transcript) ) {
        $coding_start = $transcript_coding_start;
      }
    }
    elsif ( $transcript->cdna_coding_end() >= $exon_cdna_start ) {
      # Coding region starts upstream of this exon and does not end
      # upstream of it, so the exon is coding from its first base.
      $coding_start = $exon_cdna_start;
    }
  }

  if ( defined $transcript_id ) {
    $self->{cdna_coding_start}->{$transcript_id} = $coding_start;

    # No coding start means no coding end either; record that as well.
    if ( !defined $coding_start ) {
      $self->{cdna_coding_end}->{$transcript_id} = undef;
    }
  }

  return $coding_start;
} ## end sub cdna_coding_start

=head2 cdna_coding_end

515 516 517
    Arg [1]     : Bio::EnsEMBL::Transcript $transcript
                  The transcript for which cDNA coordinates should be
                  relative to.
518 519 520 521
    Example     : $cdna_coding_end = $exon->cdna_coding_end($transcript);
    Description : Returns the end position of the coding region of the
                  exon in cDNA coordinates.  Returns undef if the whole
                  exon is non-coding.
522 523 524
                  Since an exon may be part of one or more transcripts,
                  the relevant transcript must be given as argument to
                  this method.
525 526 527
    Return type : Integer or undef
    Exceptions  : Throws if the given argument is not a transcript.
    Caller      : General
528
    Status      : Stable
529 530 531 532

=cut

sub cdna_coding_end {
  my ( $self, $transcript ) = @_;
  assert_ref( $transcript, 'Bio::EnsEMBL::Transcript', 'transcript' );

  my $transcript_id = $transcript->dbID();

  # Serve a previously computed answer (which may be undef) from the
  # per-transcript cache.
  if ( defined $transcript_id
       && exists $self->{cdna_coding_end}->{$transcript_id} )
  {
    return $self->{cdna_coding_end}->{$transcript_id};
  }

  # Stays undefined unless part of the coding region lies in this exon.
  my $coding_end;

  my $transcript_coding_end = $transcript->cdna_coding_end();

  if ( defined $transcript_coding_end ) {
    my $exon_cdna_end = $self->cdna_end($transcript);

    if ( $transcript_coding_end <= $exon_cdna_end ) {
      # Coding region ends within or upstream of this exon; it only
      # counts when it ends no earlier than the exon's first base.
      if ( $transcript_coding_end >= $self->cdna_start($transcript) ) {
        $coding_end = $transcript_coding_end;
      }
    }
    elsif ( $transcript->cdna_coding_start() <= $exon_cdna_end ) {
      # Coding region ends downstream of this exon and does not start
      # downstream of it, so the exon is coding up to its last base.
      $coding_end = $exon_cdna_end;
    }
  }

  if ( defined $transcript_id ) {
    $self->{cdna_coding_end}->{$transcript_id} = $coding_end;

    # No coding end means no coding start either; record that as well.
    if ( !defined $coding_end ) {
      $self->{cdna_coding_start}->{$transcript_id} = undef;
    }
  }

  return $coding_end;
} ## end sub cdna_coding_end

=head2 coding_region_start

587
    Arg [1]     : Bio::EnsEMBL::Transcript $transcript
588 589
    Example     : $coding_region_start =
                    $exon->coding_region_start($transcript);
590 591 592 593
    Description : Returns the start position of the coding region
                  of the exon in slice-relative coordinates on the
                  forward strand.  Returns undef if the whole exon is
                  non-coding.
594 595 596
                  Since an exon may be part of one or more transcripts,
                  the relevant transcript must be given as argument to
                  this method.
597
    Return type : Integer or undef
Andreas Kusalananda Kähäri's avatar
Andreas Kusalananda Kähäri committed
598
    Exceptions  : Throws if the given argument is not a transcript.
599
    Caller      : General
600
    Status      : Stable
601 602 603

=cut

604
# The implementation of this method is analogous to the implementation
Andreas Kusalananda Kähäri's avatar
Andreas Kusalananda Kähäri committed
605
# of cdna_coding_start().
606 607

sub coding_region_start {
  my ( $self, $transcript ) = @_;
  assert_ref( $transcript, 'Bio::EnsEMBL::Transcript', 'transcript' );

  my $transcript_id = $transcript->dbID();

  # Serve a previously computed answer (which may be undef) from the
  # per-transcript cache.
  if ( defined $transcript_id
       && exists $self->{coding_region_start}->{$transcript_id} )
  {
    return $self->{coding_region_start}->{$transcript_id};
  }

  # Stays undefined unless part of the coding region lies in this exon.
  my $region_start;

  my $transcript_coding_start = $transcript->coding_region_start();

  if ( defined $transcript_coding_start ) {
    my $exon_start = $self->start();

    if ( $transcript_coding_start >= $exon_start ) {
      # Coding region starts within or downstream of this exon; it only
      # counts when it starts no later than the exon end.
      if ( $transcript_coding_start <= $self->end() ) {
        $region_start = $transcript_coding_start;
      }
    }
    elsif ( $transcript->coding_region_end() >= $exon_start ) {
      # Coding region starts upstream of this exon and does not end
      # upstream of it, so the exon is coding from its start.
      $region_start = $exon_start;
    }
  }

  if ( defined $transcript_id ) {
    $self->{coding_region_start}->{$transcript_id} = $region_start;

    # No coding start means no coding end either; record that as well.
    if ( !defined $region_start ) {
      $self->{coding_region_end}->{$transcript_id} = undef;
    }
  }

  return $region_start;
} ## end sub coding_region_start

=head2 coding_region_end

662
    Arg [1]     : Bio::EnsEMBL::Transcript $transcript
663 664
    Example     : $coding_region_end =
                    $exon->coding_region_end($transcript);
665 666 667 668
    Description : Returns the end position of the coding region of
                  the exon in slice-relative coordinates on the
                  forward strand.  Returns undef if the whole exon is
                  non-coding.
669 670 671
                  Since an exon may be part of one or more transcripts,
                  the relevant transcript must be given as argument to
                  this method.
672
    Return type : Integer or undef
Andreas Kusalananda Kähäri's avatar
Andreas Kusalananda Kähäri committed
673
    Exceptions  : Throws if the given argument is not a transcript.
674
    Caller      : General
675
    Status      : Stable
676 677 678

=cut

679
# The implementation of this method is analogous to the implementation
Andreas Kusalananda Kähäri's avatar
Andreas Kusalananda Kähäri committed
680
# of cdna_coding_end().
681 682

sub coding_region_end {
  my ( $self, $transcript ) = @_;
  assert_ref( $transcript, 'Bio::EnsEMBL::Transcript', 'transcript' );

  my $transcript_id = $transcript->dbID();

  # Serve a previously computed answer (which may be undef) from the
  # per-transcript cache.
  if ( defined $transcript_id
       && exists $self->{coding_region_end}->{$transcript_id} )
  {
    return $self->{coding_region_end}->{$transcript_id};
  }

  # Stays undefined for a non-coding transcript, or when no part of the
  # coding region lies in this exon.
  my $region_end;

  my $transcript_coding_end = $transcript->coding_region_end();

  if ( defined $transcript_coding_end ) {
    my $exon_end = $self->end();

    if ( $transcript_coding_end <= $exon_end ) {
      # Coding region ends within or upstream of this exon; it only
      # counts when it ends no earlier than the exon start.
      if ( $transcript_coding_end >= $self->start() ) {
        $region_end = $transcript_coding_end;
      }
    }
    elsif ( $transcript->coding_region_start() <= $exon_end ) {
      # Coding region ends downstream of this exon and does not start
      # downstream of it, so the exon is coding up to its end.
      $region_end = $exon_end;
    }
  }

  if ( defined $transcript_id ) {
    $self->{coding_region_end}->{$transcript_id} = $region_end;

    # No coding end means no coding start either; record that as well.
    if ( !defined $region_end ) {
      $self->{coding_region_start}->{$transcript_id} = undef;
    }
  }

  return $region_end;
} ## end sub coding_region_end
732 733 734 735 736 737 738 739 740 741 742

=head2 slice

  Arg [1]    : Bio::EnsEMBL::Slice
  Example    : $slice = $exon->slice();
  Description: Getter/Setter for the slice this exon is on.  The superclass
               implementation is overridden to flush the internal sequence
               cache if this value is altered
  Returntype : Bio::EnsEMBL::Slice
  Exceptions : none
  Caller     : general
743
  Status     : Stable
744 745 746

=cut

747
sub slice {
  my ( $self, $slice ) = @_;

  if ( defined($slice) ) {
    # A new slice was provided: flush the internal sequence cache and
    # transfer all supporting evidence to the new slice.
    delete $self->{'_seq_cache'};

    if ( exists( $self->{'_supporting_evidence'} ) ) {
      my @new_features;

      for my $old_feature ( @{ $self->{'_supporting_evidence'} } ) {

        # A feature without a slice is assumed not to need a transfer
        # and is kept as it is.
        my $new_feature =
          defined( $old_feature->slice() )
          ? $old_feature->transfer($slice)
          : $old_feature;

        # transfer() may return undef (this class's own transfer() does
        # so when the superclass call fails).  Such features are dropped,
        # as transform() already does, so that the evidence list never
        # holds undefined entries that would break later method calls.
        push( @new_features, $new_feature ) if defined($new_feature);
      }

      $self->{'_supporting_evidence'} = \@new_features;
    }

    return $self->SUPER::slice($slice);
  }
  elsif ( @_ > 1 ) {
    # An explicit undef was passed: forward it to the superclass.
    return $self->SUPER::slice(undef);
  }
  else {
    # Plain getter.
    return $self->SUPER::slice();
  }
} ## end sub slice
784

785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806
=head2 equals

  Arg [1]       : Bio::EnsEMBL::Exon exon
  Example       : if ($exonA->equals($exonB)) { ... }
  Description   : Compares two exons for equality.
                  The test for equality goes through the following list
                  and terminates at the first true match:

                  1. If Bio::EnsEMBL::Feature::equals() returns false,
                     then the exons are *not* equal.
                  2. If both exons have stable IDs: if these are the
                     same, the exons are equal, otherwise not.
                  3. If the exons have the same start, end, strand,
                     phase, and end phase, then they are equal,
                     otherwise not.

  Return type   : Boolean (0, 1)

  Exceptions    : Thrown if a non-exon is passed as the argument.

=cut

sub equals {
  my ( $self, $exon ) = @_;

  # Trivial cases: nothing to compare with, or the very same object.
  return 0 if !defined($exon);
  return 1 if $self eq $exon;

  assert_ref( $exon, 'Bio::EnsEMBL::Exon' );

  # A defined, zero answer from the generic feature comparison settles
  # the matter; any other answer leaves the decision to the checks below.
  my $feature_equals = $self->SUPER::equals($exon);
  return 0 if defined($feature_equals) && $feature_equals == 0;

  # When both exons carry a stable ID, the stable IDs alone decide.
  my $own_stable_id   = $self->stable_id();
  my $other_stable_id = $exon->stable_id();

  if ( defined($own_stable_id) && defined($other_stable_id) ) {
    return ( $own_stable_id eq $other_stable_id ) ? 1 : 0;
  }

  # Otherwise compare coordinates and phases.  The chain short-circuits,
  # so later accessors are only called when the earlier ones matched.
  my $same_location_and_phase =
       $self->start() == $exon->start()
    && $self->end() == $exon->end()
    && $self->strand() == $exon->strand()
    && $self->phase() == $exon->phase()
    && $self->end_phase() == $exon->end_phase();

  return $same_location_and_phase ? 1 : 0;
} ## end sub equals
840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855

=head2 move

  Arg [1]    : int start
  Arg [2]    : int end
  Arg [3]    : (optional) int strand
  Example    : None
  Description: Sets the start, end and strand in one call rather than in 
               3 separate calls to the start(), end() and strand() methods.
               This is for convenience and for speed when this needs to be
               done within a tight loop.  This overrides the superclass
               move() method so that the internal sequence cache can be
               flushed if the exon is moved.
  Returntype : none
  Exceptions : Thrown if invalid arguments are provided
  Caller     : general
856
  Status     : Stable
857 858 859 860 861

=cut

sub move {
  my ( $self, @args ) = @_;

  # The cached sequence is always discarded here, whatever the
  # arguments are, before the superclass performs the move.
  delete $self->{'_seq_cache'};

  return $self->SUPER::move(@args);
}


=head2 transform

  Arg  1     : String $coordinate_system_name
  Arg [2]    : String $coordinate_system_version
  Description: moves this exon to the given coordinate system. If this exon has
               attached supporting evidence, they move as well.
  Returntype : Bio::EnsEMBL::Exon
  Exceptions : wrong parameters
  Caller     : general
877
  Status     : Stable
878 879 880 881 882 883 884

=cut

sub transform {
  my ( $self, @args ) = @_;

  # Old-style calls pass either nothing or a slice object instead of a
  # coordinate system name; they go to the deprecated code path.
  my $first_arg = $args[0];
  my $is_old_style_call =
    !@args
    || ( ref($first_arg)
         && (    $first_arg->isa("Bio::EnsEMBL::Slice")
              || $first_arg->isa("Bio::EnsEMBL::LRGSlice") ) );

  if ($is_old_style_call) {
    deprecate('Calling transform without a coord system name is deprecated.');
    return $self->_deprecated_transform(@args);
  }

  my $new_exon = $self->SUPER::transform(@args);

  # Give up when the exon could not be transformed, or when its length
  # changed in the process.
  return undef unless defined $new_exon;
  return undef if $new_exon->length != $self->length;

  if ( exists $self->{'_supporting_evidence'} ) {
    my @new_features;

    for my $old_feature ( @{ $self->{'_supporting_evidence'} } ) {
      my $new_feature = $old_feature->transform(@args);

      # Supporting features that fail to transform are left out.
      next unless defined $new_feature;
      push( @new_features, $new_feature );
    }

    $new_exon->{'_supporting_evidence'} = \@new_features;
  }

  # The new exon must not share the sequence cache of the original.
  delete $new_exon->{'_seq_cache'};

  return $new_exon;
}

Graham McVicker's avatar
Graham McVicker committed
915

916
=head2 transfer
917

918 919 920 921 922
  Arg [1]    : Bio::EnsEMBL::Slice $destination_slice
  Example    : none
  Description: Moves this Exon to given target slice coordinates. If Features
               are attached they are moved as well. Returns a new exon.
  Returntype : Bio::EnsEMBL::Exon
Graham McVicker's avatar
Graham McVicker committed
923
  Exceptions : none
924
  Caller     : general
925
  Status     : Stable
926 927 928

=cut

929 930
sub transfer {
  # Purpose: produce a copy of this exon placed on the given destination
  # slice, taking any attached supporting evidence along.
  #   @_      - destination slice, handed unchanged to the superclass
  #             transfer() and to each supporting feature's transfer()
  #   returns - the new exon, or undef when the superclass transfer()
  #             yields a false value
  my $self  = shift;

  my $new_exon = $self->SUPER::transfer( @_ );

  # 'return undef' (not a bare return) is kept on purpose so that callers
  # in list context keep receiving a one-element list.
  return undef unless $new_exon;

  if( exists $self->{'_supporting_evidence'} ) {
    my @new_features;
    for my $old_feature ( @{$self->{'_supporting_evidence'}} ) {
      my $new_feature = $old_feature->transfer( @_ );

      # A supporting feature's transfer() may yield undef; transform() in
      # this class already guards against that.  Skip such results so the
      # evidence list never contains undefined entries.
      if (defined $new_feature) {
        push( @new_features, $new_feature );
      }
    }
    $new_exon->{'_supporting_evidence'} = \@new_features;
  }

  # the new exon must not share this exon's sequence cache
  delete $new_exon->{'_seq_cache'};

  return $new_exon;
}
949 950


Eduardo Eyras's avatar
Eduardo Eyras committed
951
=head2 add_supporting_features
952

953
  Arg [1]    : Bio::EnsEMBL::Feature $feature
954 955 956 957 958 959 960 961
  Example    : $exon->add_supporting_features(@features);
  Description: Adds a list of supporting features to this exon. 
               Duplicate features are not added.  
               If supporting features are added manually in this
               way, prior to calling get_all_supporting_features then the
               get_all_supporting_features call will not retrieve supporting
               features from the database.
  Returntype : none
962
  Exceptions : throw if any of the features are not Feature
963 964
               throw if any of the features are not in the same coordinate
               system as the exon
965
  Caller     : general
966
  Status     : Stable
967 968 969

=cut

970 971
sub add_supporting_features {
  my $self       = shift;
  my @candidates = @_;

  # Purpose: attach supporting features to this exon, ignoring any that
  # are already attached (same object).
  #   @candidates - Bio::EnsEMBL::Feature objects to attach
  #   throws      - if a candidate is not a Bio::EnsEMBL::Feature, or if
  #                 both it and the exon have a slice and the slice names
  #                 differ

  # called without features: leave the exon untouched
  return if !@candidates;

  my $evidence = ( $self->{_supporting_evidence} ||= [] );

  foreach my $candidate (@candidates) {
    if ( !$candidate || !$candidate->isa("Bio::EnsEMBL::Feature") ) {
      throw("Supporting feat [$candidate] not a " .
            "Bio::EnsEMBL::Feature");
    }

    # The slice comparison only applies when both sides have a slice;
    # the feature's slice is not even looked up if the exon has none.
    my $exon_slice = $self->slice();
    my $feat_slice = defined $exon_slice ? $candidate->slice() : undef;

    if ( defined $exon_slice && defined $feat_slice ) {
      my $exon_slice_name = $exon_slice->name();
      my $feat_slice_name = $feat_slice->name();

      if ( $exon_slice_name ne $feat_slice_name ) {
        throw("Supporting feat not in same coord system as exon\n" .
              "exon is attached to [".$exon_slice_name."]\n" .
              "feat is attached to [".$feat_slice_name."]");
      }
    }

    # identity comparison: the very same object is never stored twice
    next if grep { $candidate == $_ } @{$evidence};

    push @{$evidence}, $candidate;
  }
}

1004

1005 1006 1007 1008 1009 1010 1011
=head2 flush_supporting_features

  Example     : $exon->flush_supporting_features;
  Description : Removes all supporting evidence from the exon.
  Return type : (Empty) listref
  Exceptions  : none
  Caller      : general
1012
  Status      : Stable
1013 1014 1015 1016 1017 1018 1019 1020 1021

=cut

sub flush_supporting_features {
  my ($self) = @_;

  # Purpose: drop all supporting evidence from this exon.
  # The slot is set to a fresh empty list rather than deleted, so the key
  # still exists afterwards.  The new (empty) list reference is returned.
  return $self->{'_supporting_evidence'} = [];
}


1022
=head2 get_all_supporting_features
Michele Clamp's avatar
Michele Clamp committed
1023

1024
  Arg [1]    : none
1025
  Example    : @evidence = @{$exon->get_all_supporting_features()};
1026 1027 1028
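  Description: Retrieves any supporting features added manually by
               calls to add_supporting_features. If no features have been
               added manually and this exon is in a database (i.e. it has
               an adaptor), the supporting features are fetched from the
               database and cached on the exon. Returns a reference to an
               empty list if there are none.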
Graham McVicker's avatar
Graham McVicker committed
1029 1030 1031
  Returntype : listreference of Bio::EnsEMBL::BaseAlignFeature objects 
  Exceptions : none
  Caller     : general
1032
  Status     : Stable
Michele Clamp's avatar
Michele Clamp committed
1033 1034 1035

=cut

1036 1037
sub get_all_supporting_features {
  my ($self) = @_;

  # Purpose: return the supporting evidence of this exon as a list
  # reference.  If the evidence slot has never been set and the exon has
  # an adaptor, the features are fetched through the adaptor's
  # SupportingFeatureAdaptor and stored on the exon for later calls.
  unless ( exists $self->{_supporting_evidence} ) {
    my $adaptor = $self->adaptor;

    if ($adaptor) {
      my $sf_adaptor = $adaptor->db->get_SupportingFeatureAdaptor();
      $self->{_supporting_evidence} = $sf_adaptor->fetch_all_by_Exon($self);
    }
  }

  # fall back to an empty list when there is nothing stored
  return $self->{_supporting_evidence} || [];
}


1050
=head2 find_supporting_evidence
1051

1052 1053 1054 1055
# This method is only for genebuild backwards compatibility.
# Avoid using it if possible

  Arg [1]    : Bio::EnsEMBL::Feature $features
1056 1057 1058 1059 1060 1061 1062 1063 1064