Exon.pm 41.1 KB
Newer Older
1 2
=head1 LICENSE

3
  Copyright (c) 1999-2010 The European Bioinformatics Institute and
4
  Genome Research Limited.  All rights reserved.
5

6 7
  This software is distributed under a modified Apache license.
  For license details, please see
Ewan Birney's avatar
Ewan Birney committed
8

9 10 11
    http://www.ensembl.org/info/about/code_licence.html

=head1 CONTACT
Ewan Birney's avatar
Ewan Birney committed
12

13 14 15 16 17 18 19
  Please email comments or questions to the public Ensembl
  developers list at <ensembl-dev@ebi.ac.uk>.

  Questions may also be sent to the Ensembl help desk at
  <helpdesk@ensembl.org>.

=cut
20

21 22 23
=head1 NAME

Bio::EnsEMBL::Exon - A class representing an Exon
Ewan Birney's avatar
Ewan Birney committed
24 25 26

=head1 SYNOPSIS

27 28 29 30 31 32 33 34 35 36
    $ex = new Bio::EnsEMBL::Exon(
      -START     => 100,
      -END       => 200,
      -STRAND    => 1,
      -SLICE     => $slice,
      -DBID      => $dbID,
      -ANALYSIS  => $analysis,
      -STABLE_ID => 'ENSE000000123',
      -VERSION   => 2
    );
37

38 39
  # seq() returns a Bio::Seq
  my $seq = $exon->seq->seq();
40

41 42
  # Peptide only makes sense within transcript context
  my $pep = $exon->peptide($transcript)->seq();
43

44 45 46 47
  # Normal feature operations can be performed:
  $exon = $exon->transform('clone');
  $exon->move( $new_start, $new_end, $new_strand );
  print $exon->slice->seq_region_name();
Ewan Birney's avatar
Ewan Birney committed
48 49 50

=head1 DESCRIPTION

51 52
This is a class which represents an exon which is part of a transcript.
See Bio::EnsEMBL::Transcript
Ewan Birney's avatar
Ewan Birney committed
53

Graham McVicker's avatar
Graham McVicker committed
54
=head1 METHODS
Ewan Birney's avatar
Ewan Birney committed
55 56 57

=cut

58 59
package Bio::EnsEMBL::Exon;

Ewan Birney's avatar
Ewan Birney committed
60 61
use strict;

62
use Bio::EnsEMBL::Feature;
Ewan Birney's avatar
Ewan Birney committed
63
use Bio::Seq; # exons have to have sequences...
Ewan Birney's avatar
Ewan Birney committed
64

65 66
use Bio::EnsEMBL::Utils::Exception qw( warning throw deprecate );
use Bio::EnsEMBL::Utils::Argument qw( rearrange );
Bronwen Aken's avatar
Bronwen Aken committed
67
use Bio::EnsEMBL::DBSQL::SupportingFeatureAdaptor;
Ewan Birney's avatar
Ewan Birney committed
68

69
use vars qw(@ISA);
70
@ISA = qw(Bio::EnsEMBL::Feature);
Graham McVicker's avatar
Graham McVicker committed
71 72


73 74
=head2 new

75 76 77 78 79 80 81 82 83 84
  Arg [-SLICE]: Bio::EnsEMBL::Slice - Represents the sequence that this
                feature is on. The coordinates of the created feature are
                relative to the start of the slice.
  Arg [-START]: The start coordinate of this feature relative to the start
                of the slice it is sitting on.  Coordinates start at 1 and
                are inclusive.
  Arg [-END]  : The end coordinate of this feature relative to the start of
                the slice it is sitting on.  Coordinates start at 1 and are
                inclusive.
  Arg [-STRAND]: The orientation of this feature.  Valid values are 1,-1,0.
85 86
  Arg [-SEQNAME] : (optional) A seqname to be used instead of the default name
                of the slice.  Useful for features that do not have an
87 88 89
                attached slice such as protein features.
  Arg [-dbID]   : (optional) internal database id
  Arg [-ADAPTOR]: (optional) Bio::EnsEMBL::DBSQL::BaseAdaptor
90 91 92 93 94 95
  Arg [-PHASE]    : the phase. 
  Arg [-END_PHASE]: the end phase
  Arg [-STABLE_ID]: (optional) the stable id of the exon
  Arg [-VERSION]  : (optional) the version
  Arg [-CREATED_DATE] : (optional) the created date
  Arg [-MODIFIED_DATE]: (optional) the last modified date
96

97 98
  Example    : none
  Description: create an Exon object
Graham McVicker's avatar
Graham McVicker committed
99
  Returntype : Bio::EnsEMBL::Exon
100
  Exceptions : if phase is not valid (i.e. 0,1, 2 -1)
101
  Caller     : general
102
  Status     : Stable
103 104

=cut
Ewan Birney's avatar
Ewan Birney committed
105

106
sub new {
  my $caller = shift;

  # Accept both a class name and an existing object as the invocant.
  my $class = ref($caller) || $caller;

  # The superclass constructor sees the full, untouched argument list.
  my $self = $class->SUPER::new(@_);

  my @exon_arg_names = (
    "PHASE",        "END_PHASE",
    "STABLE_ID",    "VERSION",
    "CREATED_DATE", "MODIFIED_DATE",
    "IS_CURRENT",   "IS_CONSTITUTIVE"
  );

  my ( $phase, $end_phase, $stable_id, $version, $created_date,
    $modified_date, $is_current, $is_constitutive )
    = rearrange( \@exon_arg_names, @_ );

  # Go through the setter so that the phase value is validated.
  $self->phase($phase) if defined($phase);

  # These are stored as given, including undef when not supplied.
  @{$self}{ 'end_phase', 'stable_id', 'version', 'created_date',
    'modified_date' }
    = ( $end_phase, $stable_id, $version, $created_date,
    $modified_date );

  # is_current defaults to 1, is_constitutive defaults to 0.
  $self->{'is_current'} = defined($is_current) ? $is_current : 1;
  $self->{'is_constitutive'} =
    defined($is_constitutive) ? $is_constitutive : 0;

  return $self;
}

Graham McVicker's avatar
Graham McVicker committed
145

146
# =head2 new_fast
147

148 149 150 151 152 153 154 155 156 157
#   Arg [1]    : Bio::EnsEMBL::Slice $slice
#   Arg [2]    : int $start
#   Arg [3]    : int $end
#   Arg [4]    : int $strand (1 or -1)
#   Example    : none
#   Description: create an Exon object
#   Returntype : Bio::EnsEMBL::Exon
#   Exceptions : throws if end < start
#   Caller     : general, creation in Bio::EnsEMBL::Lite::GeneAdaptor
#   Status     : Stable
158

159
# =cut
160

161 162
# sub new_fast {
#   my ($class, $slice, $start, $end, $strand) = @_;
163

164
#   my $self = bless {}, $class;
165

166 167
#   # Swap start and end if they're in the wrong order
#   # We assume that the strand is correct and keep the input value.
168

169 170 171
#   if ($start > $end) {
#     throw( "End smaller than start not allowed" );
#   }
172

173 174 175 176
#   $self->start ($start);
#   $self->end   ($end);
#   $self->strand($strand);
#   $self->slice($slice);
177

178 179
#   return $self;
# }
180 181


182 183
=head2 end_phase

Graham McVicker's avatar
Graham McVicker committed
184 185 186 187 188 189 190 191 192 193 194 195
  Arg [1]    : (optional) int $end_phase
  Example    : $end_phase = $feat->end_phase;
  Description: Gets/Sets the end phase of the exon.
               end_phase = number of bases from the last incomplete codon of 
               this exon.
               Usually, end_phase = (phase + exon_length)%3
               but end_phase could be -1 if the exon is half-coding and its 3 
               prime end is UTR.
  Returntype : int
  Exceptions : warning if end_phase is called without an argument and the
               value is not set.
  Caller     : general
196
  Status     : Stable
197 198 199 200

=cut

sub end_phase {
  my ( $self, @args ) = @_;

  # Setter path: store whatever was passed (even undef) and return it.
  if (@args) {
    $self->{'end_phase'} = $args[0];
    return $self->{'end_phase'};
  }

  # Getter path: warn when nothing has been stored yet.
  unless ( defined( $self->{'end_phase'} ) ) {
    warning( "No end phase set in Exon. You must set it explicitly." );
  }

  return $self->{'end_phase'};
}
211

212

213 214
=head2 phase

215 216 217 218 219 220 221 222 223
  Arg [1]    : (optional) int $phase
  Example    :  my $phase = $exon->phase;
                $exon->phase(2);
  Description: Gets/Sets the phase of the exon.
  Returntype : int
  Exceptions : throws if phase is not (0, 1 2 or -1).
  Caller     : general
  Status     : Stable

224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242

Get or set the phase of the Exon, which tells the
translation machinery, which makes a peptide from
the DNA, where to start.

The Ensembl phase convention can be thought of as
"the number of bases of the first codon which are
on the previous exon".  It is therefore 0, 1 or 2
(or -1 if the exon is non-coding).  In ascii art,
with alternate codons represented by B<###> and
B<+++>:

       Previous Exon   Intron   This Exon
    ...-------------            -------------...

    5'                    Phase                3'
    ...#+++###+++###          0 +++###+++###+...
    ...+++###+++###+          1 ++###+++###++...
    ...++###+++###++          2 +###+++###+++...
243

244 245 246 247 248 249 250 251
Here is another explanation from Ewan:

Phase means the place where the intron lands
inside the codon - 0 between  codons, 1 between
the 1st and second base, 2 between the second and
3rd  base. Exons therefore have a start phase and
a end phase, but introns have just one phase.

252 253 254 255 256 257
=cut

sub phase {
  my ( $self, $value ) = @_;

  # Getter path: nothing (or undef) was passed.
  return $self->{'phase'} unless defined($value);

  # Value must be 0,1,2, or -1 for non-coding
  unless ( $value =~ /^(-1|0|1|2)$/ ) {
    throw("Bad value ($value) for exon phase. Should only be" .
          " -1,0,1,2\n");
  }

  $self->{'phase'} = $value;

  return $self->{'phase'};
}

Graham McVicker's avatar
Graham McVicker committed
270

Michele Clamp's avatar
Michele Clamp committed
271 272
=head2 frame

Graham McVicker's avatar
Graham McVicker committed
273 274 275 276 277 278 279
  Arg [1]    : none
  Example    : $frame = $exon->frame
  Description: Gets the frame of this exon
  Returntype : int
  Exceptions : thrown if an arg is passed
               thrown if frame cannot be calculated due to a bad phase value
  Caller     : general
280
  Status     : Stable
Michele Clamp's avatar
Michele Clamp committed
281 282 283 284 285

=cut

sub frame {
  my ( $self, $value ) = @_;

  # The frame is read-only; it is derived from start and phase.
  throw("Cannot set frame. Deduced from seq_start and phase")
    if defined $value;

  my $phase = $self->phase;

  # frame is mod 3 of the translation point
  return '.' if $phase == -1;    # gff convention for no frame info
  return $self->start % 3 if $phase == 0;
  return ( $self->start + 2 ) % 3 if $phase == 1;
  return ( $self->start + 1 ) % 3 if $phase == 2;

  throw( "bad phase in exon " . $phase );
}


=head2 start

  Arg [1]    : int $start (optional)
  Example    : $start = $exon->start();
  Description: Getter/Setter for the start of this exon.  The superclass
               implementation is overridden to flush the internal sequence
               cache if this value is altered
  Returntype : int
  Exceptions : none
  Caller     : general
323
  Status     : Stable
324 325 326 327 328

=cut

sub start {
  my ( $self, @args ) = @_;

  # Being called as a setter invalidates the cached sequence.
  if (@args) {
    delete $self->{'_seq_cache'};
  }

  return $self->SUPER::start(@args);
}


=head2 end

  Arg [1]    : int $end (optional)
  Example    : $end = $exon->end();
  Description: Getter/Setter for the end of this exon.  The superclass
               implementation is overridden to flush the internal sequence
               cache if this value is altered
  Returntype : int
  Exceptions : none
  Caller     : general
345
  Status     : Stable
346 347 348 349 350

=cut

sub end {
  my ( $self, @args ) = @_;

  # Being called as a setter invalidates the cached sequence.
  if (@args) {
    delete $self->{'_seq_cache'};
  }

  return $self->SUPER::end(@args);
}


=head2 strand

  Arg [1]    : int $strand (optional)
  Example    : $strand = $exon->strand();
  Description: Getter/Setter for the strand of this exon.  The superclass
               implementation is overridden to flush the internal sequence
               cache if this value is altered
  Returntype : int
  Exceptions : none
  Caller     : general
367
  Status     : Stable
368 369 370 371 372

=cut

sub strand {
  my ( $self, @args ) = @_;

  # Being called as a setter invalidates the cached sequence.
  if (@args) {
    delete $self->{'_seq_cache'};
  }

  return $self->SUPER::strand(@args);
}

378 379
=head2 cdna_start

380 381 382
    Arg [1]     : Bio::EnsEMBL::Transcript $transcript
                  The transcript for which cDNA coordinates should be
                  relative to.
383 384 385
    Example     : $cdna_start = $exon->cdna_start($transcript);
    Description : Returns the start position of the exon in cDNA
                  coordinates.
386 387 388
                  Since an exon may be part of one or more transcripts,
                  the relevant transcript must be given as argument to
                  this method.
389 390 391 392 393
    Return type : Integer
    Exceptions  : Throws if the given argument is not a transcript.
                  Throws if the first part of the exon maps into a gap.
                  Throws if the exon can not be mapped at all.
    Caller      : General
394
    Status      : Stable
395 396 397 398

=cut

sub cdna_start {
  my ( $self, $transcript ) = @_;

  unless (    defined($transcript)
           && ref($transcript)
           && $transcript->isa('Bio::EnsEMBL::Transcript') )
  {
    throw("Argument is not a transcript");
  }

  my $transcript_id = $transcript->dbID();

  # Results are cached per transcript dbID; reuse a previous answer.
  return $self->{'cdna_start'}->{$transcript_id}
    if exists( $self->{'cdna_start'}->{$transcript_id} );

  my @coords =
    $transcript->genomic2cdna( $self->start(), $self->end(),
                               $self->strand() );

  if ( !@coords ) {
    throw("Can not map exon");
  }

  my $first = $coords[0];
  if ( $first->isa('Bio::EnsEMBL::Mapper::Gap') ) {
    throw("First part of exon maps into a gap");
  }

  $self->{'cdna_start'}->{$transcript_id} = $first->start();

  return $self->{'cdna_start'}->{$transcript_id};
} ## end sub cdna_start

=head2 cdna_end

430 431 432
    Arg [1]     : Bio::EnsEMBL::Transcript $transcript
                  The transcript for which cDNA coordinates should be
                  relative to.
433 434 435
    Example     : $cdna_end = $exon->cdna_end($transcript);
    Description : Returns the end position of the exon in cDNA
                  coordinates.
436 437 438
                  Since an exon may be part of one or more transcripts,
                  the relevant transcript must be given as argument to
                  this method.
439 440 441 442 443
    Return type : Integer
    Exceptions  : Throws if the given argument is not a transcript.
                  Throws if the last part of the exon maps into a gap.
                  Throws if the exon can not be mapped at all.
    Caller      : General
444
    Status      : Stable
445 446 447 448

=cut

sub cdna_end {
  my ( $self, $transcript ) = @_;

  unless (    defined($transcript)
           && ref($transcript)
           && $transcript->isa('Bio::EnsEMBL::Transcript') )
  {
    throw("Argument is not a transcript");
  }

  my $transcript_id = $transcript->dbID();

  # Results are cached per transcript dbID; reuse a previous answer.
  return $self->{'cdna_end'}->{$transcript_id}
    if exists( $self->{'cdna_end'}->{$transcript_id} );

  my @coords =
    $transcript->genomic2cdna( $self->start(), $self->end(),
                               $self->strand() );

  if ( !@coords ) {
    throw("Can not map exon");
  }

  my $last = $coords[-1];
  if ( $last->isa('Bio::EnsEMBL::Mapper::Gap') ) {
    throw("Last part of exon maps into gap");
  }

  $self->{'cdna_end'}->{$transcript_id} = $last->end();

  return $self->{'cdna_end'}->{$transcript_id};
} ## end sub cdna_end

=head2 cdna_coding_start

480 481 482
    Arg [1]     : Bio::EnsEMBL::Transcript $transcript
                  The transcript for which cDNA coordinates should be
                  relative to.
483 484 485 486
    Example     : $cdna_coding_start = $exon->cdna_coding_start($transcript);
    Description : Returns the start position of the coding region of the
                  exon in cDNA coordinates.  Returns undef if the whole
                  exon is non-coding.
487 488 489
                  Since an exon may be part of one or more transcripts,
                  the relevant transcript must be given as argument to
                  this method.
490 491 492
    Return type : Integer or undef
    Exceptions  : Throws if the given argument is not a transcript.
    Caller      : General
493
    Status      : Stable
494 495 496 497

=cut

sub cdna_coding_start {
  my ( $self, $transcript ) = @_;

  unless (    defined($transcript)
           && ref($transcript)
           && $transcript->isa('Bio::EnsEMBL::Transcript') )
  {
    throw("Argument is not a transcript");
  }

  my $transcript_id = $transcript->dbID();

  if ( !exists( $self->{'cdna_coding_start'}->{$transcript_id} ) ) {
    my $transcript_coding_start = $transcript->cdna_coding_start();

    # Stays undef unless the coding region overlaps this exon.
    my $exon_coding_start;

    if ( defined($transcript_coding_start) ) {
      my $exon_cdna_start = $self->cdna_start($transcript);

      if ( $transcript_coding_start >= $exon_cdna_start ) {
        # Coding region starts either within or downstream of this
        # exon; only the "within" case yields a value.
        if ( $transcript_coding_start <= $self->cdna_end($transcript) )
        {
          $exon_coding_start = $transcript_coding_start;
        }
      } elsif ( $transcript->cdna_coding_end() >= $exon_cdna_start ) {
        # Coding region starts upstream of this exon but does not
        # end upstream of it, so the exon start is the coding start.
        $exon_coding_start = $exon_cdna_start;
      }
    } else {
      # This is a non-coding transcript; the end is cached as
      # undefined as well.
      $self->{'cdna_coding_end'}->{$transcript_id} = undef;
    }

    $self->{'cdna_coding_start'}->{$transcript_id} = $exon_coding_start;
  }

  return $self->{'cdna_coding_start'}->{$transcript_id};
} ## end sub cdna_coding_start

=head2 cdna_coding_end

552 553 554
    Arg [1]     : Bio::EnsEMBL::Transcript $transcript
                  The transcript for which cDNA coordinates should be
                  relative to.
555 556 557 558
    Example     : $cdna_coding_end = $exon->cdna_coding_end($transcript);
    Description : Returns the end position of the coding region of the
                  exon in cDNA coordinates.  Returns undef if the whole
                  exon is non-coding.
559 560 561
                  Since an exon may be part of one or more transcripts,
                  the relevant transcript must be given as argument to
                  this method.
562 563 564
    Return type : Integer or undef
    Exceptions  : Throws if the given argument is not a transcript.
    Caller      : General
565
    Status      : Stable
566 567 568 569

=cut

sub cdna_coding_end {
  my ( $self, $transcript ) = @_;

  unless (    defined($transcript)
           && ref($transcript)
           && $transcript->isa('Bio::EnsEMBL::Transcript') )
  {
    throw("Argument is not a transcript");
  }

  my $transcript_id = $transcript->dbID();

  if ( !exists( $self->{'cdna_coding_end'}->{$transcript_id} ) ) {
    my $transcript_coding_end = $transcript->cdna_coding_end();

    # Stays undef unless the coding region overlaps this exon.
    my $exon_coding_end;

    if ( defined($transcript_coding_end) ) {
      my $exon_cdna_end = $self->cdna_end($transcript);

      if ( $transcript_coding_end <= $exon_cdna_end ) {
        # Coding region ends either within or upstream of this
        # exon; only the "within" case yields a value.
        if ( $transcript_coding_end >= $self->cdna_start($transcript) )
        {
          $exon_coding_end = $transcript_coding_end;
        }
      } elsif ( $transcript->cdna_coding_start() <= $exon_cdna_end ) {
        # Coding region ends downstream of this exon but does not
        # start downstream of it, so the exon end is the coding end.
        $exon_coding_end = $exon_cdna_end;
      }
    } else {
      # This is a non-coding transcript; the start is cached as
      # undefined as well.
      $self->{'cdna_coding_start'}->{$transcript_id} = undef;
    }

    $self->{'cdna_coding_end'}->{$transcript_id} = $exon_coding_end;
  }

  return $self->{'cdna_coding_end'}->{$transcript_id};
} ## end sub cdna_coding_end

=head2 coding_region_start

624
    Arg [1]     : Bio::EnsEMBL::Transcript $transcript
625 626
    Example     : $coding_region_start =
                    $exon->coding_region_start($transcript);
627 628 629 630
    Description : Returns the start position of the coding region
                  of the exon in slice-relative coordinates on the
                  forward strand.  Returns undef if the whole exon is
                  non-coding.
631 632 633
                  Since an exon may be part of one or more transcripts,
                  the relevant transcript must be given as argument to
                  this method.
634
    Return type : Integer or undef
Andreas Kusalananda Kähäri's avatar
Andreas Kusalananda Kähäri committed
635
    Exceptions  : Throws if the given argument is not a transcript.
636
    Caller      : General
637
    Status      : Stable
638 639 640

=cut

641
# The implementation of this method is analogous to the implementation
Andreas Kusalananda Kähäri's avatar
Andreas Kusalananda Kähäri committed
642
# of cdna_coding_start().
643 644

sub coding_region_start {
  my ( $self, $transcript ) = @_;

  unless (    defined($transcript)
           && ref($transcript)
           && $transcript->isa('Bio::EnsEMBL::Transcript') )
  {
    throw("Argument is not a transcript");
  }

  my $transcript_id = $transcript->dbID();

  if ( !exists( $self->{'coding_region_start'}->{$transcript_id} ) ) {
    my $transcript_coding_start = $transcript->coding_region_start();

    my $exon_coding_start;        # result to cache; undef if non-coding
    my $exon_is_noncoding = 1;    # cleared once an overlap is found

    if ( defined($transcript_coding_start) ) {
      my $exon_start = $self->start();

      if ( $transcript_coding_start >= $exon_start ) {
        # Coding region starts either within or downstream of this
        # exon; only the "within" case yields a value.
        if ( $transcript_coding_start <= $self->end() ) {
          $exon_coding_start = $transcript_coding_start;
          $exon_is_noncoding = 0;
        }
      } elsif ( $transcript->coding_region_end() >= $exon_start ) {
        # Coding region starts upstream of this exon but does not
        # end upstream of it.
        $exon_coding_start = $exon_start;
        $exon_is_noncoding = 0;
      }
    }

    $self->{'coding_region_start'}->{$transcript_id} = $exon_coding_start;

    # When the exon has no coding part for this transcript, the end
    # is cached as undefined too.
    if ($exon_is_noncoding) {
      $self->{'coding_region_end'}->{$transcript_id} = undef;
    }
  }

  return $self->{'coding_region_start'}->{$transcript_id};
} ## end sub coding_region_start

=head2 coding_region_end

700
    Arg [1]     : Bio::EnsEMBL::Transcript $transcript
701 702
    Example     : $coding_region_end =
                    $exon->coding_region_end($transcript);
703 704 705 706
    Description : Returns the end position of the coding region of
                  the exon in slice-relative coordinates on the
                  forward strand.  Returns undef if the whole exon is
                  non-coding.
707 708 709
                  Since an exon may be part of one or more transcripts,
                  the relevant transcript must be given as argument to
                  this method.
710
    Return type : Integer or undef
Andreas Kusalananda Kähäri's avatar
Andreas Kusalananda Kähäri committed
711
    Exceptions  : Throws if the given argument is not a transcript.
712
    Caller      : General
713
    Status      : Stable
714 715 716

=cut

717
# The implementation of this method is analogous to the implementation
Andreas Kusalananda Kähäri's avatar
Andreas Kusalananda Kähäri committed
718
# of cdna_coding_end().
719 720

sub coding_region_end {
  my ( $self, $transcript ) = @_;

  unless (    defined($transcript)
           && ref($transcript)
           && $transcript->isa('Bio::EnsEMBL::Transcript') )
  {
    throw("Argument is not a transcript");
  }

  my $transcript_id = $transcript->dbID();

  if ( !exists( $self->{'coding_region_end'}->{$transcript_id} ) ) {
    my $transcript_coding_end = $transcript->coding_region_end();

    my $exon_coding_end;          # result to cache; undef if non-coding
    my $exon_is_noncoding = 1;    # cleared once an overlap is found

    if ( defined($transcript_coding_end) ) {
      my $exon_end = $self->end();

      if ( $transcript_coding_end <= $exon_end ) {
        # Coding region ends either within or upstream of this
        # exon; only the "within" case yields a value.
        if ( $transcript_coding_end >= $self->start() ) {
          $exon_coding_end   = $transcript_coding_end;
          $exon_is_noncoding = 0;
        }
      } elsif ( $transcript->coding_region_start() <= $exon_end ) {
        # Coding region ends downstream of this exon but does not
        # start downstream of it.
        $exon_coding_end   = $exon_end;
        $exon_is_noncoding = 0;
      }
    }

    # When the exon has no coding part for this transcript, the start
    # is cached as undefined too.
    if ($exon_is_noncoding) {
      $self->{'coding_region_start'}->{$transcript_id} = undef;
    }

    $self->{'coding_region_end'}->{$transcript_id} = $exon_coding_end;
  }

  return $self->{'coding_region_end'}->{$transcript_id};
} ## end sub coding_region_end
773 774 775 776 777 778 779 780 781 782 783

=head2 slice

  Arg [1]    : Bio::EnsEMBL::Slice
  Example    : $slice = $exon->slice();
  Description: Getter/Setter for the slice this exon is on.  The superclass
               implementation is overridden to flush the internal sequence
               cache if this value is altered
  Returntype : Bio::EnsEMBL::Slice
  Exceptions : none
  Caller     : general
784
  Status     : Stable
785 786 787

=cut

788
sub slice {
  my ( $self, @args ) = @_;

  # Being called as a setter invalidates the cached sequence.
  if (@args) {
    delete $self->{'_seq_cache'};
  }

  return $self->SUPER::slice(@args);
}


=head2 move

  Arg [1]    : int start
  Arg [2]    : int end
  Arg [3]    : (optional) int strand
  Example    : None
  Description: Sets the start, end and strand in one call rather than in 
               3 separate calls to the start(), end() and strand() methods.
               This is for convenience and for speed when this needs to be
               done within a tight loop.  This overrides the superclass
               move() method so that the internal sequence cache can be
               flushed if the exon is moved.
  Returntype : none
  Exceptions : Thrown if invalid arguments are provided
  Caller     : general
811
  Status     : Stable
812 813 814 815 816

=cut

sub move {
  my ( $self, @args ) = @_;

  # The sequence cache is dropped unconditionally before the
  # superclass is asked to do the move.
  delete $self->{'_seq_cache'};

  return $self->SUPER::move(@args);
}


=head2 transform

  Arg  1     : String $coordinate_system_name
  Arg [2]    : String $coordinate_system_version
  Description: moves this exon to the given coordinate system. If this exon has
               attached supporting evidence, they move as well.
  Returntype : Bio::EnsEMBL::Exon
  Exceptions : wrong parameters
  Caller     : general
832
  Status     : Stable
833 834 835 836 837 838 839

=cut

sub transform {
  my $self = shift;

  # catch for old style transform calls: no arguments at all, or a
  # Slice / LRGSlice object as the first argument
  my $is_old_style_call =
    !@_
    || ( ref $_[0]
         && (    $_[0]->isa("Bio::EnsEMBL::Slice")
              or $_[0]->isa("Bio::EnsEMBL::LRGSlice") ) );

  if ($is_old_style_call) {
    deprecate('Calling transform without a coord system name is deprecated.');
    return $self->_deprecated_transform(@_);
  }

  my $new_exon = $self->SUPER::transform(@_);

  # Give up when the transform failed or changed the exon's length.
  return undef
    unless ( defined $new_exon
             and $new_exon->length == $self->length );

  if ( exists $self->{'_supporting_evidence'} ) {
    # Transform each supporting feature (in scalar context) and keep
    # only those for which the transform returned a defined value.
    my @new_features =
      grep { defined $_ }
      map  { scalar $_->transform(@_) }
      @{ $self->{'_supporting_evidence'} };

    $new_exon->{'_supporting_evidence'} = \@new_features;
  }

  #dont want to share the same sequence cache
  delete $new_exon->{'_seq_cache'};

  return $new_exon;
}

Graham McVicker's avatar
Graham McVicker committed
870

871
=head2 transfer
872

873 874 875 876 877
  Arg [1]    : Bio::EnsEMBL::Slice $destination_slice
  Example    : none
  Description: Moves this Exon to given target slice coordinates. If Features
               are attached they are moved as well. Returns a new exon.
  Returntype : Bio::EnsEMBL::Exon
Graham McVicker's avatar
Graham McVicker committed
878
  Exceptions : none
879
  Caller     : general
880
  Status     : Stable
881 882 883

=cut

884 885
sub transfer {
  my $self  = shift;

  # All arguments are handed on unchanged, first to the inherited transfer()
  # and then to transfer() on each supporting feature.
  my $new_exon = $self->SUPER::transfer( @_ );

  # Nothing to return when the inherited transfer() yielded a false value.
  return undef unless $new_exon;

  if( exists $self->{'_supporting_evidence'} ) {
    my @new_features;
    for my $old_feature ( @{$self->{'_supporting_evidence'}} ) {
      my $new_feature = $old_feature->transfer( @_ );

      # Skip features whose transfer() returned undef, mirroring what
      # transform() does, so the evidence list of the new exon never
      # contains undefined entries.
      push( @new_features, $new_feature ) if defined $new_feature;
    }
    $new_exon->{'_supporting_evidence'} = \@new_features;
  }

  # The new exon must not share the sequence cache of the original.
  delete $new_exon->{'_seq_cache'};

  return $new_exon;
}


=head2 add_supporting_features

  Arg [1-N]  : Bio::EnsEMBL::Feature $feature
               One or more features to attach to this exon.
  Example    : $exon->add_supporting_features(@features);
  Description: Adds a list of supporting features to this exon.
               Duplicate features are not added.
               If supporting features are added manually in this
               way, prior to calling get_all_supporting_features then the
               get_all_supporting_features call will not retrieve supporting
               features from the database.
  Returntype : none
  Exceptions : throw if any of the features are not Feature
               throw if any of the features are not in the same coordinate
               system as the exon
  Caller     : general
  Status     : Stable

=cut

sub add_supporting_features {
  my ($self, @features) = @_;

  # An empty argument list is a no-op: the evidence list is not even created.
  return unless @features;

  $self->{_supporting_evidence} ||= [];
  my $evidence = $self->{_supporting_evidence};

  FEATURE: foreach my $feature (@features) {

    # Every argument has to be a Feature object.
    unless ( $feature && $feature->isa("Bio::EnsEMBL::Feature") ) {
      throw("Supporting feat [$feature] not a " .
            "Bio::EnsEMBL::Feature");
    }

    # When both the exon and the feature are attached to a slice, the two
    # slices must carry the same name.
    my $exon_slice    = $self->slice();
    my $feature_slice = defined $exon_slice ? $feature->slice() : undef;
    if ( defined $feature_slice ) {
      my $exon_slice_name    = $exon_slice->name();
      my $feature_slice_name = $feature_slice->name();
      if ( $exon_slice_name ne $feature_slice_name ) {
        throw("Supporting feat not in same coord system as exon\n" .
              "exon is attached to [".$exon_slice_name."]\n" .
              "feat is attached to [".$feature_slice_name."]");
      }
    }

    # The very same object is never stored twice; this also covers repeats
    # within the current argument list, because $evidence grows as we go.
    foreach my $known_feature ( @{$evidence} ) {
      next FEATURE if $feature == $known_feature;
    }

    push @{$evidence}, $feature;
  }
}

=head2 flush_supporting_features

  Example     : $exon->flush_supporting_features;
  Description : Removes all supporting evidence from the exon.
  Return type : (Empty) listref
  Exceptions  : none
  Caller      : general
  Status      : Stable

=cut

sub flush_supporting_features {
  my ($self) = @_;

  # Replace the evidence list with a brand-new empty one (the previous list
  # itself is left untouched) and hand that new listref back to the caller.
  return $self->{'_supporting_evidence'} = [];
}


=head2 get_all_supporting_features

  Arg [1]    : none
  Example    : @evidence = @{$exon->get_all_supporting_features()};
  Description: Retrieves any supporting features added manually by
               calls to add_supporting_features. If no features have been
               added manually and this exon is in a database (i.e. it has
               an adaptor), the supporting features are fetched from the
               database through the SupportingFeatureAdaptor.
  Returntype : listreference of Bio::EnsEMBL::BaseAlignFeature objects
  Exceptions : none
  Caller     : general
  Status     : Stable

=cut

sub get_all_supporting_features {
  my ($self) = @_;

  # The database is only consulted while no evidence list exists on the
  # exon; a list set up earlier (even an empty one) is returned as it is.
  unless ( exists $self->{_supporting_evidence} ) {
    my $adaptor = $self->adaptor;
    if ($adaptor) {
      my $evidence_adaptor = $adaptor->db->get_SupportingFeatureAdaptor();
      $self->{_supporting_evidence} =
        $evidence_adaptor->fetch_all_by_Exon($self);
    }
  }

  # Without a stored list the caller gets a fresh empty listref, which is
  # not cached on the exon.
  my $evidence = $self->{_supporting_evidence};
  return $evidence ? $evidence : [];
}


1005
=head2 find_supporting_evidence
1006

1007 1008