Transcript.pm 58.7 KB
Newer Older
1
package Bio::EnsEMBL::Transcript;
Ewan Birney's avatar
Ewan Birney committed
2 3 4

=head1 NAME

5
Bio::EnsEMBL::Transcript - object representing an Ensembl transcript
Ewan Birney's avatar
Ewan Birney committed
6 7 8

=head1 SYNOPSIS

9
Creation:
10

11
     my $tran = new Bio::EnsEMBL::Transcript();
12
     my $tran = new Bio::EnsEMBL::Transcript(-EXONS => \@exons);
13 14 15

Manipulation:

Graham McVicker's avatar
Graham McVicker committed
16
     # Returns an array of Exon objects
17
     my @exons = @{$tran->get_all_Exons()};
18

Graham McVicker's avatar
Graham McVicker committed
19
     # Returns the peptide translation of the exons as a Bio::Seq
20 21 22 23 24
     if($tran->translation()) {
       my $pep   = $tran->translate();
     } else {
       print "Transcript ", $tran->stable_id(), " is non-coding\n";
     }
25

26 27
=head1 DESCRIPTION

28
A representation of a transcript within the Ensembl system.  A transcript
29 30
consists of a set of Exons and (possibly) a Translation which defines the
coding and non-coding regions of the exons.
Ewan Birney's avatar
Ewan Birney committed
31

32
=head1 LICENCE
Ewan Birney's avatar
Ewan Birney committed
33

34 35
This code is distributed under an Apache style licence. Please see
http://www.ensembl.org/info/about/code_licence.html for details.
Ewan Birney's avatar
Ewan Birney committed
36

37
=head1 AUTHOR
38

39
Ensembl core API team
Ewan Birney's avatar
Ewan Birney committed
40

41 42 43 44 45 46
=head1 CONTACT

Please post comments/questions to the Ensembl development list
<ensembl-dev@ebi.ac.uk>

=cut
47

Ewan Birney's avatar
Ewan Birney committed
48 49
use strict;

50
use Bio::EnsEMBL::Feature;
Ian Longden's avatar
Ian Longden committed
51
use Bio::EnsEMBL::Intron;
52
use Bio::EnsEMBL::TranscriptMapper;
53
use Bio::EnsEMBL::Utils::TranscriptSNPs;
54
use Bio::EnsEMBL::SeqEdit;
55

56 57 58
use Bio::EnsEMBL::Utils::Argument qw( rearrange );
use Bio::EnsEMBL::Utils::Exception qw( deprecate warning throw );

59
use vars qw(@ISA);
60
@ISA = qw(Bio::EnsEMBL::Feature);
Ewan Birney's avatar
Ewan Birney committed
61

Graham McVicker's avatar
Graham McVicker committed
62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80

=head2 new

  Arg [-EXONS] :
        reference to list of Bio::EnsEMBL::Exon objects - exons which make up 
        this transcript
  Arg [-STABLE_ID] :
        string - the stable identifier of this transcript
  Arg [-VERSION] :
        int - the version of the stable identifier of this transcript
  Arg [-EXTERNAL_NAME] :
        string - the external database name associated with this transcript
  Arg [-EXTERNAL_DB] :
        string - the name of the database the external name is from
  Arg [-EXTERNAL_STATUS]:
        string - the status of the external identifier
  Arg [-DISPLAY_XREF]:
        Bio::EnsEMBL::DBEntry - The external database entry that is used
        to label this transcript when it is displayed.
81 82 83 84 85 86 87 88
  Arg [-CREATED_DATE]:
        string - the date the transcript was created
  Arg [-MODIFIED_DATE]:
        string - the date the transcript was last modified
  Arg [-DESCRIPTION]:
        string - the transcript's description
  Arg [-BIOTYPE]: 
        string - the biotype e.g. "protein_coding"
89 90
  Arg [-STATUS]:
        string - the transcripts status i.e. "KNOWN","NOVEL"
91 92
  Arg [-IS_CURRENT]:
        Boolean - specifies if this is the current version of the transcript
Graham McVicker's avatar
Graham McVicker committed
93 94 95 96 97
  Example    : $tran = new Bio::EnsEMBL::Transcript(-EXONS => \@exons);
  Description: Constructor. Instantiates a Transcript object.
  Returntype : Bio::EnsEMBL::Transcript
  Exceptions : throw on bad arguments
  Caller     : general
98
  Status     : Stable
Graham McVicker's avatar
Graham McVicker committed
99 100 101

=cut

102
# Constructor.  Delegates to the parent class constructor and then fills in
# the transcript specific attributes from the named arguments (-EXONS,
# -STABLE_ID, -VERSION, -EXTERNAL_NAME, -EXTERNAL_DB, -EXTERNAL_STATUS,
# -DISPLAY_XREF, -CREATED_DATE, -MODIFIED_DATE, -DESCRIPTION, -BIOTYPE,
# -CONFIDENCE, -EXTERNAL_DB_NAME, -STATUS, -IS_CURRENT).
#
# The deprecated positional form new(@exons) is still accepted: when the
# first argument is a reference the whole argument list is taken as the exon
# list and a deprecation message is issued.
#
# Returns the new object.
sub new {
  my $class = shift;

  # Allow the constructor to be called on an instance as well as on a class.
  $class = ref($class) if ref($class);

  my $self = $class->SUPER::new(@_);

  my ( $exons, $stable_id, $version, $external_name, $external_db,
       $external_status, $display_xref, $created_date, $modified_date,
       $description, $biotype, $confidence, $status, $is_current );

  if ( ( @_ > 0 ) && ref( $_[0] ) ) {
    # catch for old style constructor calling
    $exons = [@_];
    deprecate("Transcript constructor should use named arguments.\n" .
              'Use Bio::EnsEMBL::Transcript->new(-EXONS => \@exons);' .
              "\ninstead of Bio::EnsEMBL::Transcript->new(\@exons);");
  }
  else {
    # -EXTERNAL_DB_NAME is still accepted so that existing callers keep
    # working, but its value is not used; hence the undef placeholder.
    ( $exons, $stable_id, $version, $external_name, $external_db,
      $external_status, $display_xref, $created_date, $modified_date,
      $description, $biotype, $confidence, undef, $status,
      $is_current ) =
        rearrange( [ 'EXONS', 'STABLE_ID', 'VERSION', 'EXTERNAL_NAME',
                     'EXTERNAL_DB', 'EXTERNAL_STATUS', 'DISPLAY_XREF',
                     'CREATED_DATE', 'MODIFIED_DATE', 'DESCRIPTION',
                     'BIOTYPE', 'CONFIDENCE', 'EXTERNAL_DB_NAME', 'STATUS',
                     'IS_CURRENT' ], @_ );
  }

  if ($exons) {
    $self->{'_trans_exon_array'} = $exons;
    $self->recalculate_coordinates();
  }

  $self->stable_id($stable_id);
  $self->version($version);
  $self->{'created_date'}  = $created_date;
  $self->{'modified_date'} = $modified_date;

  # The external_* getters fall back to the display xref when nothing was
  # set explicitly, so only store values that were actually supplied.
  $self->external_name($external_name)     if ( defined $external_name );
  $self->external_db($external_db)         if ( defined $external_db );
  $self->external_status($external_status) if ( defined $external_status );
  $self->display_xref($display_xref)       if ( defined $display_xref );
  $self->edits_enabled(1);

  $self->description($description);

  # -CONFIDENCE is the old style name for -STATUS.  Fall back to it only when
  # -STATUS was not given: calling status() once per name would let an
  # undefined -STATUS wipe out a supplied -CONFIDENCE.
  $self->status( defined($status) ? $status : $confidence );

  $self->biotype($biotype);

  # default is_current
  $is_current = 1 unless ( defined($is_current) );
  $self->{'is_current'} = $is_current;

  return $self;
}

162

Graham McVicker's avatar
Graham McVicker committed
163
=head2 get_all_DBLinks
164

165
  Example    : my @dblinks = @{ $transcript->get_all_DBLinks };
166 167 168 169 170 171 172
  Description: Retrieves _all_ related DBEntries for this transcript.  
               This includes all DBEntries that are associated with the
               corresponding translation.

               If you only want to retrieve the DBEntries associated with the
               transcript then you should use the get_all_DBEntries call 
               instead.
173
  Returntype : Listref of Bio::EnsEMBL::DBEntry objects, sorted by
174
               priority (desc), external db name (asc), display_id (asc)
175 176
  Exceptions : none
  Caller     : general
177
  Status     : Stable
178 179 180

=cut

Graham McVicker's avatar
Graham McVicker committed
181 182
# Gathers the DBEntries attached to this transcript and, when the transcript
# has a translation, those attached to the translation as well.  Both
# optional arguments are passed unchanged to the get_all_DBEntries() calls.
# Returns a reference to the combined list, ordered by _compare_xrefs().
sub get_all_DBLinks {
  my ( $self, $ex_db_exp, $ex_db_type ) = @_;

  my @dbentries = @{ $self->get_all_DBEntries( $ex_db_exp, $ex_db_type ) };

  my $translation = $self->translation();
  if ($translation) {
    push( @dbentries,
          @{ $translation->get_all_DBEntries( $ex_db_exp, $ex_db_type ) } );
  }

  my @ordered = sort { _compare_xrefs() } @dbentries;

  return \@ordered;
}

Graham McVicker's avatar
Graham McVicker committed
198

199
=head2 get_all_DBEntries
200

201
  Example    : my @dbentries = @{ $transcript->get_all_DBEntries };
202 203 204 205 206 207 208 209
  Description: Retrieves DBEntries (xrefs) for this transcript.  
               This does _not_ include the corresponding translations 
               DBEntries (see get_all_DBLinks).

               This method will attempt to lazy-load DBEntries from a
               database if an adaptor is available and no DBEntries are present
               on the transcript (i.e. they have not already been added or 
               loaded).
210
  Returntype : Listref of Bio::EnsEMBL::DBEntry objects
211 212
  Exceptions : none
  Caller     : get_all_DBLinks, TranscriptAdaptor::store
213
  Status     : Stable
214 215 216

=cut

217 218
# Returns a listref of the DBEntries (xrefs) of this transcript.  Results
# are cached on the object; the cache key is "dbentries" followed by
# whichever of the two optional filter arguments are defined.  On a cache
# miss the entries are fetched through the DBEntryAdaptor if the transcript
# has an adaptor; otherwise an empty list is cached and returned.
sub get_all_DBEntries {
  my ( $self, $ex_db_exp, $ex_db_type ) = @_;

  # One cache slot per distinct combination of filter arguments.
  my $cache_name =
    join( '', 'dbentries', grep { defined } $ex_db_exp, $ex_db_type );

  # if not cached, retrieve all of the xrefs for this transcript
  unless ( defined $self->{$cache_name} ) {
    my $adaptor = $self->adaptor();
    if ($adaptor) {
      my $dbentry_adaptor = $adaptor->db->get_DBEntryAdaptor;
      $self->{$cache_name} =
        $dbentry_adaptor->fetch_all_by_Transcript( $self, $ex_db_exp,
                                                   $ex_db_type );
    }
  }

  $self->{$cache_name} = [] unless $self->{$cache_name};

  return $self->{$cache_name};
}


=head2 add_DBEntry
243

244 245
  Arg [1]    : Bio::EnsEMBL::DBEntry $dbe
               The dbEntry to be added
246 247 248 249 250
  Example    : my $dbe = Bio::EnsEMBL::DBEntry->new(...);
               $transcript->add_DBEntry($dbe);
  Description: Associates a DBEntry with this transcript. Note that adding
               DBEntries will prevent future lazy-loading of DBEntries for this
               transcript (see get_all_DBEntries).
251 252 253
  Returntype : none
  Exceptions : thrown on incorrect argument type
  Caller     : general
254
  Status     : Stable
255 256 257 258 259 260 261 262

=cut

# Appends a DBEntry to the unfiltered 'dbentries' list of this transcript,
# creating the list if necessary.  Throws 'Expected DBEntry argument' unless
# the argument is a Bio::EnsEMBL::DBEntry object.
sub add_DBEntry {
  my ( $self, $dbe ) = @_;

  # The isa() call is wrapped in eval because invoking a method on an
  # unblessed reference (e.g. a plain hashref) is a fatal Perl error, which
  # would otherwise hide the intended exception below.
  my $is_dbentry = do {
    local $@;
    $dbe && ref($dbe) && eval { $dbe->isa('Bio::EnsEMBL::DBEntry') };
  };

  unless ($is_dbentry) {
    throw('Expected DBEntry argument');
  }

  $self->{'dbentries'} ||= [];
  push @{ $self->{'dbentries'} }, $dbe;
}

270

271 272
=head2 get_all_supporting_features

273
  Example    : my @evidence = @{ $transcript->get_all_supporting_features };
274 275
  Description: Retrieves any supporting features added manually by 
               calls to add_supporting_features.
276
  Returntype : Listref of Bio::EnsEMBL::FeaturePair objects
277 278
  Exceptions : none
  Caller     : general
279
  Status     : Stable
280 281 282 283 284

=cut

# Returns a listref of the supporting features of this transcript.  If the
# '_supporting_evidence' key has never been set and the transcript has an
# adaptor, the features are first fetched through the
# TranscriptSupportingFeatureAdaptor and stored on the object.  When nothing
# is stored a fresh empty listref is returned.
sub get_all_supporting_features {
  my ($self) = @_;

  unless ( exists $self->{_supporting_evidence} ) {
    my $adaptor = $self->adaptor;
    if ($adaptor) {
      my $tsfa = $adaptor->db->get_TranscriptSupportingFeatureAdaptor();
      $self->{_supporting_evidence} = $tsfa->fetch_all_by_Transcript($self);
    }
  }

  my $evidence = $self->{_supporting_evidence};
  return $evidence ? $evidence : [];
}


=head2 add_supporting_features

299 300 301
  Arg [1-N]  : Bio::EnsEMBL::FeaturePair $feature
               The supporting features to add
  Example    : $transcript->add_supporting_features(@features);
302
  Description: Adds a list of supporting features to this Transcript.
303 304
               The added features can be retrieved by
               get_all_supporting_features().
305 306 307 308 309
  Returntype : none
  Exceptions : throw if any of the features are not FeaturePairs
               throw if any of the features are not in the same coordinate
               system as the Transcript
  Caller     : general
310
  Status     : Stable
311 312 313 314
 
=cut
 
# Adds the given features to the supporting evidence of this transcript.
# A feature that is already in the list (same object) is silently skipped.
#
# Throws if an element is undefined or an array reference, if it is not a
# Bio::EnsEMBL::FeaturePair, or if both the transcript and the feature have
# a slice and the slice names differ.
sub add_supporting_features {
  my ( $self, @features ) = @_;

  return unless @features;

  $self->{_supporting_evidence} ||= [];

  FEATURE: foreach my $feature (@features) {

    if ( !defined($feature) || ref($feature) eq "ARRAY" ) {
      throw("Element in transcript supporting features array is undefined or is an ARRAY for " . $self->dbID);
    }

    # The isa() call is wrapped in eval because calling a method on an
    # unblessed reference is a fatal Perl error, which would otherwise
    # replace the descriptive exception below.
    my $is_feature_pair = do {
      local $@;
      $feature && eval { $feature->isa("Bio::EnsEMBL::FeaturePair") };
    };
    if ( !$is_feature_pair ) {
      throw("Supporting feat [$feature] not a " .
            "Bio::EnsEMBL::FeaturePair");
    }

    if ( ( defined $self->slice() && defined $feature->slice() ) &&
         ( $self->slice()->name() ne $feature->slice()->name() ) ) {
      throw("Supporting feat not in same coord system as transcript\n" .
            "transcript is attached to [" . $self->slice()->name() . "]\n" .
            "feat is attached to [" . $feature->slice()->name() . "]");
    }

    # check whether this feature object has been added already
    foreach my $added_feature ( @{ $self->{_supporting_evidence} } ) {
      # compare objects
      next FEATURE if ( $feature == $added_feature );
    }

    # no duplicate was found, add the feature
    push( @{ $self->{_supporting_evidence} }, $feature );
  }
}


354 355 356 357 358 359 360
=head2 flush_supporting_features

  Example     : $transcript->flush_supporting_features;
  Description : Removes all supporting evidence from the transcript.
  Return type : (Empty) listref
  Exceptions  : none
  Caller      : general
361
  Status      : Stable
362 363 364 365 366 367 368 369 370

=cut

# Replaces the supporting evidence of this transcript with a new empty list
# and returns that (empty) listref.
sub flush_supporting_features {
  my ($self) = @_;

  return ( $self->{'_supporting_evidence'} = [] );
}


371 372
=head2 external_db

373 374 375 376 377 378 379 380
  Arg [1]    : (optional) String - name of external db to set
  Example    : $transcript->external_db('HGNC');
  Description: Getter/setter for attribute external_db. The db is the one that 
               belongs to the external_name.  
  Returntype : String
  Exceptions : none
  Caller     : general
  Status     : Stable
381 382 383 384

=cut

# Getter/setter for the external db name.
# With a defined argument: stores and returns it.
# Without: returns the stored value if the key exists, otherwise the
# dbname() of the display xref, or undef when there is no display xref.
sub external_db {
  my ( $self, $ext_dbname ) = @_;

  if ( defined $ext_dbname ) {
    $self->{'external_db'} = $ext_dbname;
    return $ext_dbname;
  }

  return $self->{'external_db'} if ( exists $self->{'external_db'} );

  # Nothing stored explicitly: fall back to the display xref.
  my $xref = $self->display_xref();

  return defined($xref) ? $xref->dbname() : undef;
}

404

405 406
=head2 external_status

407 408 409 410 411 412 413 414
  Arg [1]    : (optional) String - status of the external db
  Example    : $transcript->external_status('KNOWNXREF');
  Description: Getter/setter for attribute external_status. The status of
               the external db of the one that belongs to the external_name.
  Returntype : String
  Exceptions : none
  Caller     : general
  Status     : Stable
415 416 417 418 419 420 421

=cut

# Getter/setter for the external status.
# With a defined argument: stores and returns it.
# Without: returns the stored value if the key exists, otherwise the
# status() of the display xref, or undef when there is no display xref.
sub external_status {
  my ( $self, $ext_status ) = @_;

  if ( defined $ext_status ) {
    $self->{'external_status'} = $ext_status;
    return $ext_status;
  }

  return $self->{'external_status'} if ( exists $self->{'external_status'} );

  # Nothing stored explicitly: fall back to the display xref.
  my $xref = $self->display_xref();

  return defined($xref) ? $xref->status() : undef;
}


439 440
=head2 external_name

441 442 443 444 445 446 447
  Arg [1]    : (optional) String - the external name to set
  Example    : $transcript->external_name('BRCA2-001');
  Description: Getter/setter for attribute external_name.
  Returntype : String or undef
  Exceptions : none
  Caller     : general
  Status     : Stable
448 449 450 451

=cut

# Getter/setter for the external name.
# With a defined argument: stores and returns it.
# Without: returns the stored value if the key exists, otherwise the
# display_id() of the display xref, or undef when there is no display xref.
sub external_name {
  my ( $self, $ext_name ) = @_;

  if ( defined $ext_name ) {
    $self->{'external_name'} = $ext_name;
    return $ext_name;
  }

  return $self->{'external_name'} if ( exists $self->{'external_name'} );

  # Nothing stored explicitly: fall back to the display xref.
  my $xref = $self->display_xref();

  return defined($xref) ? $xref->display_id() : undef;
}


472 473
=head2 is_known

474 475 476 477
  Example    : print "Transcript ".$transcript->stable_id." is KNOWN\n" if
                  $transcript->is_known;
  Description: Returns TRUE if this transcript has a status of 'KNOWN'
  Returntype : TRUE if known, FALSE otherwise
478 479
  Exceptions : none
  Caller     : general
480
  Status     : Stable
481 482 483

=cut

484 485
# Returns true if the stored status of this transcript is the string
# "KNOWN", false otherwise (including when no status has been set).
sub is_known {
  my ($self) = @_;

  my $status = $self->{'status'};

  # The defined() test keeps an unset status from triggering an
  # "uninitialized value" warning in the string comparison.
  return ( defined($status) && $status eq "KNOWN" );
}


490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507
=head2 status

  Arg [1]    : string $status
  Example    : none
  Description: get/set for attribute status
  Returntype : string
  Exceptions : none
  Caller     : general
  Status     : Medium Risk

=cut

# Getter/setter for the status attribute.  Any argument, including undef,
# replaces the stored value; the current value is returned.
sub status {
  my ( $self, @args ) = @_;

  if (@args) {
    $self->{'status'} = $args[0];
  }

  return $self->{'status'};
}

508 509 510 511 512 513 514 515
=head2 biotype

  Arg [1]    : string $biotype
  Example    : none
  Description: get/set for attribute biotype
  Returntype : string
  Exceptions : none
  Caller     : general
516
  Status     : Stable
517 518 519 520 521 522 523

=cut

# Getter/setter for the biotype attribute.  Any argument replaces the stored
# value.  Returns the stored value, or "protein_coding" when the stored
# value is false (unset, undef or empty).
sub biotype {
  my ( $self, @args ) = @_;

  if (@args) {
    $self->{'biotype'} = $args[0];
  }

  return $self->{'biotype'} ? $self->{'biotype'} : "protein_coding";
}

526

Alistair Rust's avatar
Alistair Rust committed
527
=head2 display_xref
528

529 530 531
  Arg [1]    : (optional) Bio::EnsEMBL::DBEntry - the display xref to set
  Example    : $transcript->display_xref($db_entry);
  Description: Getter/setter for display_xref for this transcript.
532
  Returntype : Bio::EnsEMBL::DBEntry
533 534
  Exceptions : none
  Caller     : general
535
  Status     : Stable
536 537 538

=cut

539
# Getter/setter for the display xref.  Any argument, including undef,
# replaces the stored value; the current value is returned.
sub display_xref {
  my ( $self, @args ) = @_;

  if (@args) {
    $self->{'display_xref'} = $args[0];
  }

  return $self->{'display_xref'};
}


=head2 translation

548 549 550
  Args       : None
  Example    : if ( $transcript->translation() ) {
                 print( $transcript->translation()->stable_id(), "\n" );
551
               } else {
552
                 print("Pseudogene\n");
553
               }
554 555 556 557 558 559 560
  Description: Getter/setter for the Translation object which
               defines the CDS (and as a result the peptide encoded
               by) this transcript.  This function will return
               undef if this transcript is a pseudogene, i.e. a
               non-translating transcript such as an ncRNA.  This
               is the accepted method of determining whether a
               transcript is a pseudogene or not.
561
  Returntype : Bio::EnsEMBL::Translation
562 563
  Exceptions : none
  Caller     : general
564
  Status     : Stable
Ewan Birney's avatar
Ewan Birney committed
565 566 567

=cut

Michele Clamp's avatar
Michele Clamp committed
568
# Getter/setter for the Translation object of this transcript.
#
# With an argument: the argument must be undef or a
# Bio::EnsEMBL::Translation object, otherwise an exception is thrown; it
# replaces the stored value.
# Without an argument: if the 'translation' key has never been set and the
# transcript has an adaptor, the translation is fetched through the
# TranslationAdaptor and stored.
#
# Returns the stored translation, which may be undef.
sub translation {
  my $self = shift;

  if (@_) {
    my $value = shift;

    if ( defined($value) ) {
      # The isa() call is wrapped in eval because calling a method on an
      # unblessed reference is a fatal Perl error, which would otherwise
      # replace the descriptive exception below.
      my $is_translation = do {
        local $@;
        ref($value) && eval { $value->isa('Bio::EnsEMBL::Translation') };
      };
      if ( !$is_translation ) {
        throw("Bio::EnsEMBL::Translation argument expected.");
      }
    }

    $self->{'translation'} = $value;
  }
  elsif ( !exists( $self->{'translation'} ) and defined( $self->adaptor() ) ) {
    # Lazy-load once; the key then exists even if nothing was found.
    $self->{'translation'} =
      $self->adaptor()->db()->get_TranslationAdaptor()->
        fetch_by_Transcript($self);
  }

  return $self->{'translation'};
}

Arne Stabenau's avatar
Arne Stabenau committed
585

586 587 588 589
=head2 spliced_seq

  Args       : none
  Example    : none
590 591
  Description: Retrieves all Exon sequences and concats them together.
               No phase padding magic is done, even if phases do not align.
592 593 594
  Returntype : txt
  Exceptions : none
  Caller     : general
595
  Status     : Stable
596 597 598 599 600

=cut

# Concatenates the sequences of all exons of this transcript, in the order
# returned by get_all_Exons(), and returns the result as a string.  An exon
# whose seq() is false contributes a run of 'N' of the exon's length and
# triggers a warning.  When edits are enabled, every SeqEdit of the
# transcript is applied to the concatenated string.
sub spliced_seq {
  my ($self) = @_;

  my $spliced = "";

  foreach my $exon ( @{ $self->get_all_Exons() } ) {
    my $exon_seq = $exon->seq();

    if ($exon_seq) {
      $spliced .= $exon_seq->seq();
    }
    else {
      warning("Could not obtain seq for exon.  Transcript sequence may not " .
              "be correct.");
      $spliced .= 'N' x $exon->length();
    }
  }

  # apply post transcriptional edits
  if ( $self->edits_enabled() ) {
    # Highest start first, so that an edit never has to account for the
    # effect of another edit further downstream.
    my @edits_desc =
      sort { $b->start() <=> $a->start() } @{ $self->get_all_SeqEdits() };

    $_->apply_edit( \$spliced ) foreach @edits_desc;
  }

  return $spliced;
}


=head2 translateable_seq

  Args       : none
  Example    : print $transcript->translateable_seq(), "\n";
636
  Description: Returns a sequence string which is the translateable part
637
               of the transcripts sequence.  This is formed by splicing all
638 639 640 641 642
               Exon sequences together and apply all defined RNA edits.
               Then the coding part of the sequence is extracted and returned.
               The code will not support monkey exons any more. If you want to
               have non phase matching exons, defined appropriate _rna_edit
               attributes!
643 644 645

               An empty string is returned if this transcript is a pseudogene
               (i.e. is non-translateable).
646 647 648
  Returntype : txt
  Exceptions : none
  Caller     : general
649
  Status     : Stable
650 651 652 653 654 655

=cut

# Returns the coding part of the spliced transcript sequence as a string:
# the stretch from cdna_coding_start() to cdna_coding_end() of
# spliced_seq(), prefixed with one 'N' per unit of the start exon's phase
# when that phase is greater than zero.
#
# Returns an empty string when the transcript has no translation or when
# either cdna coding coordinate is false.
sub translateable_seq {
  my ($self) = @_;

  my $translation = $self->translation();
  return '' if ( !$translation );

  my $start = $self->cdna_coding_start();
  my $end   = $self->cdna_coding_end();

  # Check the coordinates before they are used: slicing with undefined
  # values only produces warnings and a string that is thrown away, and
  # there is no need to build the spliced sequence in that case.
  if ( !$start || !$end ) {
    return "";
  }

  my $mrna = substr( $self->spliced_seq(), $start - 1, $end - $start + 1 );

  my $start_phase = $translation->start_Exon->phase();
  if ( $start_phase > 0 ) {
    $mrna = "N" x $start_phase . $mrna;
  }

  return $mrna;
}

Arne Stabenau's avatar
Arne Stabenau committed
678

679 680 681 682 683 684 685
=head2 cdna_coding_start

  Arg [1]    : (optional) $value
  Example    : $relative_coding_start = $transcript->cdna_coding_start;
  Description: Retrieves the position of the coding start of this transcript
               in cdna coordinates (relative to the start of the 5prime end of
               the transcript, excluding introns, including utrs).
686 687 688

               This will return undef if this is a pseudogene (i.e. a
               transcript with no translation).
689 690 691
  Returntype : int
  Exceptions : none
  Caller     : five_prime_utr, get_all_snps, general
692
  Status     : Stable
693 694 695 696

=cut

# Getter/setter for the coding start in cdna coordinates.
#
# An argument replaces the stored value.  If no value is stored and the
# transcript has a translation, the value is computed and stored: the
# lengths of the exons in front of the translation's start exon plus the
# translation's start within that exon, then adjusted by the length_diff()
# of SeqEdits when edits are enabled.
#
# Returns the stored value (undef when there is no translation and nothing
# was set).
sub cdna_coding_start {
  my $self = shift;

  $self->{'cdna_coding_start'} = shift if (@_);

  if ( !defined( $self->{'cdna_coding_start'} )
       && defined( $self->translation ) ) {
    # calc coding start relative from the start of translation (in cdna coords)
    my $cdna_pos = 0;

    foreach my $exon ( @{ $self->get_all_Exons } ) {
      # a false element ends the walk
      last unless $exon;

      if ( $exon == $self->translation->start_Exon ) {
        # add the utr portion of the start exon
        $cdna_pos += $self->translation->start;
        last;
      }

      # add the entire length of this non-coding exon
      $cdna_pos += $exon->length;
    }

    # adjust cdna coords if sequence edits are enabled
    if ( $self->edits_enabled() ) {
      # sort in reverse order to avoid adjustment of downstream edits
      my @edits_desc =
        sort { $b->start() <=> $a->start() } @{ $self->get_all_SeqEdits() };

      foreach my $edit (@edits_desc) {
        # use less than start so that start of CDS can be extended
        $cdna_pos += $edit->length_diff() if ( $edit->start() < $cdna_pos );
      }
    }

    $self->{'cdna_coding_start'} = $cdna_pos;
  }

  return $self->{'cdna_coding_start'};
}


=head2 cdna_coding_end

  Arg [1]    : (optional) $value
745
  Example    : $cdna_coding_end = $transcript->cdna_coding_end;
746 747
  Description: Retrieves the end of the coding region of this transcript in
               cdna coordinates (relative to the five prime end of the
Graham McVicker's avatar
Graham McVicker committed
748
               transcript, excluding introns, including utrs).
749 750 751 752

               This will return undef if this transcript is a pseudogene
               (i.e. a transcript with no translation and therefore no CDS).
  Returntype : int
753 754
  Exceptions : none
  Caller     : general
755
  Status     : Stable
756 757 758 759

=cut

# Getter/setter for the coding end in cdna coordinates.
#
# An argument replaces the stored value.  If no value is stored and the
# transcript has a translation, the value is computed and stored: the
# lengths of the exons in front of the translation's end exon plus the
# translation's end within that exon, then adjusted by the length_diff()
# of SeqEdits when edits are enabled.
#
# Returns the stored value (undef when there is no translation and nothing
# was set).
sub cdna_coding_end {
  my $self = shift;

  $self->{'cdna_coding_end'} = shift if (@_);

  if ( !defined( $self->{'cdna_coding_end'} )
       && defined( $self->translation ) ) {
    my $cdna_pos = 0;

    foreach my $exon ( @{ $self->get_all_Exons } ) {
      # a false element ends the walk
      last unless $exon;

      if ( $exon == $self->translation->end_Exon ) {
        # add coding portion of the final coding exon
        $cdna_pos += $self->translation->end;
        last;
      }

      # add entire exon
      $cdna_pos += $exon->length;
    }

    # adjust cdna coords if sequence edits are enabled
    if ( $self->edits_enabled() ) {
      # sort in reverse order to avoid adjustment of downstream edits
      my @edits_desc =
        sort { $b->start() <=> $a->start() } @{ $self->get_all_SeqEdits() };

      foreach my $edit (@edits_desc) {
        # use less than or equal to end+1 so end of the CDS can be extended
        $cdna_pos += $edit->length_diff()
          if ( $edit->start() <= $cdna_pos + 1 );
      }
    }

    $self->{'cdna_coding_end'} = $cdna_pos;
  }

  return $self->{'cdna_coding_end'};
}


802
=head2 coding_region_start
803 804

  Arg [1]    : (optional) $value
805
  Example    : $coding_region_start = $transcript->coding_region_start
806 807
  Description: Retrieves the start of the coding region of this transcript
               in genomic coordinates (i.e. in either slice or contig coords).
808 809 810 811 812
               By convention, the coding_region_start is always lower than
               the value returned by the coding_region_end method.
               The value returned by this function is NOT the biological
               coding start since on the reverse strand the biological coding
               start would be the higher genomic value.
813 814 815

               This function will return undef if this is a pseudogene
               (a non-translated transcript).
Graham McVicker's avatar
Graham McVicker committed
816
  Returntype : int
817 818
  Exceptions : none
  Caller     : general
819
  Status     : Stable
820 821 822

=cut

823
# Getter/setter for the coding region start.
#
# A defined argument replaces the stored value.  Otherwise, if no value is
# stored and the transcript has a translation, the value is computed from
# the translation and stored:
#   strand of the start exon == 1 : start exon start + (translation start - 1)
#   otherwise                     : end exon end - (translation end - 1)
#
# Returns the stored value (undef when there is no translation and nothing
# was set).
sub coding_region_start {
  my ( $self, $value ) = @_;

  if ( defined $value ) {
    $self->{'coding_region_start'} = $value;
  }
  elsif ( !defined( $self->{'coding_region_start'} )
          && defined( $self->translation ) ) {
    # calculate the coding start from the translation
    my $translation = $self->translation();
    my $region_start;

    if ( $translation->start_Exon->strand() == 1 ) {
      $region_start =
        $translation->start_Exon->start() + ( $translation->start() - 1 );
    }
    else {
      $region_start =
        $translation->end_Exon->end() - ( $translation->end() - 1 );
    }

    $self->{'coding_region_start'} = $region_start;
  }

  return $self->{'coding_region_start'};
}

846

847
=head2 coding_region_end
848

Graham McVicker's avatar
Graham McVicker committed
849
  Arg [1]    : (optional) $value
850
  Example    : $coding_region_end = $transcript->coding_region_end
851
  Description: Retrieves the end of the coding region of this transcript
Graham McVicker's avatar
Graham McVicker committed
852
               in genomic coordinates (i.e. in either slice or contig coords).
853 854 855 856
               By convention, the coding_region_end is always higher than the
               value returned by the coding_region_start method.
               The value returned by this function is NOT the biological
               coding end since on the reverse strand the biological coding
857
               end would be the lower genomic value.
858 859 860

               This function will return undef if this is a pseudogene
               (a non-translated transcript).
Graham McVicker's avatar
Graham McVicker committed
861 862 863
  Returntype : int
  Exceptions : none
  Caller     : general
864
  Status     : Stable
865 866 867

=cut

868
sub coding_region_end {
869
  my ($self, $value ) = @_;
870

871 872
  my $strand;
  my $end;
873 874

  if( defined $value ) {
875
    $self->{'coding_region_end'} = $value;
876
  } elsif( ! defined $self->{'coding_region_end'}
877
	   && defined $self->translation() ) {
Graham McVicker's avatar
Graham McVicker committed
878
    $strand = $self->translation()->start_Exon->strand();
879
    if( $strand == 1 ) {
Graham McVicker's avatar
Graham McVicker committed
880
      $end = $self->translation()->end_Exon->start();
881 882
      $end += ( $self->translation()->end() - 1 );
    } else {
Graham McVicker's avatar
Graham McVicker committed
883
      $end = $self->translation()->start_Exon->end();
884 885
      $end -= ( $self->translation()->start() - 1 );
    }
886
    $self->{'coding_region_end'} = $end;