Transcript.pm 63.1 KB
Newer Older
1 2
=head1 LICENSE

  Copyright (c) 1999-2010 The European Bioinformatics Institute and
  Genome Research Limited.  All rights reserved.

  This software is distributed under a modified Apache license.
  For license details, please see

    http://www.ensembl.org/info/about/code_licence.html

=head1 CONTACT

  Please email comments or questions to the public Ensembl
  developers list at <ensembl-dev@ebi.ac.uk>.

  Questions may also be sent to the Ensembl help desk at
  <helpdesk@ensembl.org>.

=cut
Ewan Birney's avatar
Ewan Birney committed
20 21 22

=head1 NAME

Bio::EnsEMBL::Transcript - object representing an Ensembl transcript
Ewan Birney's avatar
Ewan Birney committed
24 25 26

=head1 SYNOPSIS

27
Creation:
28

29 30
  my $tran = new Bio::EnsEMBL::Transcript();
  my $tran = new Bio::EnsEMBL::Transcript( -EXONS => \@exons );
31 32 33

Manipulation:

34 35
  # Returns an array of Exon objects
  my @exons = @{ $tran->get_all_Exons() };
36

37 38 39 40 41 42
  # Returns the peptide translation of the exons as a Bio::Seq
  if ( $tran->translation() ) {
    my $pep = $tran->translate();
  } else {
    print "Transcript ", $tran->stable_id(), " is non-coding\n";
  }
43

44 45
=head1 DESCRIPTION

46
A representation of a transcript within the Ensembl system.  A transcript
47 48
consists of a set of Exons and (possibly) a Translation which defines the
coding and non-coding regions of the exons.
Ewan Birney's avatar
Ewan Birney committed
49

50
=head1 METHODS
51 52

=cut
53

54 55
package Bio::EnsEMBL::Transcript;

Ewan Birney's avatar
Ewan Birney committed
56 57
use strict;

58
use Bio::EnsEMBL::Feature;
Ian Longden's avatar
Ian Longden committed
59
use Bio::EnsEMBL::Intron;
60
use Bio::EnsEMBL::TranscriptMapper;
61
use Bio::EnsEMBL::Utils::TranscriptSNPs;
62
use Bio::EnsEMBL::SeqEdit;
63

64 65 66
use Bio::EnsEMBL::Utils::Argument qw( rearrange );
use Bio::EnsEMBL::Utils::Exception qw( deprecate warning throw );

67
use vars qw(@ISA);
68
@ISA = qw(Bio::EnsEMBL::Feature);
Ewan Birney's avatar
Ewan Birney committed
69

Graham McVicker's avatar
Graham McVicker committed
70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88

=head2 new

  Arg [-EXONS] :
        reference to list of Bio::EnsEMBL::Exon objects - exons which make up 
        this transcript
  Arg [-STABLE_ID] :
        string - the stable identifier of this transcript
  Arg [-VERSION] :
        int - the version of the stable identifier of this transcript
  Arg [-EXTERNAL_NAME] :
        string - the external database name associated with this transcript
  Arg [-EXTERNAL_DB] :
        string - the name of the database the external name is from
  Arg [-EXTERNAL_STATUS]:
        string - the status of the external identifier
  Arg [-DISPLAY_XREF]:
        Bio::EnsEMBL::DBEntry - The external database entry that is used
        to label this transcript when it is displayed.
89 90 91 92 93 94 95 96
  Arg [-CREATED_DATE]:
        string - the date the transcript was created
  Arg [-MODIFIED_DATE]:
        string - the date the transcript was last modified
  Arg [-DESCRIPTION]:
        string - the transcript's description
  Arg [-BIOTYPE]: 
        string - the biotype e.g. "protein_coding"
97 98
  Arg [-STATUS]:
        string - the transcript's status, e.g. "KNOWN", "NOVEL"
99 100
  Arg [-IS_CURRENT]:
        Boolean - specifies if this is the current version of the transcript
Graham McVicker's avatar
Graham McVicker committed
101 102 103 104 105
  Example    : $tran = new Bio::EnsEMBL::Transcript(-EXONS => \@exons);
  Description: Constructor. Instantiates a Transcript object.
  Returntype : Bio::EnsEMBL::Transcript
  Exceptions : throw on bad arguments
  Caller     : general
106
  Status     : Stable
Graham McVicker's avatar
Graham McVicker committed
107 108 109

=cut

110
sub new {
  my $proto = shift;
  my $class = ref($proto) || $proto;

  # Let the superclass build the object from the same argument list.
  my $self = $class->SUPER::new(@_);

  # NOTE: $external_db_name (-EXTERNAL_DB_NAME) is accepted for
  # compatibility but is not stored anywhere by this constructor.
  my (
    $exons,            $stable_id,    $version,
    $external_name,    $external_db,  $external_status,
    $display_xref,     $created_date, $modified_date,
    $description,      $biotype,      $confidence,
    $external_db_name, $status,       $is_current
  );

  # Catch for old style constructor calling: a plain list of exon
  # objects rather than named arguments.
  if ( ( @_ > 0 ) && ref( $_[0] ) ) {
    $exons = [@_];
    deprecate( "Transcript constructor should use named arguments.\n"
        . "Use Bio::EnsEMBL::Transcript->new(-EXONS => \@exons);\n"
        . "instead of Bio::EnsEMBL::Transcript->new(\@exons);" );
  } else {
    (
      $exons,            $stable_id,    $version,
      $external_name,    $external_db,  $external_status,
      $display_xref,     $created_date, $modified_date,
      $description,      $biotype,      $confidence,
      $external_db_name, $status,       $is_current
      )
      = rearrange( [
        'EXONS',            'STABLE_ID',
        'VERSION',          'EXTERNAL_NAME',
        'EXTERNAL_DB',      'EXTERNAL_STATUS',
        'DISPLAY_XREF',     'CREATED_DATE',
        'MODIFIED_DATE',    'DESCRIPTION',
        'BIOTYPE',          'CONFIDENCE',
        'EXTERNAL_DB_NAME', 'STATUS',
        'IS_CURRENT'
      ],
      @_
      );
  }

  if ($exons) {
    $self->{'_trans_exon_array'} = $exons;
    $self->recalculate_coordinates();
  }

  $self->stable_id($stable_id);
  $self->version($version);
  $self->{'created_date'}  = $created_date;
  $self->{'modified_date'} = $modified_date;

  # The external_* accessors fall back to the display xref when their
  # hash key does not exist, so only set them when a value was given.
  $self->external_name($external_name) if ( defined $external_name );
  $self->external_db($external_db)     if ( defined $external_db );
  $self->external_status($external_status)
    if ( defined $external_status );
  $self->display_xref($display_xref) if ( defined $display_xref );
  $self->edits_enabled(1);

  $self->description($description);

  # -CONFIDENCE is the old style name of -STATUS.  Prefer -STATUS when
  # it is given and fall back to -CONFIDENCE otherwise.  (Setting both
  # unconditionally, one after the other, discarded -CONFIDENCE
  # whenever -STATUS was absent.)
  $self->status( defined($status) ? $status : $confidence );
  $self->biotype($biotype);

  # default is_current
  $is_current = 1 unless ( defined($is_current) );
  $self->{'is_current'} = $is_current;

  return $self;
} ## end sub new
180

Graham McVicker's avatar
Graham McVicker committed
181
=head2 get_all_DBLinks
182

183
  Example    : my @dblinks = @{ $transcript->get_all_DBLinks };
184 185 186 187 188 189 190
  Description: Retrieves _all_ related DBEntries for this transcript.  
               This includes all DBEntries that are associated with the
               corresponding translation.

               If you only want to retrieve the DBEntries associated with the
               transcript then you should use the get_all_DBEntries call 
               instead.
191
  Returntype : Listref of Bio::EnsEMBL::DBEntry objects, sorted by
192
               priority (desc), external db name (asc), display_id (asc)
193 194
  Exceptions : none
  Caller     : general
195
  Status     : Stable
196 197 198

=cut

Graham McVicker's avatar
Graham McVicker committed
199 200
sub get_all_DBLinks {
  my ( $self, $ex_db_exp, $ex_db_type ) = @_;

  # Both arguments are passed through unchanged to get_all_DBEntries().

  # Start with the xrefs attached directly to the transcript ...
  my @xrefs = @{ $self->get_all_DBEntries( $ex_db_exp, $ex_db_type ) };

  # ... and append those of the translation, when there is one.
  my $translation = $self->translation();
  if ($translation) {
    push( @xrefs,
          @{ $translation->get_all_DBEntries( $ex_db_exp, $ex_db_type ) } );
  }

  # _compare_xrefs() is defined elsewhere in this package and takes no
  # arguments; presumably it reads the sort variables $a and $b.
  my @sorted = sort { _compare_xrefs() } @xrefs;

  return \@sorted;
}

Graham McVicker's avatar
Graham McVicker committed
216

217
=head2 get_all_DBEntries
218

219
  Example    : my @dbentries = @{ $transcript->get_all_DBEntries };
220 221 222 223 224 225 226 227
  Description: Retrieves DBEntries (xrefs) for this transcript.  
               This does _not_ include the corresponding translations 
               DBEntries (see get_all_DBLinks).

               This method will attempt to lazy-load DBEntries from a
               database if an adaptor is available and no DBEntries are present
               on the transcript (i.e. they have not already been added or 
               loaded).
228
  Returntype : Listref of Bio::EnsEMBL::DBEntry objects
229 230
  Exceptions : none
  Caller     : get_all_DBLinks, TranscriptAdaptor::store
231
  Status     : Stable
232 233 234

=cut

235 236
sub get_all_DBEntries {
  my ( $self, $ex_db_exp, $ex_db_type ) = @_;

  # Every combination of filters is cached under its own hash key:
  # "dbentries" followed by whichever filter values are defined.
  my $cache_key =
    join( '', 'dbentries', grep { defined } $ex_db_exp, $ex_db_type );

  # Nothing cached yet: lazy-load the xrefs of this transcript, provided
  # an adaptor is attached.
  if ( !defined( $self->{$cache_key} ) && $self->adaptor() ) {
    my $dbentry_adaptor = $self->adaptor->db->get_DBEntryAdaptor;
    $self->{$cache_key} =
      $dbentry_adaptor->fetch_all_by_Transcript( $self, $ex_db_exp,
                                                 $ex_db_type );
  }

  # Always hand back (and remember) a list reference.
  $self->{$cache_key} = [] unless $self->{$cache_key};

  return $self->{$cache_key};
}


=head2 add_DBEntry
261

262 263
  Arg [1]    : Bio::EnsEMBL::DBEntry $dbe
               The dbEntry to be added
264 265 266 267 268
  Example    : my $dbe = Bio::EnsEMBL::DBEntry->new(...);
               $transcript->add_DBEntry($dbe);
  Description: Associates a DBEntry with this transcript. Note that adding
               DBEntries will prevent future lazy-loading of DBEntries for this
               transcript (see get_all_DBEntries).
269 270 271
  Returntype : none
  Exceptions : thrown on incorrect argument type
  Caller     : general
272
  Status     : Stable
273 274 275 276 277 278 279 280

=cut

sub add_DBEntry {
  my ( $self, $dbe ) = @_;

  # Only genuine DBEntry objects may be attached.
  my $is_dbentry =
    $dbe && ref($dbe) && $dbe->isa('Bio::EnsEMBL::DBEntry');
  if ( !$is_dbentry ) {
    throw('Expected DBEntry argument');
  }

  # Append to the unfiltered xref cache, creating it on first use.
  $self->{'dbentries'} = [] unless $self->{'dbentries'};
  push( @{ $self->{'dbentries'} }, $dbe );
}

288

289 290
=head2 get_all_supporting_features

291
  Example    : my @evidence = @{ $transcript->get_all_supporting_features };
292 293
  Description: Retrieves any supporting features added manually by 
               calls to add_supporting_features.
294
  Returntype : Listref of Bio::EnsEMBL::FeaturePair objects
295 296
  Exceptions : none
  Caller     : general
297
  Status     : Stable
298 299 300 301 302

=cut

sub get_all_supporting_features {
  my ($self) = @_;

  # Lazy-load from the database the first time round, but only when an
  # adaptor is attached and nothing has been added or flushed yet.
  if ( !exists( $self->{_supporting_evidence} ) && $self->adaptor ) {
    my $feature_adaptor =
      $self->adaptor->db->get_TranscriptSupportingFeatureAdaptor();
    $self->{_supporting_evidence} =
      $feature_adaptor->fetch_all_by_Transcript($self);
  }

  # Fall back to an empty list so that callers can always dereference.
  return $self->{_supporting_evidence} || [];
}


=head2 add_supporting_features

317 318 319
  Arg [1-N]  : Bio::EnsEMBL::FeaturePair $feature
               The supporting features to add
  Example    : $transcript->add_supporting_features(@features);
320
  Description: Adds a list of supporting features to this Transcript.
321 322
               The added features can be retrieved by
               get_all_supporting_features().
323 324 325 326 327
  Returntype : none
  Exceptions : throw if any of the features are not FeaturePairs
               throw if any of the features are not in the same coordinate
               system as the Transcript
  Caller     : general
328
  Status     : Stable
329 330 331 332
 
=cut
 
sub add_supporting_features {
  my ( $self, @features ) = @_;

  # Nothing to add; do not even create the evidence list.
  return unless @features;

  $self->{_supporting_evidence} ||= [];

  # check whether this feature object has been added already
  FEATURE: foreach my $feature (@features) {

    if ( !defined($feature) || ref($feature) eq "ARRAY" ) {
      throw("Element in transcript supporting features array is undefined or is an ARRAY for " . $self->dbID);
    }

    # The offending value is reported through the exception only; the
    # former debugging print to STDOUT has been removed.
    if ( !$feature || !$feature->isa("Bio::EnsEMBL::FeaturePair") ) {
      throw("Supporting feat [$feature] not a " .
            "Bio::EnsEMBL::FeaturePair");
    }

    # When both the transcript and the feature are attached to a slice,
    # the two slices must have the same name.
    if ( ( defined $self->slice() && defined $feature->slice() )
         && ( $self->slice()->name() ne $feature->slice()->name() ) )
    {
      throw("Supporting feat not in same coord system as transcript\n" .
            "transcript is attached to [".$self->slice()->name()."]\n" .
            "feat is attached to [".$feature->slice()->name()."]");
    }

    foreach my $added_feature ( @{ $self->{_supporting_evidence} } ) {
      # compare objects (same reference means already added)
      if ( $feature == $added_feature ) {
        next FEATURE;
      }
    }

    # no duplicate was found, add the feature
    push( @{ $self->{_supporting_evidence} }, $feature );
  }
}


372 373 374 375 376 377 378
=head2 flush_supporting_features

  Example     : $transcript->flush_supporting_features;
  Description : Removes all supporting evidence from the transcript.
  Return type : (Empty) listref
  Exceptions  : none
  Caller      : general
379
  Status      : Stable
380 381 382 383 384 385 386 387 388

=cut

sub flush_supporting_features {
  my ($self) = @_;

  # Replace the stored list with a fresh, empty one and return it.
  return $self->{'_supporting_evidence'} = [];
}


389 390
=head2 external_db

391 392 393 394 395 396 397 398
  Arg [1]    : (optional) String - name of external db to set
  Example    : $transcript->external_db('HGNC');
  Description: Getter/setter for attribute external_db. The db is the one that 
               belongs to the external_name.  
  Returntype : String
  Exceptions : none
  Caller     : general
  Status     : Stable
399 400 401 402

=cut

sub external_db {
  my ( $self, $ext_dbname ) = @_;

  # Setter: store the given name and hand it back.
  if ( defined($ext_dbname) ) {
    $self->{'external_db'} = $ext_dbname;
    return $self->{'external_db'};
  }

  # An explicitly stored value (even an undefined one) wins.
  return $self->{'external_db'} if exists( $self->{'external_db'} );

  # Otherwise derive the name from the display xref, if there is one.
  my $xref = $self->display_xref();

  return defined($xref) ? $xref->dbname() : undef;
}

422

423 424
=head2 external_status

425 426 427 428 429 430 431 432
  Arg [1]    : (optional) String - status of the external db
  Example    : $transcript->external_status('KNOWNXREF');
  Description: Getter/setter for attribute external_status. The status of
               the external db of the one that belongs to the external_name.
  Returntype : String
  Exceptions : none
  Caller     : general
  Status     : Stable
433 434 435 436 437 438 439

=cut

sub external_status {
  my ( $self, $ext_status ) = @_;

  # Setter: store the given status and hand it back.
  if ( defined($ext_status) ) {
    $self->{'external_status'} = $ext_status;
    return $self->{'external_status'};
  }

  # An explicitly stored value (even an undefined one) wins.
  return $self->{'external_status'}
    if exists( $self->{'external_status'} );

  # Otherwise derive the status from the display xref, if there is one.
  my $xref = $self->display_xref();

  return defined($xref) ? $xref->status() : undef;
}


457 458
=head2 external_name

459 460 461 462 463 464 465
  Arg [1]    : (optional) String - the external name to set
  Example    : $transcript->external_name('BRCA2-001');
  Description: Getter/setter for attribute external_name.
  Returntype : String or undef
  Exceptions : none
  Caller     : general
  Status     : Stable
466 467 468 469

=cut

sub external_name {
  my ( $self, $ext_name ) = @_;

  # Setter: store the given name and hand it back.
  if ( defined($ext_name) ) {
    $self->{'external_name'} = $ext_name;
    return $self->{'external_name'};
  }

  # An explicitly stored value (even an undefined one) wins.
  return $self->{'external_name'} if exists( $self->{'external_name'} );

  # Otherwise use the display id of the display xref, if there is one.
  my $xref = $self->display_xref();

  return defined($xref) ? $xref->display_id() : undef;
}


490 491
=head2 is_known

492 493 494 495
  Example    : print "Transcript ".$transcript->stable_id." is KNOWN\n" if
                  $transcript->is_known;
  Description: Returns TRUE if this transcript has a status of 'KNOWN'
  Returntype : TRUE if known, FALSE otherwise
496 497
  Exceptions : none
  Caller     : general
498
  Status     : Stable
499 500 501

=cut

502 503
sub is_known {
  my ($self) = @_;

  my $status = $self->{'status'};

  # A transcript without any status is simply not known; checking
  # definedness first avoids an "uninitialized value" warning.
  return ( defined($status) && $status eq "KNOWN" );
}


508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525
=head2 status

  Arg [1]    : string $status
  Example    : none
  Description: get/set for attribute status
  Returntype : string
  Exceptions : none
  Caller     : general
  Status     : Medium Risk

=cut

sub status {
  my ( $self, @args ) = @_;

  # Any argument, even undef, is stored as the new status.
  if (@args) {
    $self->{'status'} = $args[0];
  }

  return $self->{'status'};
}

526 527 528 529 530 531 532 533
=head2 biotype

  Arg [1]    : string $biotype
  Example    : none
  Description: get/set for attribute biotype
  Returntype : string
  Exceptions : none
  Caller     : general
534
  Status     : Stable
535 536 537 538 539 540 541

=cut

sub biotype {
  my ( $self, @args ) = @_;

  # Any argument, even undef, is stored as the new biotype.
  if (@args) {
    $self->{'biotype'} = $args[0];
  }

  # An unset (or otherwise false) biotype reads as "protein_coding".
  return ( $self->{'biotype'} || "protein_coding" );
}

544

Alistair Rust's avatar
Alistair Rust committed
545
=head2 display_xref
546

547 548 549
  Arg [1]    : (optional) Bio::EnsEMBL::DBEntry - the display xref to set
  Example    : $transcript->display_xref($db_entry);
  Description: Getter/setter for display_xref for this transcript.
550
  Returntype : Bio::EnsEMBL::DBEntry
551 552
  Exceptions : none
  Caller     : general
553
  Status     : Stable
554 555 556

=cut

557
sub display_xref {
  my ( $self, @args ) = @_;

  # Any argument, even undef, replaces the stored display xref.
  if (@args) {
    $self->{'display_xref'} = $args[0];
  }

  return $self->{'display_xref'};
}


=head2 translation

566 567 568
  Args       : None
  Example    : if ( $transcript->translation() ) {
                 print( $transcript->translation()->stable_id(), "\n" );
569
               } else {
570
                 print("Pseudogene\n");
571
               }
572 573 574 575 576 577 578
  Description: Getter/setter for the Translation object which
               defines the CDS (and as a result the peptide encoded
               by) this transcript.  This function will return
               undef if this transcript is a pseudogene, i.e. a
               non-translating transcript such as an ncRNA.  This
               is the accepted method of determining whether a
               transcript is a pseudogene or not.
579
  Returntype : Bio::EnsEMBL::Translation
580 581
  Exceptions : none
  Caller     : general
582
  Status     : Stable
Ewan Birney's avatar
Ewan Birney committed
583 584 585

=cut

Michele Clamp's avatar
Michele Clamp committed
586
sub translation {
  my ( $self, $value ) = @_;

  # Setter: accept Translation objects only.
  if ( defined($value) ) {
    unless ( ref($value) && $value->isa('Bio::EnsEMBL::Translation') ) {
      throw("Bio::EnsEMBL::Translation argument expected.");
    }

    $self->{'translation'} = $value;

    return $self->{'translation'};
  }

  # Getter: lazy-load from the database once, when an adaptor is
  # attached.  The fetched result is stored even if it is undef, so the
  # lookup is not repeated.
  if ( !exists( $self->{'translation'} ) && defined( $self->adaptor() ) ) {
    my $translation_adaptor =
      $self->adaptor()->db()->get_TranslationAdaptor();
    $self->{'translation'} =
      $translation_adaptor->fetch_by_Transcript($self);
  }

  return $self->{'translation'};
}

607 608 609 610 611 612 613 614 615 616 617 618
=head2 get_all_alternative_translations

  Args       : None
  Example    :

    my @alt_translations =
      @{ $transcript->get_all_alternative_translations() };

  Description:  Fetches all alternative translations defined for this
                transcript.  The canonical translation is not returned.

  Returntype : Array reference to Bio::EnsEMBL::Translation
Andreas Kusalananda Kähäri's avatar
Andreas Kusalananda Kähäri committed
619
  Exceptions : None
620 621 622 623 624 625 626 627
  Caller     : General
  Status     : Stable

=cut

sub get_all_alternative_translations {
  my ($self) = @_;

  # Lazy-load from the database once, when an adaptor is attached.
  if (   !exists( $self->{'alternative_translations'} )
       && defined( $self->adaptor() ) )
  {
    my $pa           = $self->adaptor()->db()->get_TranslationAdaptor();
    my @translations = @{ $pa->fetch_all_by_Transcript($self) };

    # The first in the list of translations is the canonical one,
    # shift it off.
    shift(@translations);

    $self->{'alternative_translations'} = \@translations;
  }

  # Always return an array reference, as documented.  The empty
  # fallback is deliberately not stored, so that lazy-loading can still
  # happen once an adaptor becomes available.
  return $self->{'alternative_translations'} || [];
}
644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675

=head2 add_alternative_translation

  Args       : Bio::EnsEMBL::Translation $translation
  Example    :

    $transcript->add_alternative_translation($translation);

  Description: Adds an alternative translation to this transcript.
  Returntype : None
  Exceptions : None
  Caller     : General
  Status     : Stable

=cut

sub add_alternative_translation {
  my ( $self, $translation ) = @_;

  # Only Translation objects may be added.
  unless (    defined($translation)
           && ref($translation)
           && $translation->isa('Bio::EnsEMBL::Translation') )
  {
    throw("Bio::EnsEMBL::Translation argument expected.");
  }

  # Make sure any alternative translations held in the database have
  # been loaded before the new one is appended.
  $self->get_all_alternative_translations();

  push( @{ $self->{'alternative_translations'} }, $translation );
}
Arne Stabenau's avatar
Arne Stabenau committed
676

677 678 679 680
=head2 spliced_seq

  Args       : none
  Example    : none
681 682
  Description: Retrieves all Exon sequences and concats them together.
               No phase padding magic is done, even if phases do not align.
683 684 685
  Returntype : txt
  Exceptions : none
  Caller     : general
686
  Status     : Stable
687 688 689 690 691

=cut

sub spliced_seq {
  my ($self) = @_;

  my $seq_string = "";

  # Concatenate the exon sequences in the order get_all_Exons() returns
  # them.
  foreach my $exon ( @{ $self->get_all_Exons() } ) {
    my $exon_seq = $exon->seq();

    if ($exon_seq) {
      $seq_string .= $exon_seq->seq();
    } else {
      # No sequence available: pad with Ns to keep the length right.
      warning("Could not obtain seq for exon.  Transcript sequence may not " .
              "be correct.");
      $seq_string .= 'N' x $exon->length();
    }
  }

  # apply post transcriptional edits
  if ( $self->edits_enabled() ) {
    # Work through the edits from the highest start position downwards,
    # so that applying one edit does not shift the coordinates of the
    # edits still to come.
    my @edits_desc =
      sort { $b->start() <=> $a->start() } @{ $self->get_all_SeqEdits() };

    for my $edit (@edits_desc) {
      $edit->apply_edit( \$seq_string );
    }
  }

  return $seq_string;
}


=head2 translateable_seq

  Args       : none
  Example    : print $transcript->translateable_seq(), "\n";
727
  Description: Returns a sequence string which is the translateable part
728
               of the transcripts sequence.  This is formed by splicing all
729 730 731 732 733
               Exon sequences together and apply all defined RNA edits.
               Then the coding part of the sequence is extracted and returned.
               The code will not support monkey exons any more. If you want to
               have non phase matching exons, define appropriate _rna_edit
               attributes!
734 735 736

               An empty string is returned if this transcript is a pseudogene
               (i.e. is non-translateable).
737 738 739
  Returntype : txt
  Exceptions : none
  Caller     : general
740
  Status     : Stable
741 742 743 744 745 746

=cut

sub translateable_seq {
  my ($self) = @_;

  # Non-coding transcripts have no translateable sequence.
  if ( !$self->translation() ) {
    return '';
  }

  my $start = $self->cdna_coding_start();
  my $end   = $self->cdna_coding_end();

  # Without usable CDS coordinates there is nothing to extract.  Check
  # this before the coordinates are used for arithmetic and substr().
  if ( !$start || !$end ) {
    return "";
  }

  # Cut the coding part (1-based, inclusive coordinates) out of the
  # spliced and edited transcript sequence.
  my $mrna = substr( $self->spliced_seq(), $start - 1, $end - $start + 1 );

  # Pad with Ns when the first coding exon has a positive phase.
  my $start_phase = $self->translation->start_Exon->phase();
  if ( $start_phase > 0 ) {
    $mrna = "N" x $start_phase . $mrna;
  }

  return $mrna;
}

Arne Stabenau's avatar
Arne Stabenau committed
769

770 771 772 773 774 775 776
=head2 cdna_coding_start

  Arg [1]    : (optional) $value
  Example    : $relative_coding_start = $transcript->cdna_coding_start;
  Description: Retrieves the position of the coding start of this transcript
               in cdna coordinates (relative to the start of the 5prime end of
               the transcript, excluding introns, including utrs).
777 778 779

               This will return undef if this is a pseudogene (i.e. a
               transcript with no translation).
780 781 782
  Returntype : int
  Exceptions : none
  Caller     : five_prime_utr, get_all_snps, general
783
  Status     : Stable
784 785 786 787

=cut

sub cdna_coding_start {
  my ( $self, @args ) = @_;

  # Any argument, even undef, overrides the cached value.
  $self->{'cdna_coding_start'} = $args[0] if @args;

  if ( !defined( $self->{'cdna_coding_start'} )
       && defined( $self->translation ) )
  {
    # calc coding start relative from the start of translation (in cdna
    # coords)
    my $cdna_pos = 0;
    my @exons    = @{ $self->get_all_Exons };

    while ( my $exon = shift(@exons) ) {
      if ( $exon == $self->translation->start_Exon ) {
        # add the utr portion of the start exon
        $cdna_pos += $self->translation->start;
        last;
      }

      # add the entire length of this non-coding exon
      $cdna_pos += $exon->length;
    }

    # adjust cdna coords if sequence edits are enabled
    if ( $self->edits_enabled() ) {
      # sort in reverse order to avoid adjustment of downstream edits
      my @edits_desc =
        sort { $b->start() <=> $a->start() } @{ $self->get_all_SeqEdits() };

      for my $edit (@edits_desc) {
        # use less than start so that start of CDS can be extended
        next unless ( $edit->start() < $cdna_pos );
        $cdna_pos += $edit->length_diff();
      }
    }

    $self->{'cdna_coding_start'} = $cdna_pos;
  }

  return $self->{'cdna_coding_start'};
}


=head2 cdna_coding_end

  Arg [1]    : (optional) $value
836
  Example    : $cdna_coding_end = $transcript->cdna_coding_end;
837 838
  Description: Retrieves the end of the coding region of this transcript in
               cdna coordinates (relative to the five prime end of the
Graham McVicker's avatar
Graham McVicker committed
839
               transcript, excluding introns, including utrs).
840 841 842 843

               This will return undef if this transcript is a pseudogene
               (i.e. a transcript with no translation and therefore no CDS).
  Returntype : int
844 845
  Exceptions : none
  Caller     : general
846
  Status     : Stable
847 848 849 850

=cut

sub cdna_coding_end {
  my ( $self, @args ) = @_;

  # Any argument, even undef, overrides the cached value.
  $self->{'cdna_coding_end'} = $args[0] if @args;

  if ( !defined( $self->{'cdna_coding_end'} )
       && defined( $self->translation ) )
  {
    my @exons    = @{ $self->get_all_Exons };
    my $cdna_pos = 0;

    while ( my $exon = shift(@exons) ) {
      if ( $exon == $self->translation->end_Exon ) {
        # add coding portion of the final coding exon
        $cdna_pos += $self->translation->end;
        last;
      }

      # add entire exon
      $cdna_pos += $exon->length;
    }

    # adjust cdna coords if sequence edits are enabled
    if ( $self->edits_enabled() ) {
      # sort in reverse order to avoid adjustment of downstream edits
      my @edits_desc =
        sort { $b->start() <=> $a->start() } @{ $self->get_all_SeqEdits() };

      for my $edit (@edits_desc) {
        # use less than or equal to end+1 so end of the CDS can be
        # extended
        next unless ( $edit->start() <= $cdna_pos + 1 );
        $cdna_pos += $edit->length_diff();
      }
    }

    $self->{'cdna_coding_end'} = $cdna_pos;
  }

  return $self->{'cdna_coding_end'};
}


893
=head2 coding_region_start
894 895

  Arg [1]    : (optional) $value
896
  Example    : $coding_region_start = $transcript->coding_region_start
897 898
  Description: Retrieves the start of the coding region of this transcript
               in genomic coordinates (i.e. in either slice or contig coords).
899 900 901 902 903
               By convention, the coding_region_start is always lower than
               the value returned by the coding_region_end method.
               The value returned by this function is NOT the biological
               coding start since on the reverse strand the biological coding
               start would be the higher genomic value.
904 905 906

               This function will return undef if this is a pseudogene
               (a non-translated transcript).
Graham McVicker's avatar
Graham McVicker committed
907
  Returntype : int
908 909
  Exceptions : none
  Caller     : general
910
  Status     : Stable
911 912 913

=cut

914
sub coding_region_start {
  my ( $self, $value ) = @_;

  if ( defined($value) ) {
    # Setter: an explicit value overrides any computed one.
    $self->{'coding_region_start'} = $value;
  }
  elsif ( !defined( $self->{'coding_region_start'} )
          && defined( $self->translation ) )
  {
    # calculate the coding start from the translation
    my $on_forward_strand =
      ( $self->translation()->start_Exon->strand() == 1 );

    my $start;
    if ($on_forward_strand) {
      # Offset of the translation start into its start exon.
      $start = $self->translation()->start_Exon->start()
        + ( $self->translation()->start() - 1 );
    } else {
      # On any other strand the lower coordinate is computed from the
      # end exon and the translation end.
      $start = $self->translation()->end_Exon->end()
        - ( $self->translation()->end() - 1 );
    }

    $self->{'coding_region_start'} = $start;
  }

  return $self->{'coding_region_start'};
}

937

938
=head2 coding_region_end
939

Graham McVicker's avatar
Graham McVicker committed
940
  Arg [1]    : (optional) $value
941
  Example    : $coding_region_end = $transcript->coding_region_end
942
  Description: Retrieves the end of the coding region of this transcript
Graham McVicker's avatar
Graham McVicker committed
943
               in genomic coordinates (i.e. in either slice or contig coords).
944 945 946 947
               By convention, the coding_region_end is always higher than the
               value returned by the coding_region_start method.
               The value returned by this function is NOT the biological
               coding end since on the reverse strand the biological coding
948
               end would be the lower genomic value.
949 950 951