Transcript.pm 90.6 KB
Newer Older
1 2
=head1 LICENSE

Magali Ruffier's avatar
Magali Ruffier committed
3
Copyright [1999-2015] Wellcome Trust Sanger Institute and the EMBL-European Bioinformatics Institute
Tiago Grego's avatar
Tiago Grego committed
4
Copyright [2016-2019] EMBL-European Bioinformatics Institute
5

6 7 8 9 10 11 12 13 14 15 16 17 18
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

     http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.

=cut
19 20 21 22 23


=head1 CONTACT

  Please email comments or questions to the public Ensembl
Magali Ruffier's avatar
Magali Ruffier committed
24
  developers list at <http://lists.ensembl.org/mailman/listinfo/dev>.
25 26

  Questions may also be sent to the Ensembl help desk at
Magali Ruffier's avatar
Magali Ruffier committed
27
  <http://www.ensembl.org/Help/Contact>.
28 29

=cut
Ewan Birney's avatar
Ewan Birney committed
30 31 32

=head1 NAME

33
Bio::EnsEMBL::Transcript - object representing an Ensembl transcript
Ewan Birney's avatar
Ewan Birney committed
34 35 36

=head1 SYNOPSIS

37
Creation:
38

39 40
  my $tran = new Bio::EnsEMBL::Transcript();
  my $tran = new Bio::EnsEMBL::Transcript( -EXONS => \@exons );
41 42 43

Manipulation:

44 45
  # Returns an array of Exon objects
  my @exons = @{ $tran->get_all_Exons() };
46

47 48 49 50 51 52
  # Returns the peptide translation of the exons as a Bio::Seq
  if ( $tran->translation() ) {
    my $pep = $tran->translate();
  } else {
    print "Transcript ", $tran->stable_id(), " is non-coding\n";
  }
53

54 55
=head1 DESCRIPTION

56
A representation of a transcript within the Ensembl system.  A transcript
57 58
consists of a set of Exons and (possibly) a Translation which defines the
coding and non-coding regions of the exons.
Ewan Birney's avatar
Ewan Birney committed
59

60
=cut
61

62 63
package Bio::EnsEMBL::Transcript;

Ewan Birney's avatar
Ewan Birney committed
64 65
use strict;

66
use Bio::EnsEMBL::Feature;
67
use Bio::EnsEMBL::UTR;
Ian Longden's avatar
Ian Longden committed
68
use Bio::EnsEMBL::Intron;
69 70
use Bio::EnsEMBL::ExonTranscript;
use Bio::EnsEMBL::CDS;
71
use Bio::EnsEMBL::TranscriptMapper;
72
use Bio::EnsEMBL::SeqEdit;
73
use Bio::EnsEMBL::Biotype;
74
use Bio::EnsEMBL::Utils::Argument qw( rearrange );
Magali Ruffier's avatar
Magali Ruffier committed
75
use Bio::EnsEMBL::Utils::Exception qw(warning throw );
76
use Bio::EnsEMBL::Utils::Scalar qw( assert_ref );
77

78
use parent qw(Bio::EnsEMBL::Feature);
Ewan Birney's avatar
Ewan Birney committed
79

80 81 82 83
# Sequence Ontology accession ('acc') and term name ('term') declared for
# this class.
use constant SEQUENCE_ONTOLOGY => {
  acc  => 'SO:0000673',
  term => 'transcript',
};
Graham McVicker's avatar
Graham McVicker committed
84 85 86 87

=head2 new

  Arg [-EXONS] :
Tiago Grego's avatar
Tiago Grego committed
88
        reference to list of Bio::EnsEMBL::Exon objects - exons which make up 
Graham McVicker's avatar
Graham McVicker committed
89 90 91 92 93 94 95 96 97 98 99 100 101 102
        this transcript
  Arg [-STABLE_ID] :
        string - the stable identifier of this transcript
  Arg [-VERSION] :
        int - the version of the stable identifier of this transcript
  Arg [-EXTERNAL_NAME] :
        string - the external database name associated with this transcript
  Arg [-EXTERNAL_DB] :
        string - the name of the database the external name is from
  Arg [-EXTERNAL_STATUS]:
        string - the status of the external identifier
  Arg [-DISPLAY_XREF]:
        Bio::EnsEMBL::DBEntry - The external database entry that is used
        to label this transcript when it is displayed.
103 104 105 106 107
  Arg [-CREATED_DATE]:
        string - the date the transcript was created
  Arg [-MODIFIED_DATE]:
        string - the date the transcript was last modified
  Arg [-DESCRIPTION]:
108
        string - the transcript's description
Tiago Grego's avatar
Tiago Grego committed
109
  Arg [-BIOTYPE]: 
110
        string - the biotype e.g. "protein_coding"
111 112
  Arg [-IS_CURRENT]:
        Boolean - specifies if this is the current version of the transcript
113 114 115
  Arg [-SOURCE]:
        string - the transcript source, e.g. "ensembl"

Graham McVicker's avatar
Graham McVicker committed
116 117 118 119 120
  Example    : $tran = new Bio::EnsEMBL::Transcript(-EXONS => \@exons);
  Description: Constructor. Instantiates a Transcript object.
  Returntype : Bio::EnsEMBL::Transcript
  Exceptions : throw on bad arguments
  Caller     : general
121
  Status     : Stable
Graham McVicker's avatar
Graham McVicker committed
122 123 124

=cut

125
sub new {
  my ( $proto, @args ) = @_;

  # Allow both Class->new(...) and $object->new(...).
  my $class = ref($proto) || $proto;

  # The parent constructor receives the full, unmodified argument list.
  my $self = $class->SUPER::new(@args);

  # Names of the constructor arguments, in the order the values are
  # unpacked below.
  my @arg_names = qw(
    EXONS            STABLE_ID       VERSION
    EXTERNAL_NAME    EXTERNAL_DB     EXTERNAL_STATUS
    DISPLAY_XREF     CREATED_DATE    MODIFIED_DATE
    DESCRIPTION      BIOTYPE         CONFIDENCE
    EXTERNAL_DB_NAME IS_CURRENT      SOURCE
  );

  # NOTE: $confidence and $external_db_name are parsed but not used by
  # this constructor.
  my (
    $exons,            $stable_id,    $version,
    $external_name,    $external_db,  $external_status,
    $display_xref,     $created_date, $modified_date,
    $description,      $biotype,      $confidence,
    $external_db_name, $is_current,   $source
  ) = rearrange( \@arg_names, @args );

  # Exons are stored as given; the coordinates are then recomputed.
  if ($exons) {
    $self->{'_trans_exon_array'} = $exons;
    $self->recalculate_coordinates();
  }

  $self->stable_id($stable_id);
  $self->{'created_date'}  = $created_date;
  $self->{'modified_date'} = $modified_date;

  # The external_* setters and display_xref are only invoked when a value
  # was actually supplied.
  if ( defined $external_name )   { $self->external_name($external_name); }
  if ( defined $external_db )     { $self->external_db($external_db); }
  if ( defined $external_status ) { $self->external_status($external_status); }
  if ( defined $display_xref )    { $self->display_xref($display_xref); }

  $self->edits_enabled(1);

  $self->description($description);

  $self->{'biotype'} = $biotype;

  $self->source($source);

  # The version defaults to 1 and is_current defaults to true.
  $self->{'version'}    = defined($version)    ? $version    : 1;
  $self->{'is_current'} = defined($is_current) ? $is_current : 1;

  return $self;
} ## end sub new
193

Graham McVicker's avatar
Graham McVicker committed
194
=head2 get_all_DBLinks
195

196 197 198 199
  Arg [1]    : String database name (optional)
               SQL wildcard characters (_ and %) can be used to
               specify patterns.

200 201 202 203 204
  Arg [2]    : (optional) String, external database type, can be one of
               ('ARRAY','ALT_TRANS','ALT_GENE','MISC','LIT','PRIMARY_DB_SYNONYM','ENSEMBL'),
               SQL wildcard characters (_ and %) can be used to
               specify patterns.

205
  Example    : my @dblinks = @{ $transcript->get_all_DBLinks() };
206 207
               @dblinks = @{ $transcript->get_all_DBLinks('Uniprot%') };
               @dblinks = @{ $transcript->get_all_DBLinks('%', 'ENSEMBL') };
208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223

  Description: Retrieves *all* related DBEntries for this
               transcript.  This includes all DBEntries that are
               associated with the corresponding translation.

               If you only want to retrieve the DBEntries associated
               with the transcript (and not the translation) then
               you should use the get_all_DBEntries() call instead.

               Note: Each entry may be listed more than once.  No
               uniqueness checks are done.  Also if you put in an
               incorrect external database name no checks are done
               to see if this exists, you will just get an empty
               list.

  Return type: Listref of Bio::EnsEMBL::DBEntry objects
224 225
  Exceptions : none
  Caller     : general
226
  Status     : Stable
227 228 229

=cut

Graham McVicker's avatar
Graham McVicker committed
230
sub get_all_DBLinks {
  my ( $self, $db_name_exp, $ex_db_type ) = @_;

  # Start from a copy of the transcript's own DBEntries so the list
  # returned by get_all_DBEntries() is never modified.
  my $own_entries = $self->get_all_DBEntries( $db_name_exp, $ex_db_type );
  my @links       = @{$own_entries};

  # Append the DBEntries of the translation, when there is one.
  my $translation = $self->translation();
  if ( defined $translation ) {
    my $translation_entries =
      $translation->get_all_DBEntries( $db_name_exp, $ex_db_type );
    push @links, @{$translation_entries};
  }

  # The ordering is delegated to _compare_xrefs().
  @links = sort { _compare_xrefs() } @links;

  return \@links;
}

249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281
=head2 get_all_xrefs

  Arg [1]    : String database name (optional)
               SQL wildcard characters (_ and %) can be used to
               specify patterns.

  Example    : @xrefs = @{ $transcript->get_all_xrefs() };
               @xrefs = @{ $transcript->get_all_xrefs('Uniprot%') };

  Description: Retrieves *all* related xrefs for this transcript.
               This includes all xrefs that are associated with the
               corresponding translation of this transcript.

               If you want to retrieve the xrefs associated with
               only the transcript (and not the translation) then
               you should use the get_all_object_xrefs() method
               instead.

               Note: Each entry may be listed more than once.  No
               uniqueness checks are done.  Also if you put in an
               incorrect external database name no checks are done
               to see if this exists, you will just get an empty
               list.

                NB: This method is an alias for the
                    get_all_DBLinks() method.

  Return type: Listref of Bio::EnsEMBL::DBEntry objects

  Status     : Stable

=cut

282 283 284 285
sub get_all_xrefs {
  my ( $self, @args ) = @_;

  # Alias: every argument is forwarded unchanged to get_all_DBLinks().
  return $self->get_all_DBLinks(@args);
}
Graham McVicker's avatar
Graham McVicker committed
286

287
=head2 get_all_DBEntries
288

289 290 291
  Arg [1]    : (optional) String, external database name,
               SQL wildcard characters (_ and %) can be used to
               specify patterns.
292

293 294 295 296
  Arg [2]    : (optional) String, external database type, can be one of
               ('ARRAY','ALT_TRANS','ALT_GENE','MISC','LIT','PRIMARY_DB_SYNONYM','ENSEMBL'),
               SQL wildcard characters (_ and %) can be used to
               specify patterns.
297

298
  Example    : my @dbentries = @{ $transcript->get_all_DBEntries() };
299 300
               @dbentries = @{ $transcript->get_all_DBEntries('Uniprot%') };
               @dbentries = @{ $transcript->get_all_DBEntries('%', 'ENSEMBL') };
301 302 303 304 305 306 307 308 309 310

  Description: Retrieves DBEntries (xrefs) for this transcript.
               This does *not* include the corresponding
               translations DBEntries (see get_all_DBLinks()).

               This method will attempt to lazy-load DBEntries
               from a database if an adaptor is available and no
               DBEntries are present on the transcript (i.e. they
               have not already been added or loaded).

311
  Returntype : Listref of Bio::EnsEMBL::DBEntry objects
312 313
  Exceptions : none
  Caller     : get_all_DBLinks, TranscriptAdaptor::store
314
  Status     : Stable
315 316 317

=cut

318
sub get_all_DBEntries {
  my ( $self, $ex_db_exp, $ex_db_type ) = @_;

  # Results are cached on the object under a key made of 'dbentries'
  # followed by whichever of the two filters were supplied.
  my $cache_name =
    join( '', 'dbentries', grep { defined $_ } ( $ex_db_exp, $ex_db_type ) );

  # Nothing cached yet: fetch the xrefs through the adaptor, if there is one.
  if ( !defined( $self->{$cache_name} ) ) {
    my $adaptor = $self->adaptor();
    if ( defined $adaptor ) {
      my $dbentry_adaptor = $adaptor->db()->get_DBEntryAdaptor();
      $self->{$cache_name} =
        $dbentry_adaptor->fetch_all_by_Transcript( $self, $ex_db_exp,
                                                   $ex_db_type );
    }
  }

  # Always hand back (and remember) a list reference.
  $self->{$cache_name} ||= [];

  return $self->{$cache_name};
} ## end sub get_all_DBEntries
343

344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361
=head2 get_all_object_xrefs

  Arg [1]    : (optional) String, external database name

  Arg [2]    : (optional) String, external_db type

  Example    : @oxrefs = @{ $transcript->get_all_object_xrefs() };

  Description: Retrieves xrefs for this transcript.  This does
               *not* include xrefs that are associated with the
               corresponding translations of this transcript (see
               get_all_xrefs()).

               This method will attempt to lazy-load xrefs from a
               database if an adaptor is available and no xrefs are
               present on the transcript (i.e. they have not already
               been added or loaded).

362 363
                NB: This method is an alias for the
                    get_all_DBEntries() method.
364 365 366 367 368 369 370

  Return type: Listref of Bio::EnsEMBL::DBEntry objects

  Status     : Stable

=cut

371 372 373 374
sub get_all_object_xrefs {
  my ( $self, @args ) = @_;

  # Alias: every argument is forwarded unchanged to get_all_DBEntries().
  return $self->get_all_DBEntries(@args);
}
375 376

=head2 add_DBEntry
377

378 379
  Arg [1]    : Bio::EnsEMBL::DBEntry $dbe
               The dbEntry to be added
380 381 382 383 384
  Example    : my $dbe = Bio::EnsEMBL::DBEntry->new(...);
               $transcript->add_DBEntry($dbe);
  Description: Associates a DBEntry with this transcript. Note that adding
               DBEntries will prevent future lazy-loading of DBEntries for this
               gene (see get_all_DBEntries).
385 386 387
  Returntype : none
  Exceptions : thrown on incorrect argument type
  Caller     : general
388
  Status     : Stable
389 390 391 392 393 394 395 396

=cut

sub add_DBEntry {
  my ( $self, $dbe ) = @_;

  # Only Bio::EnsEMBL::DBEntry objects (or subclasses) are accepted.
  my $is_dbentry =
    $dbe && ref($dbe) && $dbe->isa('Bio::EnsEMBL::DBEntry');
  if ( !$is_dbentry ) {
    throw('Expected DBEntry argument');
  }

  # Create the unfiltered 'dbentries' list on first use, then append.
  # The value of push() (the new element count) is the return value.
  $self->{'dbentries'} = [] unless $self->{'dbentries'};
  return push( @{ $self->{'dbentries'} }, $dbe );
}

404

405 406
=head2 get_all_supporting_features

407
  Example    : my @evidence = @{ $transcript->get_all_supporting_features };
Tiago Grego's avatar
Tiago Grego committed
408
  Description: Retrieves any supporting features added manually by 
409
               calls to add_supporting_features.
410
  Returntype : Listref of Bio::EnsEMBL::FeaturePair objects
411 412
  Exceptions : none
  Caller     : general
413
  Status     : Stable
414 415 416 417 418

=cut

sub get_all_supporting_features {
  my ($self) = @_;

  # Lazy-load through the adaptor only when nothing has been stored yet
  # (the key does not exist) and an adaptor is attached.
  if ( !exists( $self->{_supporting_evidence} ) && $self->adaptor ) {
    my $tsfa = $self->adaptor->db->get_TranscriptSupportingFeatureAdaptor();
    $self->{_supporting_evidence} = $tsfa->fetch_all_by_Transcript($self);
  }

  # Fall back to a fresh empty list; it is not stored on the object.
  return $self->{_supporting_evidence} || [];
}


=head2 add_supporting_features

433 434 435
  Arg [1-N]  : Bio::EnsEMBL::FeaturePair $feature
               The supporting features to add
  Example    : $transcript->add_supporting_features(@features);
436
  Description: Adds a list of supporting features to this Transcript.
437 438
               The added features can be retrieved by
               get_all_supporting_features().
439 440 441 442 443
  Returntype : none
  Exceptions : throw if any of the features are not FeaturePairs
               throw if any of the features are not in the same coordinate
               system as the Transcript
  Caller     : general
444
  Status     : Stable
Tiago Grego's avatar
Tiago Grego committed
445
 
446
=cut
Tiago Grego's avatar
Tiago Grego committed
447
 
448
sub add_supporting_features {
  my ( $self, @features ) = @_;

  # Adds each feature to the transcript's supporting evidence unless the
  # very same object is already present.  Throws when a feature is
  # undefined, an array reference, not a Bio::EnsEMBL::FeaturePair, or
  # attached to a differently named slice than the transcript.
  return unless @features;

  $self->{_supporting_evidence} ||= [];

  FEATURE: foreach my $feature (@features) {

    if ( !defined($feature) || ref($feature) eq "ARRAY" ) {
      throw("Element in transcript supporting features array is undefined or is an ARRAY for " . $self->dbID);
    }

    # isa() is called inside eval so that an unblessed reference (e.g. a
    # plain hash) is reported through throw() rather than a raw
    # "Can't call method" error.  $@ is localised to leave the caller's
    # error state untouched.
    my $is_feature_pair = do {
      local $@;
      ref($feature) && eval { $feature->isa("Bio::EnsEMBL::FeaturePair") };
    };
    if ( !$is_feature_pair ) {
      throw("Supporting feat [$feature] not a " .
            "Bio::EnsEMBL::FeaturePair");
    }

    # The slices are only compared when both objects have one.
    if ( ( defined $self->slice() && defined $feature->slice() )
      && ( $self->slice()->name() ne $feature->slice()->name() ) )
    {
      throw("Supporting feat not in same coord system as transcript\n" .
            "transcript is attached to [".$self->slice()->name()."]\n" .
            "feat is attached to [".$feature->slice()->name()."]");
    }

    # Skip the feature if this exact object was added before.
    foreach my $added_feature ( @{ $self->{_supporting_evidence} } ) {
      next FEATURE if $feature == $added_feature;
    }

    # No duplicate was found, add the feature.
    push( @{ $self->{_supporting_evidence} }, $feature );
  }

  return;
}


488 489 490 491 492 493 494
=head2 flush_supporting_features

  Example     : $transcript->flush_supporting_features;
  Description : Removes all supporting evidence from the transcript.
  Return type : (Empty) listref
  Exceptions  : none
  Caller      : general
495
  Status      : Stable
496 497 498 499 500 501 502 503 504

=cut

sub flush_supporting_features {
  my ($self) = @_;

  # Replace the stored evidence with a new empty list and return it.
  my $empty = [];
  $self->{'_supporting_evidence'} = $empty;
  return $empty;
}


505 506
=head2 external_db

507 508
  Arg [1]    : (optional) String - name of external db to set
  Example    : $transcript->external_db('HGNC');
Tiago Grego's avatar
Tiago Grego committed
509 510
  Description: Getter/setter for attribute external_db. The db is the one that 
               belongs to the external_name.  
511 512 513 514
  Returntype : String
  Exceptions : none
  Caller     : general
  Status     : Stable
515 516 517 518

=cut

sub external_db {
  my ( $self, $ext_dbname ) = @_;

  # Setter: a defined argument is stored and returned.
  if ( defined $ext_dbname ) {
    $self->{'external_db'} = $ext_dbname;
    return $self->{'external_db'};
  }

  # Getter: a value stored on the object wins (even an undefined one).
  return $self->{'external_db'} if exists $self->{'external_db'};

  # Otherwise fall back to the database name of the display xref.
  my $xref = $self->display_xref();
  return defined($xref) ? $xref->dbname() : undef;
}

538

539 540
=head2 external_status

541 542 543 544 545 546 547 548
  Arg [1]    : (optional) String - status of the external db
  Example    : $transcript->external_status('KNOWNXREF');
  Description: Getter/setter for attribute external_status. The status of
               the external db of the one that belongs to the external_name.
  Returntype : String
  Exceptions : none
  Caller     : general
  Status     : Stable
549 550 551

=cut

Tiago Grego's avatar
Tiago Grego committed
552
sub external_status {
  my ( $self, $ext_status ) = @_;

  # Setter: a defined argument is stored and returned.
  if ( defined $ext_status ) {
    $self->{'external_status'} = $ext_status;
    return $self->{'external_status'};
  }

  # Getter: a value stored on the object wins (even an undefined one).
  return $self->{'external_status'} if exists $self->{'external_status'};

  # Otherwise fall back to the status of the display xref.
  my $xref = $self->display_xref();
  return defined($xref) ? $xref->status() : undef;
}


573 574
=head2 external_name

575 576 577 578 579 580 581
  Arg [1]    : (optional) String - the external name to set
  Example    : $transcript->external_name('BRCA2-001');
  Description: Getter/setter for attribute external_name.
  Returntype : String or undef
  Exceptions : none
  Caller     : general
  Status     : Stable
582 583 584 585

=cut

sub external_name {
  my ( $self, $ext_name ) = @_;

  # Setter: a defined argument is stored and returned.
  if ( defined $ext_name ) {
    $self->{'external_name'} = $ext_name;
    return $self->{'external_name'};
  }

  # Getter: a value stored on the object wins (even an undefined one).
  return $self->{'external_name'} if exists $self->{'external_name'};

  # Otherwise fall back to the display id of the display xref.
  my $xref = $self->display_xref();
  return defined($xref) ? $xref->display_id() : undef;
}

605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621
=head2 source

  Arg [1]    : (optional) String - the source to set
  Example    : $transcript->source('ensembl');
  Description: Getter/setter for attribute source
  Returntype : String
  Exceptions : none
  Caller     : general
  Status     : Stable

=cut

sub source {
  my ( $self, @args ) = @_;

  # Any supplied argument (even undef) overwrites the stored source.
  if (@args) {
    $self->{'source'} = $args[0];
  }

  # A false or missing source reads back as "ensembl".
  return $self->{'source'} || "ensembl";
}
622

Alistair Rust's avatar
Alistair Rust committed
623
=head2 display_xref
624

625 626 627
  Arg [1]    : (optional) Bio::EnsEMBL::DBEntry - the display xref to set
  Example    : $transcript->display_xref($db_entry);
  Description: Getter/setter for display_xref for this transcript.
628
  Returntype : Bio::EnsEMBL::DBEntry
629 630
  Exceptions : none
  Caller     : general
631
  Status     : Stable
632 633 634

=cut

635
sub display_xref {
  my ( $self, @args ) = @_;

  # Any supplied argument (even undef) overwrites the stored xref.
  if (@args) {
    $self->{'display_xref'} = $args[0];
  }

  return $self->{'display_xref'};
}

641 642 643 644 645 646 647
=head2 is_canonical

  Args [1]      : (optional) Boolean is_canonical

  Example       : if ($transcript->is_canonical()) { ... }

  Description : Returns true (non-zero) if the transcript is the
Andy Yates's avatar
Andy Yates committed
648 649 650 651 652
                canonical transcript of its gene, false (0) if not. If the code
                returns an undefined it is because its state is not currently
                known. Internally the code will consult the database for this
                value if it is unknown and the transcript has a dbID and an
                attached adaptor
653 654 655 656 657 658 659 660 661

  Return type   : Boolean

  Status        : Stable

=cut

sub is_canonical {
  my ( $self, $value ) = @_;

  # Getter/setter for the canonical flag.
  #
  # The setter branch is tested first: previously an unconditional
  # "return the cached value if defined" shortcut ran before it, so once
  # the flag held a defined value a new value could never be set.
  if ( defined($value) ) {
    # Normalise any true/false value to 1/0.
    $self->{is_canonical} = ( $value ? 1 : 0 );
  }
  elsif (    !defined( $self->{is_canonical} )
          && $self->dbID()
          && $self->adaptor() )
  {
    # State unknown: ask the adaptor, which needs a dbID and an adaptor.
    $self->{is_canonical} =
      $self->adaptor()->is_Transcript_canonical($self);
  }

  # May still be undef when the state is unknown and cannot be looked up.
  return $self->{is_canonical};
}
Ewan Birney's avatar
Ewan Birney committed
677 678 679

=head2 translation

680 681 682
  Args       : None
  Example    : if ( $transcript->translation() ) {
                 print( $transcript->translation()->stable_id(), "\n" );
683
               } else {
684
                 print("Pseudogene\n");
685
               }
686 687 688 689 690 691 692
  Description: Getter/setter for the Translation object which
               defines the CDS (and as a result the peptide encoded
               by) this transcript.  This function will return
               undef if this transcript is a pseudogene, i.e. a
               non-translating transcript such as an ncRNA.  This
               is the accepted method of determining whether a
               transcript is a pseudogene or not.
693
  Returntype : Bio::EnsEMBL::Translation
694 695
  Exceptions : none
  Caller     : general
696
  Status     : Stable
Ewan Birney's avatar
Ewan Birney committed
697 698 699

=cut

Michele Clamp's avatar
Michele Clamp committed
700
sub translation {
  my ( $self, $translation ) = @_;

  # Values cached on the transcript that are reset to undef whenever the
  # translation is replaced or removed.
  my @cached_keys = qw(
    cdna_coding_start   cdna_coding_end
    coding_region_start coding_region_end
    transcript_mapper
  );

  if ( defined $translation ) {
    # Setter: attach the new translation and link it back to us.
    assert_ref( $translation, 'Bio::EnsEMBL::Translation' );

    $self->{'translation'} = $translation;
    $translation->transcript($self);

    for my $key (@cached_keys) {
      $self->{$key} = undef;
    }
  }
  elsif ( @_ > 1 ) {
    # An explicit undef was passed: remove the existing translation, if any.
    if ( defined $self->{'translation'} ) {
      $self->{'translation'}->transcript(undef);
      delete $self->{'translation'};

      for my $key (@cached_keys) {
        $self->{$key} = undef;
      }
    }
  }
  elsif ( !exists( $self->{'translation'} ) && defined( $self->adaptor() ) ) {
    # Getter with nothing stored yet: lazy-load through the adaptor.
    my $translation_adaptor = $self->adaptor()->db()->get_TranslationAdaptor();
    $self->{'translation'} = $translation_adaptor->fetch_by_Transcript($self);
  }

  return $self->{'translation'};
} ## end sub translation
Ewan Birney's avatar
Ewan Birney committed
742

743 744 745 746 747 748 749 750 751 752 753
=head2 get_all_alternative_translations

  Args       : None
  Example    :

    my @alt_translations =
      @{ $transcript->get_all_alternative_translations() };

  Description:  Fetches all alternative translations defined for this
                transcript.  The canonical translation is not returned.

754
  Returntype : Arrayref to Bio::EnsEMBL::Translation
Andreas Kusalananda Kähäri's avatar
Andreas Kusalananda Kähäri committed
755
  Exceptions : None
756 757 758 759 760 761 762 763
  Caller     : General
  Status     : Stable

=cut

sub get_all_alternative_translations {
  my ($self) = @_;

  # Lazy-load the alternative translations through the adaptor when
  # nothing has been stored yet and an adaptor is attached.
  if (   !exists( $self->{'alternative_translations'} )
       && defined( $self->adaptor() ) )
  {
    my $pa = $self->adaptor()->db()->get_TranslationAdaptor();

    # Store a copy of the fetched list.
    $self->{'alternative_translations'} =
      [ @{ $pa->fetch_all_alternative_by_Transcript($self) } ];
  }

  # Always return a list reference, as documented.  Previously undef was
  # returned when nothing was stored and no adaptor was available, which
  # made "@{ $transcript->get_all_alternative_translations() }" die under
  # strict.  The empty fallback list is not stored, so a later call can
  # still lazy-load once an adaptor is attached.
  return $self->{'alternative_translations'} || [];
}
776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807

=head2 add_alternative_translation

  Args       : Bio::EnsEMBL::Translation $translation
  Example    :

    $transcript->add_alternative_translation($translation);

  Description: Adds an alternative translation to this transcript.
  Returntype : None
  Exceptions : None
  Caller     : General
  Status     : Stable

=cut

sub add_alternative_translation {
  my ( $self, $translation ) = @_;

  # Only Bio::EnsEMBL::Translation objects (or subclasses) are accepted.
  my $is_translation = defined($translation)
    && ref($translation)
    && $translation->isa('Bio::EnsEMBL::Translation');
  unless ($is_translation) {
    throw("Bio::EnsEMBL::Translation argument expected.");
  }

  # Make sure any existing alternative translations have been loaded
  # before the new one is appended.
  $self->get_all_alternative_translations();

  # The value of push() (the new element count) is the return value.
  return push( @{ $self->{'alternative_translations'} }, $translation );
}
Arne Stabenau's avatar
Arne Stabenau committed
808

809 810
=head2 spliced_seq

811 812
  Args       : soft_mask (opt)
               if specified, will return a sequence where UTR regions are lowercased
813 814
  Description: Retrieves all Exon sequences and concats them together.
               No phase padding magic is done, even if phases do not align.
815
  Returntype : Text
816 817
  Exceptions : none
  Caller     : general
818
  Status     : Stable
819 820 821 822

=cut

sub spliced_seq {
  my ( $self, $soft_mask ) = @_;

  # Concatenates the sequences of all exons, optionally lower-casing the
  # non-coding parts ($soft_mask), then applies the sequence edits when
  # edits are enabled.
  my $seq_string = "";

  for my $ex ( @{ $self->get_all_Exons() } ) {
    my $seq = $ex->seq();

    if ( !$seq ) {
      # No sequence available: pad with Ns to keep the exon length.
      warning("Could not obtain seq for exon.  Transcript sequence may not " .
              "be correct.");
      $seq_string .= 'N' x $ex->length();
      next;
    }

    my $exon_seq = $seq->seq();

    if ($soft_mask) {
      my $coding_start = $ex->coding_region_start($self);
      my $coding_end   = $ex->coding_region_end($self);

      if ( !defined($coding_start) || !defined($coding_end) ) {
        # No coding region on this exon: mask all of it and skip the
        # coordinate arithmetic below.  Previously the code fell through
        # and compared/subtracted undefined coordinates.
        $exon_seq = lc($exon_seq);
      }
      else {
        my $exon_start = $ex->start();
        my $exon_end   = $ex->end();
        my $forward    = ( $ex->strand == 1 );

        # Non-coding bases before the coding region start: the first
        # bases of the string on the forward strand, the last ones
        # otherwise.
        if ( $coding_start > $exon_start ) {
          if ($forward) {
            my $utr_length = $coding_start - $exon_start;
            $exon_seq = lc( substr( $exon_seq, 0, $utr_length ) )
              . substr( $exon_seq, $utr_length );
          }
          else {
            my $kept_length = $exon_end - $coding_start + 1;
            $exon_seq = substr( $exon_seq, 0, $kept_length )
              . lc( substr( $exon_seq, $kept_length ) );
          }
        }

        # Non-coding bases after the coding region end: the last bases
        # of the string on the forward strand, the first ones otherwise.
        if ( $coding_end < $exon_end ) {
          if ($forward) {
            my $kept_length = $coding_end - $exon_start + 1;
            $exon_seq = substr( $exon_seq, 0, $kept_length )
              . lc( substr( $exon_seq, $kept_length ) );
          }
          else {
            my $utr_length = $exon_end - $coding_end;
            $exon_seq = lc( substr( $exon_seq, 0, $utr_length ) )
              . substr( $exon_seq, $utr_length );
          }
        }
      }
    }

    $seq_string .= $exon_seq;
  }

  # apply post transcriptional edits
  if ( $self->edits_enabled() ) {
    # Apply the edits from the highest start position downwards so that
    # an edit never shifts the coordinates of the edits still to come.
    my @seqeds =
      sort { $b->start() <=> $a->start() } @{ $self->get_all_SeqEdits() };

    foreach my $se (@seqeds) {
      $se->apply_edit( \$seq_string );
    }
  }

  return $seq_string;
}


=head2 translateable_seq

  Args       : none
  Example    : print $transcript->translateable_seq(), "\n";
885
  Description: Returns a sequence string which is the translateable part
886
               of the transcripts sequence.  This is formed by splicing all
887 888 889 890 891
               Exon sequences together and apply all defined RNA edits.
               Then the coding part of the sequence is extracted and returned.
               The code will not support monkey exons any more. If you want to
               have non phase matching exons, define appropriate _rna_edit
               attributes!
892 893 894

               An empty string is returned if this transcript is a pseudogene
               (i.e. is non-translateable).
895
  Returntype : Text
896 897
  Exceptions : none
  Caller     : general
898
  Status     : Stable
899 900 901 902 903 904

=cut

sub translateable_seq {
  my ( $self ) = @_;

905
  if ( !$self->translation() ) {
906 907 908