Transcript.pm 87.5 KB
Newer Older
1
2
=head1 LICENSE

3
Copyright [1999-2016] Wellcome Trust Sanger Institute and the EMBL-European Bioinformatics Institute
4

5
6
7
8
9
10
11
12
13
14
15
16
17
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

     http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.

=cut
18
19
20
21
22


=head1 CONTACT

  Please email comments or questions to the public Ensembl
Magali Ruffier's avatar
Magali Ruffier committed
23
  developers list at <http://lists.ensembl.org/mailman/listinfo/dev>.
24
25

  Questions may also be sent to the Ensembl help desk at
Magali Ruffier's avatar
Magali Ruffier committed
26
  <http://www.ensembl.org/Help/Contact>.
27
28

=cut
Ewan Birney's avatar
Ewan Birney committed
29
30
31

=head1 NAME

32
Bio::EnsEMBL::Transcript - object representing an Ensembl transcript
Ewan Birney's avatar
Ewan Birney committed
33
34
35

=head1 SYNOPSIS

36
Creation:
37

38
39
  my $tran = Bio::EnsEMBL::Transcript->new();
  my $tran = Bio::EnsEMBL::Transcript->new( -EXONS => \@exons );
40
41
42

Manipulation:

43
44
  # Returns an array of Exon objects
  my @exons = @{ $tran->get_all_Exons() };
45

46
47
48
49
50
51
  # Returns the peptide translation of the exons as a Bio::Seq
  if ( $tran->translation() ) {
    my $pep = $tran->translate();
  } else {
    print "Transcript ", $tran->stable_id(), " is non-coding\n";
  }
52

53
54
=head1 DESCRIPTION

55
A representation of a transcript within the Ensembl system.  A transcript
56
57
consists of a set of Exons and (possibly) a Translation which defines the
coding and non-coding regions of the exons.
Ewan Birney's avatar
Ewan Birney committed
58

59
=cut
60

61
62
package Bio::EnsEMBL::Transcript;

Ewan Birney's avatar
Ewan Birney committed
63
64
use strict;

65
use Bio::EnsEMBL::Feature;
66
use Bio::EnsEMBL::UTR;
Ian Longden's avatar
Ian Longden committed
67
use Bio::EnsEMBL::Intron;
68
69
use Bio::EnsEMBL::ExonTranscript;
use Bio::EnsEMBL::CDS;
70
use Bio::EnsEMBL::TranscriptMapper;
71
use Bio::EnsEMBL::SeqEdit;
72

73
74
use Bio::EnsEMBL::Utils::Argument qw( rearrange );
use Bio::EnsEMBL::Utils::Exception qw( deprecate warning throw );
75
use Bio::EnsEMBL::Utils::Scalar qw( assert_ref );
76

77
use vars qw(@ISA);
78
@ISA = qw(Bio::EnsEMBL::Feature);
Ewan Birney's avatar
Ewan Birney committed
79

Graham McVicker's avatar
Graham McVicker committed
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98

=head2 new

  Arg [-EXONS] :
        reference to list of Bio::EnsEMBL::Exon objects - exons which make up 
        this transcript
  Arg [-STABLE_ID] :
        string - the stable identifier of this transcript
  Arg [-VERSION] :
        int - the version of the stable identifier of this transcript
  Arg [-EXTERNAL_NAME] :
        string - the external database name associated with this transcript
  Arg [-EXTERNAL_DB] :
        string - the name of the database the external name is from
  Arg [-EXTERNAL_STATUS]:
        string - the status of the external identifier
  Arg [-DISPLAY_XREF]:
        Bio::EnsEMBL::DBEntry - The external database entry that is used
        to label this transcript when it is displayed.
99
100
101
102
103
  Arg [-CREATED_DATE]:
        string - the date the transcript was created
  Arg [-MODIFIED_DATE]:
        string - the date the transcript was last modified
  Arg [-DESCRIPTION]:
104
        string - the transcript's description
105
106
  Arg [-BIOTYPE]: 
        string - the biotype e.g. "protein_coding"
107
108
  Arg [-STATUS]:
        string - the transcript's status, e.g. "KNOWN", "NOVEL"
109
110
  Arg [-IS_CURRENT]:
        Boolean - specifies if this is the current version of the transcript
111
112
113
  Arg [-SOURCE]:
        string - the transcript source, e.g. "ensembl"

Graham McVicker's avatar
Graham McVicker committed
114
115
116
117
118
  Example    : $tran = new Bio::EnsEMBL::Transcript(-EXONS => \@exons);
  Description: Constructor. Instantiates a Transcript object.
  Returntype : Bio::EnsEMBL::Transcript
  Exceptions : throw on bad arguments
  Caller     : general
119
  Status     : Stable
Graham McVicker's avatar
Graham McVicker committed
120
121
122

=cut

123
sub new {
  # Works both as a class method and as an instance method
  # ($object->new(...) creates an object of the same class).
  my $proto = shift;
  my $class = ref($proto) || $proto;

  # The parent constructor receives the complete, untouched argument list.
  my $self = $class->SUPER::new(@_);

  # $confidence is the legacy name for $status.  $external_db_name is
  # parsed so that the argument is accepted, but it is not used below.
  my (
    $exons,            $stable_id,    $version,
    $external_name,    $external_db,  $external_status,
    $display_xref,     $created_date, $modified_date,
    $description,      $biotype,      $confidence,
    $external_db_name, $status,       $is_current,
    $source
  );

  # Catch for old style constructor calling: a leading reference means the
  # caller passed a plain list of exons instead of named arguments.
  if ( ( @_ > 0 ) && ref( $_[0] ) ) {
    $exons = [@_];
    deprecate( "Transcript constructor should use named arguments.\n"
        . "Use Bio::EnsEMBL::Transcript->new(-EXONS => \@exons);\n"
        . "instead of Bio::EnsEMBL::Transcript->new(\@exons);" );
  } else {
    (
      $exons,            $stable_id,    $version,
      $external_name,    $external_db,  $external_status,
      $display_xref,     $created_date, $modified_date,
      $description,      $biotype,      $confidence,
      $external_db_name, $status,       $is_current,
      $source
      )
      = rearrange( [
        'EXONS',            'STABLE_ID',
        'VERSION',          'EXTERNAL_NAME',
        'EXTERNAL_DB',      'EXTERNAL_STATUS',
        'DISPLAY_XREF',     'CREATED_DATE',
        'MODIFIED_DATE',    'DESCRIPTION',
        'BIOTYPE',          'CONFIDENCE',
        'EXTERNAL_DB_NAME', 'STATUS',
        'IS_CURRENT',       'SOURCE'
      ],
      @_
      );
  }

  if ($exons) {
    $self->{'_trans_exon_array'} = $exons;
    $self->recalculate_coordinates();
  }

  $self->stable_id($stable_id);
  $self->version($version);
  $self->{'created_date'}  = $created_date;
  $self->{'modified_date'} = $modified_date;

  # These accessors fall back to the display xref while their key is
  # absent, so only store a value when one was actually supplied.
  $self->external_name($external_name) if ( defined $external_name );
  $self->external_db($external_db)     if ( defined $external_db );
  $self->external_status($external_status)
    if ( defined $external_status );
  $self->display_xref($display_xref) if ( defined $display_xref );
  $self->edits_enabled(1);

  $self->description($description);

  # -STATUS is the current name of the legacy -CONFIDENCE argument.
  # Prefer -STATUS, but fall back to -CONFIDENCE instead of overwriting
  # it with undef when only the legacy argument was given.
  $self->status( defined($status) ? $status : $confidence );

  $self->biotype($biotype);
  $self->source($source);

  # default is_current
  $is_current = 1 unless ( defined($is_current) );
  $self->{'is_current'} = $is_current;

  return $self;
} ## end sub new
196

Graham McVicker's avatar
Graham McVicker committed
197
=head2 get_all_DBLinks
198

199
200
201
202
  Arg [1]    : String database name (optional)
               SQL wildcard characters (_ and %) can be used to
               specify patterns.

203
  Example    : my @dblinks = @{ $transcript->get_all_DBLinks() };
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
               my @dblinks = @{ $transcript->get_all_DBLinks('Uniprot%') };

  Description: Retrieves *all* related DBEntries for this
               transcript.  This includes all DBEntries that are
               associated with the corresponding translation.

               If you only want to retrieve the DBEntries associated
               with the transcript (and not the translation) then
               you should use the get_all_DBEntries() call instead.

               Note: Each entry may be listed more than once.  No
               uniqueness checks are done.  Also if you put in an
               incorrect external database name no checks are done
               to see if this exists, you will just get an empty
               list.

  Return type: Listref of Bio::EnsEMBL::DBEntry objects
221
222
  Exceptions : none
  Caller     : general
223
  Status     : Stable
224
225
226

=cut

Graham McVicker's avatar
Graham McVicker committed
227
sub get_all_DBLinks {
  my $self = shift;
  my ( $db_name_exp, $ex_db_type ) = @_;

  # Begin with a copy of the xrefs attached to the transcript itself.
  my @collected =
    @{ $self->get_all_DBEntries( $db_name_exp, $ex_db_type ) };

  # Append the xrefs of the translation, if this transcript has one.
  my $translation = $self->translation();
  if ( defined $translation ) {
    my $translation_links =
      $translation->get_all_DBEntries( $db_name_exp, $ex_db_type );
    push @collected, @{$translation_links};
  }

  # _compare_xrefs() is called without arguments, so it is expected to
  # read the sort variables $a and $b itself.
  my @ordered = sort { _compare_xrefs() } @collected;

  return \@ordered;
}

246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
=head2 get_all_xrefs

  Arg [1]    : String database name (optional)
               SQL wildcard characters (_ and %) can be used to
               specify patterns.

  Example    : @xrefs = @{ $transcript->get_all_xrefs() };
               @xrefs = @{ $transcript->get_all_xrefs('Uniprot%') };

  Description: Retrieves *all* related xrefs for this transcript.
               This includes all xrefs that are associated with the
               corresponding translation of this transcript.

               If you want to retrieve the xrefs associated with
               only the transcript (and not the translation) then
               you should use the get_all_object_xrefs() method
               instead.

               Note: Each entry may be listed more than once.  No
               uniqueness checks are done.  Also if you put in an
               incorrect external database name no checks are done
               to see if this exists, you will just get an empty
               list.

                NB: This method is an alias for the
                    get_all_DBLinks() method.

  Return type: Listref of Bio::EnsEMBL::DBEntry objects

  Status     : Stable

=cut

279
280
281
282
sub get_all_xrefs {
  # Pure alias: every argument is handed on to get_all_DBLinks().
  my ( $self, @args ) = @_;
  return $self->get_all_DBLinks(@args);
}
Graham McVicker's avatar
Graham McVicker committed
283

284
=head2 get_all_DBEntries
285

286
  Arg [1]    : (optional) String, external database name
287

288
  Arg [2]    : (optional) String, external database type
289

290
  Example    : my @dbentries = @{ $transcript->get_all_DBEntries() };
291
292
293
294
295
296
297
298
299
300

  Description: Retrieves DBEntries (xrefs) for this transcript.
               This does *not* include the corresponding
               translations DBEntries (see get_all_DBLinks()).

               This method will attempt to lazy-load DBEntries
               from a database if an adaptor is available and no
               DBEntries are present on the transcript (i.e. they
               have not already been added or loaded).

301
  Returntype : Listref of Bio::EnsEMBL::DBEntry objects
302
303
  Exceptions : none
  Caller     : get_all_DBLinks, TranscriptAdaptor::store
304
  Status     : Stable
305
306
307

=cut

308
sub get_all_DBEntries {
  my ( $self, $ex_db_exp, $ex_db_type ) = @_;

  # Unfiltered results live under the plain 'dbentries' key, which is
  # also the list add_DBEntry() appends to.  Each filter combination
  # gets its own cache slot; the parts are tagged and delimited so that
  # e.g. ('X') and (undef, 'X') no longer share a slot and return each
  # other's cached results.
  my $cache_name = 'dbentries';
  $cache_name .= '|name=' . $ex_db_exp  if ( defined($ex_db_exp) );
  $cache_name .= '|type=' . $ex_db_type if ( defined($ex_db_type) );

  # if not cached, retrieve all of the xrefs for this transcript
  if ( !defined( $self->{$cache_name} ) && defined( $self->adaptor() ) )
  {
    $self->{$cache_name} =
      $self->adaptor()->db()->get_DBEntryAdaptor()
      ->fetch_all_by_Transcript( $self, $ex_db_exp, $ex_db_type );
  }

  # Without an adaptor (or when the fetch returned nothing) cache and
  # return an empty list rather than undef.
  $self->{$cache_name} ||= [];

  return $self->{$cache_name};
} ## end sub get_all_DBEntries
333

334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
=head2 get_all_object_xrefs

  Arg [1]    : (optional) String, external database name

  Arg [2]    : (optional) String, external_db type

  Example    : @oxrefs = @{ $transcript->get_all_object_xrefs() };

  Description: Retrieves xrefs for this transcript.  This does
               *not* include xrefs that are associated with the
               corresponding translations of this transcript (see
               get_all_xrefs()).

               This method will attempt to lazy-load xrefs from a
               database if an adaptor is available and no xrefs are
               present on the transcript (i.e. they have not already
               been added or loaded).

352
353
                NB: This method is an alias for the
                    get_all_DBEntries() method.
354
355
356
357
358
359
360

  Return type: Listref of Bio::EnsEMBL::DBEntry objects

  Status     : Stable

=cut

361
362
363
364
sub get_all_object_xrefs {
  # Pure alias: every argument is handed on to get_all_DBEntries().
  my ( $self, @args ) = @_;
  return $self->get_all_DBEntries(@args);
}
365
366

=head2 add_DBEntry
367

368
369
  Arg [1]    : Bio::EnsEMBL::DBEntry $dbe
               The dbEntry to be added
370
371
372
373
374
  Example    : my $dbe = Bio::EnsEMBL::DBEntry->new(...);
               $transcript->add_DBEntry($dbe);
  Description: Associates a DBEntry with this transcript. Note that adding
               DBEntries will prevent future lazy-loading of DBEntries for this
               transcript (see get_all_DBEntries).
375
376
377
  Returntype : none
  Exceptions : thrown on incorrect argument type
  Caller     : general
378
  Status     : Stable
379
380
381
382
383
384
385
386

=cut

sub add_DBEntry {
  my ( $self, $dbe ) = @_;

  # blessed() is checked first so that an unblessed reference (e.g. a
  # plain hashref) is rejected with the documented exception instead of
  # dying inside ->isa() with "Can't call method on unblessed reference".
  require Scalar::Util;
  unless ( Scalar::Util::blessed($dbe)
           && $dbe->isa('Bio::EnsEMBL::DBEntry') )
  {
    throw('Expected DBEntry argument');
  }

  # Appending to the unfiltered cache slot; once it is defined,
  # get_all_DBEntries() no longer lazy-loads the unfiltered list.
  $self->{'dbentries'} ||= [];
  push @{ $self->{'dbentries'} }, $dbe;
}

394

395
396
=head2 get_all_supporting_features

397
  Example    : my @evidence = @{ $transcript->get_all_supporting_features };
398
  Description: Retrieves any supporting features added manually by 
399
               calls to add_supporting_features.
400
  Returntype : Listref of Bio::EnsEMBL::FeaturePair objects
401
402
  Exceptions : none
  Caller     : general
403
  Status     : Stable
404
405
406
407
408

=cut

sub get_all_supporting_features {
  my ($self) = @_;

  # Lazy-load from the database only when nothing has been stored under
  # the key yet and an adaptor is attached.
  if ( !exists( $self->{_supporting_evidence} ) && $self->adaptor ) {
    my $tsfa = $self->adaptor->db->get_TranscriptSupportingFeatureAdaptor();
    $self->{_supporting_evidence} = $tsfa->fetch_all_by_Transcript($self);
  }

  # Fall back to a fresh empty list; it is not stored on the object.
  my $evidence = $self->{_supporting_evidence};
  return $evidence ? $evidence : [];
}


=head2 add_supporting_features

423
424
425
  Arg [1-N]  : Bio::EnsEMBL::FeaturePair $feature
               The supporting features to add
  Example    : $transcript->add_supporting_features(@features);
426
  Description: Adds a list of supporting features to this Transcript.
427
428
               The added features can be retrieved by
               get_all_supporting_features().
429
430
431
432
433
  Returntype : none
  Exceptions : throw if any of the features are not FeaturePairs
               throw if any of the features are not in the same coordinate
               system as the Transcript
  Caller     : general
434
  Status     : Stable
435
436
437
438
 
=cut
 
sub add_supporting_features {
  my ( $self, @features ) = @_;

  return unless @features;

  require Scalar::Util;

  $self->{_supporting_evidence} ||= [];

  FEATURE:
  foreach my $feature (@features) {

    if ( !defined($feature) || ref($feature) eq "ARRAY" ) {
      throw("Element in transcript supporting features array is undefined or is an ARRAY for " . $self->dbID);
    }

    # blessed() guards the ->isa() call so that any non-object value is
    # reported through throw() rather than a raw Perl runtime error.
    # (The former debug print to STDOUT has been removed; the exception
    # message already contains the offending value.)
    if ( !Scalar::Util::blessed($feature)
         || !$feature->isa("Bio::EnsEMBL::FeaturePair") )
    {
      throw("Supporting feat [$feature] not a " .
            "Bio::EnsEMBL::FeaturePair");
    }

    # Only compare slices when both sides actually have one.
    if ( ( defined $self->slice() && defined $feature->slice() )
         && ( $self->slice()->name() ne $feature->slice()->name() ) )
    {
      throw("Supporting feat not in same coord system as transcript\n" .
            "transcript is attached to [".$self->slice()->name()."]\n" .
            "feat is attached to [".$feature->slice()->name()."]");
    }

    # Skip features that are already present.  Objects are compared by
    # identity (==), not by content.
    foreach my $added_feature ( @{ $self->{_supporting_evidence} } ) {
      next FEATURE if ( $feature == $added_feature );
    }

    # no duplicate was found, add the feature
    push( @{ $self->{_supporting_evidence} }, $feature );
  }

  return;
}


478
479
480
481
482
483
484
=head2 flush_supporting_features

  Example     : $transcript->flush_supporting_features;
  Description : Removes all supporting evidence from the transcript.
  Return type : (Empty) listref
  Exceptions  : none
  Caller      : general
485
  Status      : Stable
486
487
488
489
490
491
492
493
494

=cut

sub flush_supporting_features {
  # Replace the stored evidence with a new empty list and return that list.
  my ($self) = @_;
  return $self->{'_supporting_evidence'} = [];
}


495
496
=head2 external_db

497
498
499
500
501
502
503
504
  Arg [1]    : (optional) String - name of external db to set
  Example    : $transcript->external_db('HGNC');
  Description: Getter/setter for attribute external_db. The db is the one that 
               belongs to the external_name.  
  Returntype : String
  Exceptions : none
  Caller     : general
  Status     : Stable
505
506
507
508

=cut

sub external_db {
  my $self       = shift;
  my $ext_dbname = shift;

  # Setter: store and return the new value.
  return ( $self->{'external_db'} = $ext_dbname ) if ( defined $ext_dbname );

  # An explicitly stored value (even undef) takes precedence.
  return $self->{'external_db'} if ( exists $self->{'external_db'} );

  # Otherwise derive the value from the display xref, if there is one.
  my $xref = $self->display_xref();
  return defined($xref) ? $xref->dbname() : undef;
}

528

529
530
=head2 external_status

531
532
533
534
535
536
537
538
  Arg [1]    : (optional) String - status of the external db
  Example    : $transcript->external_status('KNOWNXREF');
  Description: Getter/setter for attribute external_status. The status of
               the external db of the one that belongs to the external_name.
  Returntype : String
  Exceptions : none
  Caller     : general
  Status     : Stable
539
540
541
542
543
544
545

=cut

sub external_status {
  my $self       = shift;
  my $ext_status = shift;

  # Setter: store and return the new value.
  return ( $self->{'external_status'} = $ext_status )
    if ( defined $ext_status );

  # An explicitly stored value (even undef) takes precedence.
  return $self->{'external_status'}
    if ( exists $self->{'external_status'} );

  # Otherwise derive the value from the display xref, if there is one.
  my $xref = $self->display_xref();
  return defined($xref) ? $xref->status() : undef;
}


563
564
=head2 external_name

565
566
567
568
569
570
571
  Arg [1]    : (optional) String - the external name to set
  Example    : $transcript->external_name('BRCA2-001');
  Description: Getter/setter for attribute external_name.
  Returntype : String or undef
  Exceptions : none
  Caller     : general
  Status     : Stable
572
573
574
575

=cut

sub external_name {
  my $self     = shift;
  my $ext_name = shift;

  # Setter: store and return the new value.
  return ( $self->{'external_name'} = $ext_name ) if ( defined $ext_name );

  # An explicitly stored value (even undef) takes precedence.
  return $self->{'external_name'} if ( exists $self->{'external_name'} );

  # Otherwise use the display id of the display xref, if there is one.
  my $xref = $self->display_xref();
  return defined($xref) ? $xref->display_id() : undef;
}


596
597
=head2 is_known

598
599
600
601
  Example    : print "Transcript ".$transcript->stable_id." is KNOWN\n" if
                  $transcript->is_known;
  Description: Returns TRUE if this transcript has a status of 'KNOWN'
               or 'KNOWN_BY_PROJECTION'
  Returntype : TRUE if known, FALSE otherwise
602
603
  Exceptions : none
  Caller     : general
604
  Status     : Stable
605
606
607

=cut

608
609
sub is_known {
  my ($self) = @_;

  # The status is optional, so it may be undefined; compare against an
  # empty string in that case to avoid "uninitialized value" warnings.
  my $status = defined( $self->{'status'} ) ? $self->{'status'} : '';

  return ( $status eq "KNOWN" || $status eq "KNOWN_BY_PROJECTION" );
}


614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
=head2 status

  Arg [1]    : string $status
  Description: get/set for attribute status
  Returntype : string
  Exceptions : none
  Caller     : general
  Status     : Medium Risk

=cut

sub status {
  my ( $self, @new_value ) = @_;

  # Any argument, including an explicit undef, replaces the stored value.
  if (@new_value) {
    $self->{'status'} = $new_value[0];
  }

  return $self->{'status'};
}

631
632
633
634
635
636
637
=head2 biotype

  Arg [1]    : string $biotype
  Description: get/set for attribute biotype
  Returntype : string
  Exceptions : none
  Caller     : general
638
  Status     : Stable
639
640
641
642
643
644
645

=cut

sub biotype {
  my ( $self, @new_value ) = @_;

  # Any argument, including an explicit undef, replaces the stored value.
  if (@new_value) {
    $self->{'biotype'} = $new_value[0];
  }

  # A false stored value reads back as the default "protein_coding".
  my $stored = $self->{'biotype'};
  return ( $stored ? $stored : "protein_coding" );
}

648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
=head2 source

  Arg [1]    : (optional) String - the source to set
  Example    : $transcript->source('ensembl');
  Description: Getter/setter for attribute source
  Returntype : String
  Exceptions : none
  Caller     : general
  Status     : Stable

=cut

sub source {
  my ( $self, @new_value ) = @_;

  # Any argument, including an explicit undef, replaces the stored value.
  if (@new_value) {
    $self->{'source'} = $new_value[0];
  }

  # A false stored value reads back as the default "ensembl".
  my $stored = $self->{'source'};
  return ( $stored ? $stored : "ensembl" );
}
665

Alistair Rust's avatar
Alistair Rust committed
666
=head2 display_xref
667

668
669
670
  Arg [1]    : (optional) Bio::EnsEMBL::DBEntry - the display xref to set
  Example    : $transcript->display_xref($db_entry);
  Description: Getter/setter for display_xref for this transcript.
671
  Returntype : Bio::EnsEMBL::DBEntry
672
673
  Exceptions : none
  Caller     : general
674
  Status     : Stable
675
676
677

=cut

678
sub display_xref {
  my ( $self, @new_value ) = @_;

  # Any argument, including an explicit undef, replaces the stored xref.
  if (@new_value) {
    $self->{'display_xref'} = $new_value[0];
  }

  return $self->{'display_xref'};
}

684
685
686
687
688
689
690
=head2 is_canonical

  Args [1]      : (optional) Boolean is_canonical

  Example       : if ($transcript->is_canonical()) { ... }

  Description : Returns true (non-zero) if the transcript is the
Andy Yates's avatar
Andy Yates committed
691
692
693
694
695
                canonical transcript of its gene, false (0) if not. If the code
                returns an undefined it is because its state is not currently
                known. Internally the code will consult the database for this
                value if it is unknown and the transcript has a dbID and an
                attached adaptor
696
697
698
699
700
701
702
703
704

  Return type   : Boolean

  Status        : Stable

=cut

sub is_canonical {
  my ( $self, $value ) = @_;

  # Setter.  This must run before the cached value is consulted;
  # otherwise a flag that has been set once could never be changed.
  # The value is normalised to 1 or 0.
  if ( defined($value) ) {
    $self->{is_canonical} = ( $value ? 1 : 0 );
    return $self->{is_canonical};
  }

  # Getter.  If the state is still unknown, ask the database, which is
  # only possible when the transcript has a dbID and an adaptor.
  if (   !defined( $self->{is_canonical} )
       && $self->dbID()
       && $self->adaptor() )
  {
    $self->{is_canonical} =
      $self->adaptor()->is_Transcript_canonical($self);
  }

  # May be undef when the state cannot be determined.
  return $self->{is_canonical};
}
Ewan Birney's avatar
Ewan Birney committed
720
721
722

=head2 translation

723
724
725
  Args       : None
  Example    : if ( $transcript->translation() ) {
                 print( $transcript->translation()->stable_id(), "\n" );
726
               } else {
727
                 print("Pseudogene\n");
728
               }
729
730
731
732
733
734
735
  Description: Getter/setter for the Translation object which
               defines the CDS (and as a result the peptide encoded
               by) this transcript.  This function will return
               undef if this transcript is a pseudogene, i.e. a
               non-translating transcript such as an ncRNA.  This
               is the accepted method of determining whether a
               transcript is a pseudogene or not.
736
  Returntype : Bio::EnsEMBL::Translation
737
738
  Exceptions : none
  Caller     : general
739
  Status     : Stable
Ewan Birney's avatar
Ewan Birney committed
740
741
742

=cut

Michele Clamp's avatar
Michele Clamp committed
743
sub translation {
  my ( $self, $translation ) = @_;

  # Values cached on the transcript that are reset to undef whenever the
  # translation is replaced or removed.
  my @reset_keys = qw(
    cdna_coding_start   cdna_coding_end
    coding_region_start coding_region_end
    transcript_mapper
  );

  if ( defined($translation) ) {
    # Setter: attach a new translation and link it back to this transcript.
    assert_ref( $translation, 'Bio::EnsEMBL::Translation' );

    $self->{'translation'} = $translation;
    $translation->transcript($self);

    foreach my $key (@reset_keys) {
      $self->{$key} = undef;
    }
  }
  elsif ( @_ > 1 ) {
    # An explicit undef was passed: remove the existing translation, if any.
    my $current = $self->{'translation'};
    if ( defined($current) ) {
      $current->transcript(undef);
      delete( $self->{'translation'} );

      foreach my $key (@reset_keys) {
        $self->{$key} = undef;
      }
    }
  }
  elsif ( !exists( $self->{'translation'} ) && defined( $self->adaptor() ) )
  {
    # Plain getter with nothing stored yet: lazy-load via the adaptor.
    my $translation_adaptor =
      $self->adaptor()->db()->get_TranslationAdaptor();
    $self->{'translation'} =
      $translation_adaptor->fetch_by_Transcript($self);
  }

  return $self->{'translation'};
} ## end sub translation
Ewan Birney's avatar
Ewan Birney committed
785

786
787
788
789
790
791
792
793
794
795
796
=head2 get_all_alternative_translations

  Args       : None
  Example    :

    my @alt_translations =
      @{ $transcript->get_all_alternative_translations() };

  Description:  Fetches all alternative translations defined for this
                transcript.  The canonical translation is not returned.

797
  Returntype : Arrayref to Bio::EnsEMBL::Translation
Andreas Kusalananda Kähäri's avatar
Andreas Kusalananda Kähäri committed
798
  Exceptions : None
799
800
801
802
803
804
805
806
  Caller     : General
  Status     : Stable

=cut

sub get_all_alternative_translations {
  my ($self) = @_;

  # Lazy-load once from the database when nothing is stored yet and an
  # adaptor is available.  The fetched list is copied before caching.
  if (   !exists( $self->{'alternative_translations'} )
       && defined( $self->adaptor() ) )
  {
    my $pa = $self->adaptor()->db()->get_TranslationAdaptor();
    my @translations =
      @{ $pa->fetch_all_alternative_by_Transcript($self) };

    $self->{'alternative_translations'} = \@translations;
  }

  # Always return an array reference, as documented, so that callers can
  # dereference the result directly.  The empty fallback is deliberately
  # not cached, so a later call can still lazy-load once an adaptor has
  # been attached.
  return $self->{'alternative_translations'} || [];
}
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850

=head2 add_alternative_translation

  Args       : Bio::EnsEMBL::Translation $translation
  Example    :

    $transcript->add_alternative_translation($translation);

  Description: Adds an alternative translation to this transcript.
  Returntype : None
  Exceptions : throw if the argument is not a Bio::EnsEMBL::Translation
  Caller     : General
  Status     : Stable

=cut

sub add_alternative_translation {
  my ( $self, $translation ) = @_;

  # blessed() guards the ->isa() call so that an unblessed reference is
  # rejected with the exception below rather than a raw Perl error.
  require Scalar::Util;
  if ( !(    Scalar::Util::blessed($translation)
          && $translation->isa('Bio::EnsEMBL::Translation') ) )
  {
    throw("Bio::EnsEMBL::Translation argument expected.");
  }

  # Load the existing alternative translations from the database if
  # they haven't already been loaded.
  $self->get_all_alternative_translations();

  # Autovivifies the list when nothing was loaded.
  push( @{ $self->{'alternative_translations'} }, $translation );
}
Arne Stabenau's avatar
Arne Stabenau committed
851

852
853
=head2 spliced_seq

854
855
  Args       : soft_mask (opt)
               if specified, will return a sequence where UTR regions are lowercased
856
857
  Description: Retrieves all Exon sequences and concats them together.
               No phase padding magic is done, even if phases do not align.
858
  Returntype : Text
859
860
  Exceptions : none
  Caller     : general
861
  Status     : Stable
862
863
864
865

=cut

sub spliced_seq {
  my ( $self, $soft_mask ) = @_;

  my $seq_string = "";
  for my $ex ( @{ $self->get_all_Exons() } ) {
    my $seq = $ex->seq();

    if ( !$seq ) {
      # Keep the coordinates of the following exons intact by padding
      # with Ns of the exon's length.
      warning("Could not obtain seq for exon.  Transcript sequence may not " .
              "be correct.");
      $seq_string .= 'N' x $ex->length();
      next;
    }

    my $exon_seq = $seq->seq();

    if ($soft_mask) {
      my $cr_start = $ex->coding_region_start($self);
      my $cr_end   = $ex->coding_region_end($self);

      if ( !defined($cr_start) || !defined($cr_end) ) {
        # No coding region in this exon: it is UTR throughout.
        $exon_seq = lc($exon_seq);
      } else {
        # Number of non-coding bases at the beginning ($lead) and at the
        # end ($trail) of the exon sequence.  The lengths are taken from
        # the exon/coding-region coordinates; for any strand other than
        # 1 the two ends are swapped, which assumes the exon sequence is
        # given in transcript orientation.  Both ends are handled
        # independently so that an exon holding both the start and the
        # end of the coding region gets both UTR parts masked.
        my ( $lead, $trail );
        if ( $ex->strand == 1 ) {
          $lead  = $cr_start - $ex->start();
          $trail = $ex->end() - $cr_end;
        } else {
          $lead  = $ex->end() - $cr_end;
          $trail = $cr_start - $ex->start();
        }

        if ( $lead > 0 ) {
          $exon_seq =
            lc( substr( $exon_seq, 0, $lead ) ) . substr( $exon_seq, $lead );
        }
        if ( $trail > 0 ) {
          my $keep = length($exon_seq) - $trail;
          $keep = 0 if ( $keep < 0 );
          $exon_seq =
            substr( $exon_seq, 0, $keep ) . lc( substr( $exon_seq, $keep ) );
        }
      }
    }

    $seq_string .= $exon_seq;
  }

  # apply post transcriptional edits
  if ( $self->edits_enabled() ) {
    my @seqeds = @{ $self->get_all_SeqEdits() };

    # sort edits in reverse order to remove complication of
    # adjusting downstream edits
    @seqeds = sort { $b->start() <=> $a->start() } @seqeds;

    foreach my $se (@seqeds) {
      $se->apply_edit( \$seq_string );
    }
  }

  return $seq_string;
}


=head2 translateable_seq

  Args       : none
  Example    : print $transcript->translateable_seq(), "\n";
925
  Description: Returns a sequence string which is the translateable part
926
               of the transcripts sequence.  This is formed by splicing all
927
928
929
930
931
               Exon sequences together and apply all defined RNA edits.
               Then the coding part of the sequence is extracted and returned.
               The code will not support monkey exons any more. If you want to
               have non phase matching exons, define appropriate _rna_edit
               attributes!
932
933
934

               An empty string is returned if this transcript is a pseudogene
               (i.e. is non-translateable).
935
  Returntype : Text
936
937
  Exceptions : none
  Caller     : general
938
  Status     : Stable
939
940
941
942
943
944

=cut

sub translateable_seq {
  my ($self) = @_;

  # Transcripts without a translation have no translateable sequence.
  if ( !$self->translation() ) {
    return '';
  }

  my $start = $self->cdna_coding_start();
  my $end   = $self->cdna_coding_end();

  # Validate the coding boundaries before they are used in any
  # arithmetic; without both of them there is nothing to extract.
  if ( !$start || !$end ) {
    return "";
  }

  # cDNA coordinates are 1-based and inclusive; substr() is 0-based.
  my $mrna = substr( $self->spliced_seq(), $start - 1, $end - $start + 1 );

  # Pad the front with Ns when the first coding exon has a positive phase.
  my $start_phase = $self->translation->start_Exon->phase();
  if ( $start_phase > 0 ) {
    $mrna = "N" x $start_phase . $mrna;
  }

  return $mrna;
}

Arne Stabenau's avatar
Arne Stabenau committed
967

968
969
970
971
972
973
974
=head2 cdna_coding_start

  Arg [1]    : (optional) $value
  Example    : $relative_coding_start = $transcript->cdna_coding_start;
  Description: Retrieves the position of the coding start of this transcript
               in cdna coordinates (relative to the start of the 5prime end of
               the transcript, excluding introns, including utrs).
975
976
977

               This will return undef if this is a pseudogene (i.e. a
               transcript with no translation).
978
979
980
  Returntype : int
  Exceptions : none
  Caller     : five_prime_utr, get_all_snps, general
981
  Status     : Stable
982
983
984
985

=cut

sub cdna_coding_start {
  my ( $self, @new_value ) = @_;

  # Setter: any argument replaces the cached value.
  if (@new_value) {
    $self->{'cdna_coding_start'} = $new_value[0];
  }

  # A defined cached value is returned as is.
  return $self->{'cdna_coding_start'}
    if ( defined $self->{'cdna_coding_start'} );

  # Without a translation there is no coding start; the result is undef.
  my $translation = $self->translation;
  return $self->{'cdna_coding_start'} unless ( defined $translation );

  # Walk the exons, adding up the full length of every exon that precedes
  # the translation's start exon, then add the translation start within
  # that exon.
  my $coding_start = 0;
  foreach my $exon ( @{ $self->get_all_Exons } ) {
    if ( $exon == $translation->start_Exon ) {
      $coding_start += $translation->start;
      last;
    }
    $coding_start += $exon->length;
  }

  # adjust cdna coords if sequence edits are enabled
  if ( $self->edits_enabled() ) {
    # Process the edits from the highest start downwards, to avoid
    # adjustment of downstream edits.
    my @edits =
      sort { $b->start() <=> $a->start() } @{ $self->get_all_SeqEdits() };

    foreach my $edit (@edits) {
      # use less than start so that start of CDS can be extended
      next unless ( $edit->start() < $coding_start );
      $coding_start += $edit->length_diff();
    }
  }

  $self->{'cdna_coding_start'} = $coding_start;

  return $self->{'cdna_coding_start'};
}


=head2 cdna_coding_end

  Arg [1]    : (optional) $value
1034
  Example    : $cdna_coding_end = $transcript->cdna_coding_end;
1035
1036
  Description: Retrieves the end of the coding region of this transcript in
               cdna coordinates (relative to the five prime end of the
Graham McVicker's avatar
Graham McVicker committed
1037
               transcript, excluding introns, including utrs).
1038
1039
1040
1041

               This will return undef if this transcript is a pseudogene
               (i.e. a transcript with no translation and therefore no CDS).
  Returntype : int
1042
1043
  Exceptions : none
  Caller     : general
1044
  Status     : Stable
1045
1046
1047
1048