Transcript.pm 89.5 KB
Newer Older
1
2
=head1 LICENSE

Magali Ruffier's avatar
Magali Ruffier committed
3
Copyright [1999-2015] Wellcome Trust Sanger Institute and the EMBL-European Bioinformatics Institute
premanand17's avatar
premanand17 committed
4
Copyright [2016-2018] EMBL-European Bioinformatics Institute
5

6
7
8
9
10
11
12
13
14
15
16
17
18
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

     http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.

=cut
19
20
21
22
23


=head1 CONTACT

  Please email comments or questions to the public Ensembl
Magali Ruffier's avatar
Magali Ruffier committed
24
  developers list at <http://lists.ensembl.org/mailman/listinfo/dev>.
25
26

  Questions may also be sent to the Ensembl help desk at
Magali Ruffier's avatar
Magali Ruffier committed
27
  <http://www.ensembl.org/Help/Contact>.
28
29

=cut
Ewan Birney's avatar
Ewan Birney committed
30
31
32

=head1 NAME

33
Bio::EnsEMBL::Transcript - object representing an Ensembl transcript
Ewan Birney's avatar
Ewan Birney committed
34
35
36

=head1 SYNOPSIS

37
Creation:
38

39
40
  my $tran = Bio::EnsEMBL::Transcript->new();
  my $tran = Bio::EnsEMBL::Transcript->new( -EXONS => \@exons );
41
42
43

Manipulation:

44
45
  # Returns an array of Exon objects
  my @exons = @{ $tran->get_all_Exons() };
46

47
48
49
50
51
52
  # Returns the peptide translation of the exons as a Bio::Seq
  if ( $tran->translation() ) {
    my $pep = $tran->translate();
  } else {
    print "Transcript ", $tran->stable_id(), " is non-coding\n";
  }
53

54
55
=head1 DESCRIPTION

56
A representation of a transcript within the Ensembl system.  A transcript
57
58
consists of a set of Exons and (possibly) a Translation which defines the
coding and non-coding regions of the exons.
Ewan Birney's avatar
Ewan Birney committed
59

60
=cut
61

62
63
package Bio::EnsEMBL::Transcript;

Ewan Birney's avatar
Ewan Birney committed
64
65
use strict;

use Scalar::Util qw( blessed );

use Bio::EnsEMBL::Feature;
use Bio::EnsEMBL::UTR;
use Bio::EnsEMBL::Intron;
use Bio::EnsEMBL::ExonTranscript;
use Bio::EnsEMBL::CDS;
use Bio::EnsEMBL::TranscriptMapper;
use Bio::EnsEMBL::SeqEdit;
use Bio::EnsEMBL::Biotype;
use Bio::EnsEMBL::Utils::Argument qw( rearrange );
use Bio::EnsEMBL::Utils::Exception qw(warning throw );
use Bio::EnsEMBL::Utils::Scalar qw( assert_ref );

use parent qw(Bio::EnsEMBL::Feature);
Ewan Birney's avatar
Ewan Birney committed
79

Graham McVicker's avatar
Graham McVicker committed
80
81
82
83

=head2 new

  Arg [-EXONS] :
Tiago Grego's avatar
Tiago Grego committed
84
        reference to list of Bio::EnsEMBL::Exon objects - exons which make up 
Graham McVicker's avatar
Graham McVicker committed
85
86
87
88
89
90
91
92
93
94
95
96
97
98
        this transcript
  Arg [-STABLE_ID] :
        string - the stable identifier of this transcript
  Arg [-VERSION] :
        int - the version of the stable identifier of this transcript
  Arg [-EXTERNAL_NAME] :
        string - the external database name associated with this transcript
  Arg [-EXTERNAL_DB] :
        string - the name of the database the external name is from
  Arg [-EXTERNAL_STATUS]:
        string - the status of the external identifier
  Arg [-DISPLAY_XREF]:
        Bio::EnsEMBL::DBEntry - The external database entry that is used
        to label this transcript when it is displayed.
99
100
101
102
103
  Arg [-CREATED_DATE]:
        string - the date the transcript was created
  Arg [-MODIFIED_DATE]:
        string - the date the transcript was last modified
  Arg [-DESCRIPTION]:
104
        string - the transcripts description
Tiago Grego's avatar
Tiago Grego committed
105
  Arg [-BIOTYPE]: 
106
        string - the biotype e.g. "protein_coding"
107
108
  Arg [-IS_CURRENT]:
        Boolean - specifies if this is the current version of the transcript
109
110
111
  Arg [-SOURCE]:
        string - the transcript source, e.g. "ensembl"

Graham McVicker's avatar
Graham McVicker committed
112
113
114
115
116
  Example    : $tran = Bio::EnsEMBL::Transcript->new(-EXONS => \@exons);
  Description: Constructor. Instantiates a Transcript object.
  Returntype : Bio::EnsEMBL::Transcript
  Exceptions : throw on bad arguments
  Caller     : general
117
  Status     : Stable
Graham McVicker's avatar
Graham McVicker committed
118
119
120

=cut

121
sub new {
  my $proto = shift;
  my $class = ref($proto) || $proto;

  # Delegate base construction to the parent class, handing it the full
  # argument list; the same list is parsed again below for the
  # transcript-specific named arguments.
  my $self = $class->SUPER::new(@_);

  # $confidence and $external_db_name are extracted so that -CONFIDENCE
  # and -EXTERNAL_DB_NAME remain accepted, but neither is used below.
  my (
    $exons,            $stable_id,    $version,
    $external_name,    $external_db,  $external_status,
    $display_xref,     $created_date, $modified_date,
    $description,      $biotype,      $confidence,
    $external_db_name, $is_current,   $source
    )
    = rearrange( [ qw(
        EXONS            STABLE_ID     VERSION
        EXTERNAL_NAME    EXTERNAL_DB   EXTERNAL_STATUS
        DISPLAY_XREF     CREATED_DATE  MODIFIED_DATE
        DESCRIPTION      BIOTYPE       CONFIDENCE
        EXTERNAL_DB_NAME IS_CURRENT    SOURCE
        ) ],
    @_ );

  # Attach the exons first so the coordinates can be derived from them.
  if ($exons) {
    $self->{'_trans_exon_array'} = $exons;
    $self->recalculate_coordinates();
  }

  $self->stable_id($stable_id);
  $self->{'created_date'}  = $created_date;
  $self->{'modified_date'} = $modified_date;

  # The external/xref attributes are only set when explicitly supplied,
  # so that their getters can otherwise fall back on the display xref.
  if ( defined $external_name )   { $self->external_name($external_name) }
  if ( defined $external_db )     { $self->external_db($external_db) }
  if ( defined $external_status ) { $self->external_status($external_status) }
  if ( defined $display_xref )    { $self->display_xref($display_xref) }

  $self->edits_enabled(1);
  $self->description($description);
  $self->{'biotype'} = $biotype;
  $self->source($source);

  # Version and is_current both default to 1 when not given.
  $self->{'version'}    = defined($version)    ? $version    : 1;
  $self->{'is_current'} = defined($is_current) ? $is_current : 1;

  return $self;
} ## end sub new
189

Graham McVicker's avatar
Graham McVicker committed
190
=head2 get_all_DBLinks
191

192
193
194
195
  Arg [1]    : String database name (optional)
               SQL wildcard characters (_ and %) can be used to
               specify patterns.

196
197
198
199
200
  Arg [2]    : (optional) String, external database type, can be one of
               ('ARRAY','ALT_TRANS','ALT_GENE','MISC','LIT','PRIMARY_DB_SYNONYM','ENSEMBL'),
               SQL wildcard characters (_ and %) can be used to
               specify patterns.

201
  Example    : my @dblinks = @{ $transcript->get_all_DBLinks() };
202
203
               @dblinks = @{ $transcript->get_all_DBLinks('Uniprot%') };
               @dblinks = @{ $transcript->get_all_DBLinks('%', 'ENSEMBL') };
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219

  Description: Retrieves *all* related DBEntries for this
               transcript.  This includes all DBEntries that are
               associated with the corresponding translation.

               If you only want to retrieve the DBEntries associated
               with the transcript (and not the translation) then
               you should use the get_all_DBEntries() call instead.

               Note: Each entry may be listed more than once.  No
               uniqueness checks are done.  Also if you put in an
               incorrect external database name no checks are done
               to see if this exists, you will just get an empty
               list.

  Return type: Listref of Bio::EnsEMBL::DBEntry objects
220
221
  Exceptions : none
  Caller     : general
222
  Status     : Stable
223
224
225

=cut

Graham McVicker's avatar
Graham McVicker committed
226
sub get_all_DBLinks {
  my ( $self, $db_name_exp, $ex_db_type ) = @_;

  # Start from a copy of the transcript's own xrefs so the cached list
  # returned by get_all_DBEntries() is never modified.
  my @xrefs = @{ $self->get_all_DBEntries( $db_name_exp, $ex_db_type ) };

  # Append the xrefs of the translation, if this transcript has one.
  my $translation = $self->translation();
  if ( defined $translation ) {
    push @xrefs,
      @{ $translation->get_all_DBEntries( $db_name_exp, $ex_db_type ) };
  }

  # _compare_xrefs() reads the sort variables $a and $b directly.
  return [ sort { _compare_xrefs() } @xrefs ];
}

245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
=head2 get_all_xrefs

  Arg [1]    : String database name (optional)
               SQL wildcard characters (_ and %) can be used to
               specify patterns.

  Example    : @xrefs = @{ $transcript->get_all_xrefs() };
               @xrefs = @{ $transcript->get_all_xrefs('Uniprot%') };

  Description: Retrieves *all* related xrefs for this transcript.
               This includes all xrefs that are associated with the
               corresponding translation of this transcript.

               If you want to retrieve the xrefs associated with
               only the transcript (and not the translation) then
               you should use the get_all_object_xrefs() method
               instead.

               Note: Each entry may be listed more than once.  No
               uniqueness checks are done.  Also if you put in an
               incorrect external database name no checks are done
               to see if this exists, you will just get an empty
               list.

                NB: This method is an alias for the
                    get_all_DBLinks() method.

  Return type: Listref of Bio::EnsEMBL::DBEntry objects

  Status     : Stable

=cut

278
279
280
281
# Alias for get_all_DBLinks(): all arguments are forwarded unchanged.
sub get_all_xrefs {
  my ( $self, @args ) = @_;
  return $self->get_all_DBLinks(@args);
}
Graham McVicker's avatar
Graham McVicker committed
282

283
=head2 get_all_DBEntries
284

285
286
287
  Arg [1]    : (optional) String, external database name,
               SQL wildcard characters (_ and %) can be used to
               specify patterns.
288

289
290
291
292
  Arg [2]    : (optional) String, external database type, can be one of
               ('ARRAY','ALT_TRANS','ALT_GENE','MISC','LIT','PRIMARY_DB_SYNONYM','ENSEMBL'),
               SQL wildcard characters (_ and %) can be used to
               specify patterns.
293

294
  Example    : my @dbentries = @{ $transcript->get_all_DBEntries() };
295
296
               @dbentries = @{ $transcript->get_all_DBEntries('Uniprot%') };
               @dbentries = @{ $transcript->get_all_DBEntries('%', 'ENSEMBL') };
297
298
299
300
301
302
303
304
305
306

  Description: Retrieves DBEntries (xrefs) for this transcript.
               This does *not* include the corresponding
               translations DBEntries (see get_all_DBLinks()).

               This method will attempt to lazy-load DBEntries
               from a database if an adaptor is available and no
               DBEntries are present on the transcript (i.e. they
               have not already been added or loaded).

307
  Returntype : Listref of Bio::EnsEMBL::DBEntry objects
308
309
  Exceptions : none
  Caller     : get_all_DBLinks, TranscriptAdaptor::store
310
  Status     : Stable
311
312
313

=cut

314
sub get_all_DBEntries {
  my ( $self, $ex_db_exp, $ex_db_type ) = @_;

  # Every distinct (name pattern, type) filter gets its own cache slot:
  # 'dbentries' followed by whichever of the two filters were supplied.
  my $cache_key = join( '',
                        'dbentries',
                        grep { defined($_) } ( $ex_db_exp, $ex_db_type ) );

  # Nothing cached under this key yet: lazy-load from the database when
  # an adaptor is attached.
  if ( !defined( $self->{$cache_key} ) ) {
    my $adaptor = $self->adaptor();
    if ( defined $adaptor ) {
      $self->{$cache_key} =
        $adaptor->db()->get_DBEntryAdaptor()
        ->fetch_all_by_Transcript( $self, $ex_db_exp, $ex_db_type );
    }
  }

  # Fall back on (and remember) an empty list.
  return $self->{$cache_key} ||= [];
} ## end sub get_all_DBEntries
339

340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
=head2 get_all_object_xrefs

  Arg [1]    : (optional) String, external database name

  Arg [2]    : (optional) String, external_db type

  Example    : @oxrefs = @{ $transcript->get_all_object_xrefs() };

  Description: Retrieves xrefs for this transcript.  This does
               *not* include xrefs that are associated with the
               corresponding translations of this transcript (see
               get_all_xrefs()).

               This method will attempt to lazy-load xrefs from a
               database if an adaptor is available and no xrefs are
               present on the transcript (i.e. they have not already
               been added or loaded).

358
359
                NB: This method is an alias for the
                    get_all_DBEntries() method.
360
361
362
363
364
365
366

  Return type: Listref of Bio::EnsEMBL::DBEntry objects

  Status     : Stable

=cut

367
368
369
370
# Alias for get_all_DBEntries(): all arguments are forwarded unchanged.
sub get_all_object_xrefs {
  my ( $self, @args ) = @_;
  return $self->get_all_DBEntries(@args);
}
371
372

=head2 add_DBEntry
373

374
375
  Arg [1]    : Bio::EnsEMBL::DBEntry $dbe
               The dbEntry to be added
376
377
378
379
380
  Example    : my $dbe = Bio::EnsEMBL::DBEntry->new(...);
               $transcript->add_DBEntry($dbe);
  Description: Associates a DBEntry with this transcript. Note that adding
               DBEntries will prevent future lazy-loading of DBEntries for this
               transcript (see get_all_DBEntries).
381
382
383
  Returntype : none
  Exceptions : thrown on incorrect argument type
  Caller     : general
384
  Status     : Stable
385
386
387
388
389
390
391
392

=cut

sub add_DBEntry {
  my ( $self, $dbe ) = @_;

  # Only genuine DBEntry objects are accepted.
  if ( !( $dbe && ref($dbe) && $dbe->isa('Bio::EnsEMBL::DBEntry') ) ) {
    throw('Expected DBEntry argument');
  }

  # Entries go into the unfiltered 'dbentries' slot, creating it on
  # first use.
  $self->{'dbentries'} ||= [];
  push( @{ $self->{'dbentries'} }, $dbe );
}

400

401
402
=head2 get_all_supporting_features

403
  Example    : my @evidence = @{ $transcript->get_all_supporting_features };
Tiago Grego's avatar
Tiago Grego committed
404
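  Description: Retrieves the supporting features of this transcript: those
               added by calls to add_supporting_features or, if none have
               been set and an adaptor is attached, those lazy-loaded from
               the database.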
406
  Returntype : Listref of Bio::EnsEMBL::FeaturePair objects
407
408
  Exceptions : none
  Caller     : general
409
  Status     : Stable
410
411
412
413
414

=cut

sub get_all_supporting_features {
  my ($self) = @_;

  # Lazy-load from the database only if no evidence has ever been set
  # on this object and an adaptor is attached.
  if ( !exists( $self->{_supporting_evidence} ) ) {
    my $adaptor = $self->adaptor;
    if ($adaptor) {
      $self->{_supporting_evidence} =
        $adaptor->db->get_TranscriptSupportingFeatureAdaptor()
        ->fetch_all_by_Transcript($self);
    }
  }

  # The empty fallback list is returned but deliberately not stored.
  return $self->{_supporting_evidence} || [];
}


=head2 add_supporting_features

429
430
431
  Arg [1-N]  : Bio::EnsEMBL::FeaturePair $feature
               The supporting features to add
  Example    : $transcript->add_supporting_features(@features);
432
  Description: Adds a list of supporting features to this Transcript.
433
434
               The added features can be retrieved by
               get_all_supporting_features().
435
436
437
438
439
  Returntype : none
  Exceptions : throw if any of the features are not FeaturePairs
               throw if any of the features are not in the same coordinate
               system as the Transcript
  Caller     : general
440
  Status     : Stable
Tiago Grego's avatar
Tiago Grego committed
441
 
442
=cut
Tiago Grego's avatar
Tiago Grego committed
443
 
444
sub add_supporting_features {
  my ( $self, @features ) = @_;

  return unless @features;

  $self->{_supporting_evidence} ||= [];

  my $own_slice = $self->slice();

  FEATURE:
  foreach my $feature (@features) {

    if ( !defined($feature) || ref($feature) eq "ARRAY" ) {
      throw("Element in transcript supporting features array is undefined or is an ARRAY for " . $self->dbID);
    }

    # blessed() is checked first so that plain strings and unblessed
    # references are rejected through throw() rather than dying inside
    # the ->isa() method call.
    if ( !blessed($feature) || !$feature->isa("Bio::EnsEMBL::FeaturePair") ) {
      throw("Supporting feat [$feature] not a " .
            "Bio::EnsEMBL::FeaturePair");
    }

    # When both sides are attached to a slice, the slices must agree.
    my $feature_slice = $feature->slice();
    if (    defined($own_slice)
         && defined($feature_slice)
         && $own_slice->name() ne $feature_slice->name() )
    {
      throw("Supporting feat not in same coord system as transcript\n" .
            "transcript is attached to [".$own_slice->name()."]\n" .
            "feat is attached to [".$feature_slice->name()."]");
    }

    # Skip features already present (compared by object identity).
    foreach my $added_feature ( @{ $self->{_supporting_evidence} } ) {
      next FEATURE if $feature == $added_feature;
    }

    push( @{ $self->{_supporting_evidence} }, $feature );
  }

  return;
}


484
485
486
487
488
489
490
=head2 flush_supporting_features

  Example     : $transcript->flush_supporting_features;
  Description : Removes all supporting evidence from the transcript.
  Return type : (Empty) listref
  Exceptions  : none
  Caller      : general
491
  Status      : Stable
492
493
494
495
496
497
498
499
500

=cut

# Replaces the stored supporting evidence with a fresh empty list and
# returns that list.
sub flush_supporting_features {
  my ($self) = @_;
  return $self->{'_supporting_evidence'} = [];
}


501
502
=head2 external_db

503
504
  Arg [1]    : (optional) String - name of external db to set
  Example    : $transcript->external_db('HGNC');
Tiago Grego's avatar
Tiago Grego committed
505
506
  Description: Getter/setter for attribute external_db. The db is the one that 
               belongs to the external_name.  
507
508
509
510
  Returntype : String
  Exceptions : none
  Caller     : general
  Status     : Stable
511
512
513
514

=cut

sub external_db {
  my ( $self, $ext_dbname ) = @_;

  # Setter: store and return the supplied name.
  return ( $self->{'external_db'} = $ext_dbname ) if defined $ext_dbname;

  # An explicitly stored value (even undef) wins over the display xref.
  return $self->{'external_db'} if exists $self->{'external_db'};

  # Otherwise fall back on the database name of the display xref.
  my $display_xref = $self->display_xref();
  return defined($display_xref) ? $display_xref->dbname() : undef;
}

534

535
536
=head2 external_status

537
538
539
540
541
542
543
544
  Arg [1]    : (optional) String - status of the external db
  Example    : $transcript->external_status('KNOWNXREF');
  Description: Getter/setter for attribute external_status. The status of
               the external db of the one that belongs to the external_name.
  Returntype : String
  Exceptions : none
  Caller     : general
  Status     : Stable
545
546
547

=cut

Tiago Grego's avatar
Tiago Grego committed
548
sub external_status {
  my ( $self, $ext_status ) = @_;

  # Setter: store and return the supplied status.
  return ( $self->{'external_status'} = $ext_status ) if defined $ext_status;

  # An explicitly stored value (even undef) wins over the display xref.
  return $self->{'external_status'} if exists $self->{'external_status'};

  # Otherwise fall back on the status of the display xref.
  my $display_xref = $self->display_xref();
  return defined($display_xref) ? $display_xref->status() : undef;
}


569
570
=head2 external_name

571
572
573
574
575
576
577
  Arg [1]    : (optional) String - the external name to set
  Example    : $transcript->external_name('BRCA2-001');
  Description: Getter/setter for attribute external_name.
  Returntype : String or undef
  Exceptions : none
  Caller     : general
  Status     : Stable
578
579
580
581

=cut

sub external_name {
  my ( $self, $ext_name ) = @_;

  # Setter: store and return the supplied name.
  return ( $self->{'external_name'} = $ext_name ) if defined $ext_name;

  # An explicitly stored value (even undef) wins over the display xref.
  return $self->{'external_name'} if exists $self->{'external_name'};

  # Otherwise fall back on the display id of the display xref.
  my $display_xref = $self->display_xref();
  return defined($display_xref) ? $display_xref->display_id() : undef;
}

601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
=head2 source

  Arg [1]    : (optional) String - the source to set
  Example    : $transcript->source('ensembl');
  Description: Getter/setter for attribute source
  Returntype : String
  Exceptions : none
  Caller     : general
  Status     : Stable

=cut

sub source {
  my ( $self, @args ) = @_;

  # Any supplied argument (including undef) replaces the stored source.
  if (@args) {
    $self->{'source'} = $args[0];
  }

  # A false or missing source reads back as the default "ensembl".
  return ( $self->{'source'} || "ensembl" );
}
618

Alistair Rust's avatar
Alistair Rust committed
619
=head2 display_xref
620

621
622
623
  Arg [1]    : (optional) Bio::EnsEMBL::DBEntry - the display xref to set
  Example    : $transcript->display_xref($db_entry);
  Description: Getter/setter for display_xref for this transcript.
624
  Returntype : Bio::EnsEMBL::DBEntry
625
626
  Exceptions : none
  Caller     : general
627
  Status     : Stable
628
629
630

=cut

631
sub display_xref {
  my ( $self, @args ) = @_;

  # Any supplied argument (including undef) replaces the stored xref.
  if (@args) {
    $self->{'display_xref'} = $args[0];
  }

  return $self->{'display_xref'};
}

637
638
639
640
641
642
643
=head2 is_canonical

  Args [1]      : (optional) Boolean is_canonical

  Example       : if ($transcript->is_canonical()) { ... }

  Description : Returns true (non-zero) if the transcript is the
Andy Yates's avatar
Andy Yates committed
644
645
646
647
648
                canonical transcript of its gene, false (0) if not. If the code
                returns an undefined it is because its state is not currently
                known. Internally the code will consult the database for this
                value if it is unknown and the transcript has a dbID and an
                attached adaptor
649
650
651
652
653
654
655
656
657

  Return type   : Boolean

  Status        : Stable

=cut

sub is_canonical {
  my ( $self, $value ) = @_;

  if ( defined($value) ) {
    # Setter: handled before any cached value is consulted, so that an
    # already-known state can still be changed.  Normalised to 1 or 0.
    $self->{is_canonical} = ( $value ? 1 : 0 );
  }
  elsif (    !defined( $self->{is_canonical} )
          && $self->dbID()
          && $self->adaptor() )
  {
    # State unknown: ask the database, which is only possible for a
    # stored transcript with an attached adaptor.
    $self->{is_canonical} =
      $self->adaptor()->is_Transcript_canonical($self);
  }

  # May still be undef if the state is unknown and could not be looked up.
  return $self->{is_canonical};
}
Ewan Birney's avatar
Ewan Birney committed
673
674
675

=head2 translation

676
677
678
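  Arg [1]    : (optional) Bio::EnsEMBL::Translation - the translation to set;
               pass an explicit undef to remove the current translation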
  Example    : if ( $transcript->translation() ) {
                 print( $transcript->translation()->stable_id(), "\n" );
679
               } else {
680
                 print("Pseudogene\n");
681
               }
682
683
684
685
686
687
688
  Description: Getter/setter for the Translation object which
               defines the CDS (and as a result the peptide encoded
               by) this transcript.  This function will return
               undef if this transcript is a pseudogene, i.e. a
               non-translating transcript such as an ncRNA.  This
               is the accepted method of determining whether a
               transcript is a pseudogene or not.
689
  Returntype : Bio::EnsEMBL::Translation
690
691
  Exceptions : none
  Caller     : general
692
  Status     : Stable
Ewan Birney's avatar
Ewan Birney committed
693
694
695

=cut

Michele Clamp's avatar
Michele Clamp committed
696
sub translation {
  my ( $self, $translation ) = @_;

  # Values derived from the translation; they are reset to undef
  # whenever the translation is replaced or removed.
  my @derived_keys = qw(
    cdna_coding_start   cdna_coding_end
    coding_region_start coding_region_end
    transcript_mapper
  );

  if ( defined $translation ) {
    # Setter: attach the new translation and link it back to us.
    assert_ref( $translation, 'Bio::EnsEMBL::Translation' );

    $self->{'translation'} = $translation;
    $translation->transcript($self);

    @{$self}{@derived_keys} = (undef) x @derived_keys;
  }
  elsif ( @_ > 1 ) {
    # Called with an explicit undef: remove the existing translation,
    # if there is one.
    if ( defined $self->{'translation'} ) {
      $self->{'translation'}->transcript(undef);
      delete $self->{'translation'};

      @{$self}{@derived_keys} = (undef) x @derived_keys;
    }
  }
  elsif ( !exists( $self->{'translation'} ) && defined( $self->adaptor() ) ) {
    # Getter with nothing loaded yet: lazy-load from the database.
    $self->{'translation'} =
      $self->adaptor()->db()->get_TranslationAdaptor()
      ->fetch_by_Transcript($self);
  }

  return $self->{'translation'};
} ## end sub translation
Ewan Birney's avatar
Ewan Birney committed
738

739
740
741
742
743
744
745
746
747
748
749
=head2 get_all_alternative_translations

  Args       : None
  Example    :

    my @alt_translations =
      @{ $transcript->get_all_alternative_translations() };

  Description:  Fetches all alternative translations defined for this
                transcript.  The canonical translation is not returned.

750
  Returntype : Arrayref to Bio::EnsEMBL::Translation
Andreas Kusalananda Kähäri's avatar
Andreas Kusalananda Kähäri committed
751
  Exceptions : None
752
753
754
755
756
757
758
759
  Caller     : General
  Status     : Stable

=cut

sub get_all_alternative_translations {
  my ($self) = @_;

  # Lazy-load from the database only if nothing has been set on this
  # object yet and an adaptor is attached.
  if (   !exists( $self->{'alternative_translations'} )
       && defined( $self->adaptor() ) )
  {
    my $pa = $self->adaptor()->db()->get_TranslationAdaptor();
    my @translations =
      @{ $pa->fetch_all_alternative_by_Transcript($self) };

    $self->{'alternative_translations'} = \@translations;
  }

  # Always hand back an array reference, as documented, so callers can
  # dereference the result unconditionally.  The empty fallback is not
  # stored, so a later call can still lazy-load once an adaptor exists.
  return $self->{'alternative_translations'} || [];
}
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803

=head2 add_alternative_translation

  Args       : Bio::EnsEMBL::Translation $translation
  Example    :

    $transcript->add_alternative_translation($translation);

  Description: Adds an alternative translation to this transcript.
  Returntype : None
  Exceptions : None
  Caller     : General
  Status     : Stable

=cut

sub add_alternative_translation {
  my ( $self, $translation ) = @_;

  unless (    defined($translation)
           && ref($translation)
           && $translation->isa('Bio::EnsEMBL::Translation') )
  {
    throw("Bio::EnsEMBL::Translation argument expected.");
  }

  # Make sure any existing alternative translations have been loaded
  # from the database before appending to the list.
  $self->get_all_alternative_translations();

  push @{ $self->{'alternative_translations'} }, $translation;
}
Arne Stabenau's avatar
Arne Stabenau committed
804

805
806
=head2 spliced_seq

807
808
  Args       : soft_mask (opt)
               if specified, will return a sequence where UTR regions are lowercased
809
810
  Description: Retrieves all Exon sequences and concats them together.
               No phase padding magic is done, even if phases do not align.
811
  Returntype : Text
812
813
  Exceptions : none
  Caller     : general
814
  Status     : Stable
815
816
817
818

=cut

sub spliced_seq {
  my ( $self, $soft_mask ) = @_;

  my $seq_string = "";
  for my $ex ( @{ $self->get_all_Exons() } ) {
    my $seq = $ex->seq();

    # No sequence available: pad with N to keep the coordinates intact.
    if ( !$seq ) {
      warning("Could not obtain seq for exon.  Transcript sequence may not " .
              "be correct.");
      $seq_string .= 'N' x $ex->length();
      next;
    }

    my $exon_seq = $seq->seq();

    if ($soft_mask) {
      my $coding_start = $ex->coding_region_start($self);
      my $coding_end   = $ex->coding_region_end($self);

      if ( !defined($coding_start) ) {
        # Entirely non-coding exon: lowercase all of it and skip the
        # partial masking below, which needs defined coordinates.
        $exon_seq = lc($exon_seq);
      } else {
        my $exon_start = $ex->start();
        my $exon_end   = $ex->end();
        my $is_forward = ( $ex->strand == 1 );

        # Non-coding bases before the coding region start (in slice
        # coordinates): the head of the string on the forward strand,
        # the tail of it on the reverse strand.
        if ( $coding_start > $exon_start ) {
          my $forward_length = $coding_start - $exon_start;
          my $reverse_length = $exon_end - $coding_start;
          if ($is_forward) {
            $exon_seq = lc( substr( $exon_seq, 0, $forward_length ) )
              . substr( $exon_seq, $forward_length );
          } else {
            $exon_seq = substr( $exon_seq, 0, $reverse_length + 1 )
              . lc( substr( $exon_seq, $reverse_length + 1 ) );
          }
        }

        # Non-coding bases after the coding region end: the mirror
        # image of the case above.
        if ( defined($coding_end) && $coding_end < $exon_end ) {
          my $forward_length = $coding_end - $exon_start;
          my $reverse_length = $exon_end - $coding_end;
          if ($is_forward) {
            $exon_seq = substr( $exon_seq, 0, $forward_length + 1 )
              . lc( substr( $exon_seq, $forward_length + 1 ) );
          } else {
            $exon_seq = lc( substr( $exon_seq, 0, $reverse_length ) )
              . substr( $exon_seq, $reverse_length );
          }
        }
      }
    }

    $seq_string .= $exon_seq;
  }

  # apply post transcriptional edits
  if ( $self->edits_enabled() ) {
    # Apply edits from the highest start downwards so that an edit never
    # shifts the coordinates of one still to be applied.
    my @seqeds =
      sort { $b->start() <=> $a->start() } @{ $self->get_all_SeqEdits() };

    foreach my $se (@seqeds) {
      $se->apply_edit( \$seq_string );
    }
  }

  return $seq_string;
}


=head2 translateable_seq

  Args       : none
  Example    : print $transcript->translateable_seq(), "\n";
881
  Description: Returns a sequence string which is the translateable part
882
               of the transcripts sequence.  This is formed by splicing all
883
884
885
886
887
               Exon sequences together and apply all defined RNA edits.
               Then the coding part of the sequence is extracted and returned.
               The code will not support monkey exons any more. If you want to
               have non phase matching exons, define appropriate _rna_edit
               attributes!
888
889
890

               An empty string is returned if this transcript is a pseudogene
               (i.e. is non-translateable).
891
  Returntype : Text
892
893
  Exceptions : none
  Caller     : general
894
  Status     : Stable
895
896
897
898
899
900

=cut

sub translateable_seq {
  my ($self) = @_;

  # No translation: nothing is translateable.
  my $translation = $self->translation();
  if ( !$translation ) {
    return '';
  }

  # Validate the coding coordinates before they are used for slicing;
  # without both of them no coding sequence can be extracted.
  my $start = $self->cdna_coding_start();
  my $end   = $self->cdna_coding_end();
  if ( !$start || !$end ) {
    return "";
  }

  # cDNA coordinates are 1-based and inclusive.
  my $mrna = substr( $self->spliced_seq(), $start - 1, $end - $start + 1 );

  # Pad with N when the first coding exon starts in a positive phase.
  my $start_phase = $translation->start_Exon->phase();
  if ( $start_phase > 0 ) {
    $mrna = ( "N" x $start_phase ) . $mrna;
  }

  return $mrna;
}

Arne Stabenau's avatar
Arne Stabenau committed
923

924
925
926
927
928
929
930
=head2 cdna_coding_start

  Arg [1]    : (optional) $value
  Example    : $relative_coding_start = $transcript->cdna_coding_start;
  Description: Retrieves the position of the coding start of this transcript
               in cdna coordinates (relative to the start of the 5prime end of
               the transcript, excluding introns, including utrs).
931
932
933

               This will return undef if this is a pseudogene (i.e. a
               transcript with no translation).
934
935
936
  Returntype : int
  Exceptions : none
  Caller     : five_prime_utr, get_all_snps, general
937
  Status     : Stable
938
939
940
941

=cut

sub cdna_coding_start {
942
943
944
945
  my $self = shift;

  if( @_ ) {
    $self->{'cdna_coding_start'} = shift;
946
  }
947

948
  if(!defined $self->{'cdna_coding_start'} && defined $self->translation){
949
    # calc coding start relative from the start of translation (in cdna coords)
950
951
952
    my $start = 0;

    my @exons = @{$self->get_all_Exons};
953
954
    my $exon;

955
    while($exon = shift @exons) {
956
      if($exon == $self->translation->start_Exon) {
957
958
959
        #add the utr portion of the start exon
        $start += $self->translation->start;
        last;
960
      } else {
961
962
        #add the entire length of this non-coding exon
        $start += $exon->length;
963
964
      }
    }
965
966
967
968

    # adjust cdna coords if sequence edits are enabled
    if($self->edits_enabled()) {
      my @seqeds = @{$self->get_all_SeqEdits()};
969
970
971
972
973
974
975
976
977
978
979
980
981
982
      if (scalar @seqeds) {
        my $transl_start = $self->get_all_Attributes('_transl_start');
        if (@{$transl_start}) {
          $start = $transl_start->[0]->value;
        } else {
          # sort in reverse order to avoid adjustment of downstream edits
          @seqeds = sort {$b->start() <=> $a->start()} @seqeds;

          foreach my $se (@seqeds) {
            # use less than start so that start of CDS can be extended
            if($se->start() < $start) {
              $start += $se->length_diff