Transcript.pm 89.6 KB
Newer Older
1
2
=head1 LICENSE

Magali Ruffier's avatar
Magali Ruffier committed
3
Copyright [1999-2015] Wellcome Trust Sanger Institute and the EMBL-European Bioinformatics Institute
Tiago Grego's avatar
Tiago Grego committed
4
Copyright [2016-2019] EMBL-European Bioinformatics Institute
5

6
7
8
9
10
11
12
13
14
15
16
17
18
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

     http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.

=cut
19
20
21
22
23


=head1 CONTACT

  Please email comments or questions to the public Ensembl
Magali Ruffier's avatar
Magali Ruffier committed
24
  developers list at <http://lists.ensembl.org/mailman/listinfo/dev>.
25
26

  Questions may also be sent to the Ensembl help desk at
Magali Ruffier's avatar
Magali Ruffier committed
27
  <http://www.ensembl.org/Help/Contact>.
28
29

=cut
Ewan Birney's avatar
Ewan Birney committed
30
31
32

=head1 NAME

33
Bio::EnsEMBL::Transcript - object representing an Ensembl transcript
Ewan Birney's avatar
Ewan Birney committed
34
35
36

=head1 SYNOPSIS

37
Creation:
38

39
40
  my $tran = new Bio::EnsEMBL::Transcript();
  my $tran = new Bio::EnsEMBL::Transcript( -EXONS => \@exons );
41
42
43

Manipulation:

44
45
  # Returns an array of Exon objects
  my @exons = @{ $tran->get_all_Exons() };
46

47
48
49
50
51
52
  # Returns the peptide translation of the exons as a Bio::Seq
  if ( $tran->translation() ) {
    my $pep = $tran->translate();
  } else {
    print "Transcript ", $tran->stable_id(), " is non-coding\n";
  }
53

54
55
=head1 DESCRIPTION

56
A representation of a transcript within the Ensembl system.  A transcript
57
58
consists of a set of Exons and (possibly) a Translation which defines the
coding and non-coding regions of the exons.
Ewan Birney's avatar
Ewan Birney committed
59

60
=cut
61

62
63
package Bio::EnsEMBL::Transcript;

Ewan Birney's avatar
Ewan Birney committed
64
65
use strict;

66
use Bio::EnsEMBL::Feature;
67
use Bio::EnsEMBL::UTR;
Ian Longden's avatar
Ian Longden committed
68
use Bio::EnsEMBL::Intron;
69
70
use Bio::EnsEMBL::ExonTranscript;
use Bio::EnsEMBL::CDS;
71
use Bio::EnsEMBL::TranscriptMapper;
72
use Bio::EnsEMBL::SeqEdit;
73
use Bio::EnsEMBL::Biotype;
74
use Bio::EnsEMBL::Utils::Argument qw( rearrange );
Magali Ruffier's avatar
Magali Ruffier committed
75
use Bio::EnsEMBL::Utils::Exception qw(warning throw );
76
use Bio::EnsEMBL::Utils::Scalar qw( assert_ref );
77

78
use parent qw(Bio::EnsEMBL::Feature);
Ewan Birney's avatar
Ewan Birney committed
79

80
81
82
83
use constant SEQUENCE_ONTOLOGY => {
  acc  => 'SO:0000673',
  term => 'transcript',
};
Graham McVicker's avatar
Graham McVicker committed
84
85
86
87

=head2 new

  Arg [-EXONS] :
Tiago Grego's avatar
Tiago Grego committed
88
        reference to list of Bio::EnsEMBL::Exon objects - exons which make up 
Graham McVicker's avatar
Graham McVicker committed
89
90
91
92
93
94
95
96
97
98
99
100
101
102
        this transcript
  Arg [-STABLE_ID] :
        string - the stable identifier of this transcript
  Arg [-VERSION] :
        int - the version of the stable identifier of this transcript
  Arg [-EXTERNAL_NAME] :
        string - the external database name associated with this transcript
  Arg [-EXTERNAL_DB] :
        string - the name of the database the external name is from
  Arg [-EXTERNAL_STATUS]:
        string - the status of the external identifier
  Arg [-DISPLAY_XREF]:
        Bio::EnsEMBL::DBEntry - The external database entry that is used
        to label this transcript when it is displayed.
103
104
105
106
107
  Arg [-CREATED_DATE]:
        string - the date the transcript was created
  Arg [-MODIFIED_DATE]:
        string - the date the transcript was last modified
  Arg [-DESCRIPTION]:
108
        string - the transcripts description
Tiago Grego's avatar
Tiago Grego committed
109
  Arg [-BIOTYPE]: 
110
        string - the biotype e.g. "protein_coding"
111
112
  Arg [-IS_CURRENT]:
        Boolean - specifies if this is the current version of the transcript
113
114
115
  Arg [-SOURCE]:
        string - the transcript source, e.g. "ensembl"

Graham McVicker's avatar
Graham McVicker committed
116
117
118
119
120
  Example    : $tran = new Bio::EnsEMBL::Transcript(-EXONS => \@exons);
  Description: Constructor. Instantiates a Transcript object.
  Returntype : Bio::EnsEMBL::Transcript
  Exceptions : throw on bad arguments
  Caller     : general
121
  Status     : Stable
Graham McVicker's avatar
Graham McVicker committed
122
123
124

=cut

125
sub new {
  # Constructor: builds the object through the parent class constructor and
  # then fills in the transcript-specific attributes from the named arguments.
  my $invocant = shift;
  my $class    = ref($invocant) || $invocant;

  my $self = $class->SUPER::new(@_);

  # CONFIDENCE and EXTERNAL_DB_NAME are still parsed from the argument list
  # but this constructor does not use their values, hence the undef slots.
  my (
    $exons,         $stable_id,    $version,
    $external_name, $external_db,  $external_status,
    $display_xref,  $created_date, $modified_date,
    $description,   $biotype,      undef,
    undef,          $is_current,   $source
    )
    = rearrange( [
      'EXONS',         'STABLE_ID',    'VERSION',
      'EXTERNAL_NAME', 'EXTERNAL_DB',  'EXTERNAL_STATUS',
      'DISPLAY_XREF',  'CREATED_DATE', 'MODIFIED_DATE',
      'DESCRIPTION',   'BIOTYPE',      'CONFIDENCE',
      'EXTERNAL_DB_NAME',
      'IS_CURRENT',    'SOURCE'
    ],
    @_ );

  # Coordinates are recalculated only when exons were supplied.
  if ($exons) {
    $self->{'_trans_exon_array'} = $exons;
    $self->recalculate_coordinates();
  }

  $self->stable_id($stable_id);
  $self->{'created_date'}  = $created_date;
  $self->{'modified_date'} = $modified_date;

  # The external_* and display_xref setters are only invoked for values
  # that were actually passed in.
  if ( defined $external_name )   { $self->external_name($external_name) }
  if ( defined $external_db )     { $self->external_db($external_db) }
  if ( defined $external_status ) { $self->external_status($external_status) }
  if ( defined $display_xref )    { $self->display_xref($display_xref) }

  $self->edits_enabled(1);
  $self->description($description);
  $self->{'biotype'} = $biotype;
  $self->source($source);

  # Version defaults to 1 and is_current defaults to true when not given.
  $self->{'version'}    = defined($version)    ? $version    : 1;
  $self->{'is_current'} = defined($is_current) ? $is_current : 1;

  return $self;
} ## end sub new
193

Graham McVicker's avatar
Graham McVicker committed
194
=head2 get_all_DBLinks
195

196
197
198
199
  Arg [1]    : String database name (optional)
               SQL wildcard characters (_ and %) can be used to
               specify patterns.

200
201
202
203
204
  Arg [2]    : (optional) String, external database type, can be one of
               ('ARRAY','ALT_TRANS','ALT_GENE','MISC','LIT','PRIMARY_DB_SYNONYM','ENSEMBL'),
               SQL wildcard characters (_ and %) can be used to
               specify patterns.

205
  Example    : my @dblinks = @{ $transcript->get_all_DBLinks() };
206
207
               @dblinks = @{ $transcript->get_all_DBLinks('Uniprot%') };
               @dblinks = @{ $transcript->get_all_DBLinks('%', 'ENSEMBL') };
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223

  Description: Retrieves *all* related DBEntries for this
               transcript.  This includes all DBEntries that are
               associated with the corresponding translation.

               If you only want to retrieve the DBEntries associated
               with the transcript (and not the translation) then
               you should use the get_all_DBEntries() call instead.

               Note: Each entry may be listed more than once.  No
               uniqueness checks are done.  Also if you put in an
               incorrect external database name no checks are done
               to see if this exists, you will just get an empty
               list.

  Return type: Listref of Bio::EnsEMBL::DBEntry objects
224
225
  Exceptions : none
  Caller     : general
226
  Status     : Stable
227
228
229

=cut

Graham McVicker's avatar
Graham McVicker committed
230
sub get_all_DBLinks {
  # Collects the DBEntries of this transcript and, when there is one, of its
  # translation, and returns them as a sorted list reference.
  my ( $self, $db_name_exp, $ex_db_type ) = @_;

  # Copy the transcript's own entries so the cached list is not modified.
  my @xrefs = @{ $self->get_all_DBEntries( $db_name_exp, $ex_db_type ) };

  my $translation = $self->translation();
  if ( defined $translation ) {
    my $translation_xrefs =
      $translation->get_all_DBEntries( $db_name_exp, $ex_db_type );
    push @xrefs, @{$translation_xrefs};
  }

  # _compare_xrefs() is called without arguments; presumably it reads the
  # sort variables $a and $b directly -- confirm against its definition.
  my @sorted = sort { _compare_xrefs() } @xrefs;

  return \@sorted;
}

249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
=head2 get_all_xrefs

  Arg [1]    : String database name (optional)
               SQL wildcard characters (_ and %) can be used to
               specify patterns.

  Example    : @xrefs = @{ $transcript->get_all_xrefs() };
               @xrefs = @{ $transcript->get_all_xrefs('Uniprot%') };

  Description: Retrieves *all* related xrefs for this transcript.
               This includes all xrefs that are associated with the
               corresponding translation of this transcript.

               If you want to retrieve the xrefs associated with
               only the transcript (and not the translation) then
               you should use the get_all_object_xrefs() method
               instead.

               Note: Each entry may be listed more than once.  No
               uniqueness checks are done.  Also if you put in an
               incorrect external database name no checks are done
               to see if this exists, you will just get an empty
               list.

                NB: This method is an alias for the
                    get_all_DBLinks() method.

  Return type: Listref of Bio::EnsEMBL::DBEntry objects

  Status     : Stable

=cut

282
283
284
285
sub get_all_xrefs {
  # Alias: forwards every argument unchanged to get_all_DBLinks().
  my ( $self, @args ) = @_;
  return $self->get_all_DBLinks(@args);
}
Graham McVicker's avatar
Graham McVicker committed
286

287
=head2 get_all_DBEntries
288

289
290
291
  Arg [1]    : (optional) String, external database name,
               SQL wildcard characters (_ and %) can be used to
               specify patterns.
292

293
294
295
296
  Arg [2]    : (optional) String, external database type, can be one of
               ('ARRAY','ALT_TRANS','ALT_GENE','MISC','LIT','PRIMARY_DB_SYNONYM','ENSEMBL'),
               SQL wildcard characters (_ and %) can be used to
               specify patterns.
297

298
  Example    : my @dbentries = @{ $transcript->get_all_DBEntries() };
299
300
               @dbentries = @{ $transcript->get_all_DBEntries('Uniprot%') };
               @dbentries = @{ $transcript->get_all_DBEntries('%', 'ENSEMBL') };
301
302
303
304
305
306
307
308
309
310

  Description: Retrieves DBEntries (xrefs) for this transcript.
               This does *not* include the corresponding
               translations DBEntries (see get_all_DBLinks()).

               This method will attempt to lazy-load DBEntries
               from a database if an adaptor is available and no
               DBEntries are present on the transcript (i.e. they
               have not already been added or loaded).

311
  Returntype : Listref of Bio::EnsEMBL::DBEntry objects
312
313
  Exceptions : none
  Caller     : get_all_DBLinks, TranscriptAdaptor::store
314
  Status     : Stable
315
316
317

=cut

318
sub get_all_DBEntries {
  # Returns the (cached) list reference of DBEntries for this transcript,
  # lazy-loading it through the DBEntry adaptor when nothing is cached yet.
  my ( $self, $ex_db_exp, $ex_db_type ) = @_;

  # One cache slot per filter combination: 'dbentries' followed by whichever
  # of the two filters were supplied.
  my $cache_name =
    join( '', 'dbentries', grep { defined $_ } ( $ex_db_exp, $ex_db_type ) );

  if ( !defined $self->{$cache_name} ) {
    my $adaptor = $self->adaptor();
    if ( defined $adaptor ) {
      my $dbentry_adaptor = $adaptor->db()->get_DBEntryAdaptor();
      $self->{$cache_name} =
        $dbentry_adaptor->fetch_all_by_Transcript( $self, $ex_db_exp,
                                                   $ex_db_type );
    }
  }

  # Always hand back a list reference, even when nothing could be loaded.
  $self->{$cache_name} = [] unless $self->{$cache_name};

  return $self->{$cache_name};
} ## end sub get_all_DBEntries
343

344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
=head2 get_all_object_xrefs

  Arg [1]    : (optional) String, external database name

  Arg [2]    : (optional) String, external_db type

  Example    : @oxrefs = @{ $transcript->get_all_object_xrefs() };

  Description: Retrieves xrefs for this transcript.  This does
               *not* include xrefs that are associated with the
               corresponding translations of this transcript (see
               get_all_xrefs()).

               This method will attempt to lazy-load xrefs from a
               database if an adaptor is available and no xrefs are
               present on the transcript (i.e. they have not already
               been added or loaded).

362
363
                NB: This method is an alias for the
                    get_all_DBEntries() method.
364
365
366
367
368
369
370

  Return type: Listref of Bio::EnsEMBL::DBEntry objects

  Status     : Stable

=cut

371
372
373
374
sub get_all_object_xrefs {
  # Alias: forwards every argument unchanged to get_all_DBEntries().
  my ( $self, @args ) = @_;
  return $self->get_all_DBEntries(@args);
}
375
376

=head2 add_DBEntry
377

378
379
  Arg [1]    : Bio::EnsEMBL::DBEntry $dbe
               The dbEntry to be added
380
381
382
383
384
  Example    : my $dbe = Bio::EnsEMBL::DBEntry->new(...);
               $transcript->add_DBEntry($dbe);
  Description: Associates a DBEntry with this transcript. Note that adding
               DBEntries will prevent future lazy-loading of DBEntries for this
               transcript (see get_all_DBEntries).
385
386
387
  Returntype : none
  Exceptions : thrown on incorrect argument type
  Caller     : general
388
  Status     : Stable
389
390
391
392
393
394
395
396

=cut

sub add_DBEntry {
  # Appends a Bio::EnsEMBL::DBEntry to the unfiltered 'dbentries' list of
  # this transcript; throws when the argument is not such an object.
  my ( $self, $dbe ) = @_;

  my $is_dbentry =
    $dbe && ref($dbe) && $dbe->isa('Bio::EnsEMBL::DBEntry');
  if ( !$is_dbentry ) {
    throw('Expected DBEntry argument');
  }

  # Create the list on first use.
  $self->{'dbentries'} = [] unless $self->{'dbentries'};
  push @{ $self->{'dbentries'} }, $dbe;
}

404

405
406
=head2 get_all_supporting_features

407
  Example    : my @evidence = @{ $transcript->get_all_supporting_features };
Tiago Grego's avatar
Tiago Grego committed
408
  Description: Retrieves any supporting features added manually by 
409
               calls to add_supporting_features.
410
  Returntype : Listref of Bio::EnsEMBL::FeaturePair objects
411
412
  Exceptions : none
  Caller     : general
413
  Status     : Stable
414
415
416
417
418

=cut

sub get_all_supporting_features {
  # Returns the list reference of supporting features, loading it through
  # the TranscriptSupportingFeatureAdaptor the first time when an adaptor
  # is attached.  Falls back to a fresh empty list when nothing is stored.
  my ($self) = @_;

  if ( !exists $self->{_supporting_evidence} && $self->adaptor ) {
    my $tsfa =
      $self->adaptor->db->get_TranscriptSupportingFeatureAdaptor();
    $self->{_supporting_evidence} = $tsfa->fetch_all_by_Transcript($self);
  }

  my $evidence = $self->{_supporting_evidence};
  return $evidence ? $evidence : [];
}


=head2 add_supporting_features

433
434
435
  Arg [1-N]  : Bio::EnsEMBL::FeaturePair $feature
               The supporting features to add
  Example    : $transcript->add_supporting_features(@features);
436
  Description: Adds a list of supporting features to this Transcript.
437
438
               The added features can be retrieved by
               get_all_supporting_features().
439
440
441
442
443
  Returntype : none
  Exceptions : throw if any of the features are not FeaturePairs
               throw if any of the features are not in the same coordinate
               system as the Transcript
  Caller     : general
444
  Status     : Stable
Tiago Grego's avatar
Tiago Grego committed
445
 
446
=cut
Tiago Grego's avatar
Tiago Grego committed
447
 
448
sub add_supporting_features {
  # Adds each given Bio::EnsEMBL::FeaturePair to this transcript's list of
  # supporting evidence, skipping objects that are already in the list.
  #
  # Throws when an element is undefined, is an array reference, is not a
  # Bio::EnsEMBL::FeaturePair, or sits on a slice whose name differs from
  # the name of this transcript's slice.
  my ( $self, @features ) = @_;

  return unless @features;

  $self->{_supporting_evidence} ||= [];

  FEATURE: foreach my $feature (@features) {

    if ( !defined($feature) || ref($feature) eq "ARRAY" ) {
      throw("Element in transcript supporting features array is undefined or is an ARRAY for " . $self->dbID);
    }

    # Only call ->isa on references, and trap the failure Perl raises for
    # unblessed ones, so every bad argument ends in the throw below rather
    # than in a raw "Can't call method" error.  No diagnostic is printed to
    # STDOUT any more; the exception message already names the value.
    my $is_feature_pair = ref($feature)
      && do { local $@; eval { $feature->isa("Bio::EnsEMBL::FeaturePair") } };
    if ( !$is_feature_pair ) {
      throw("Supporting feat [$feature] not a " .
            "Bio::EnsEMBL::FeaturePair");
    }

    # Slices are compared by name, and only when both sides have one.
    if ( ( defined $self->slice() && defined $feature->slice() ) &&
         ( $self->slice()->name() ne $feature->slice()->name() ) ) {
      throw("Supporting feat not in same coord system as transcript\n" .
            "transcript is attached to [".$self->slice()->name()."]\n" .
            "feat is attached to [".$feature->slice()->name()."]");
    }

    # Skip this feature object if it has been added already (the objects
    # themselves are compared, not their contents).
    foreach my $added_feature ( @{ $self->{_supporting_evidence} } ) {
      next FEATURE if $feature == $added_feature;
    }

    # No duplicate was found, add the feature.
    push( @{ $self->{_supporting_evidence} }, $feature );
  }

  return;
}


488
489
490
491
492
493
494
=head2 flush_supporting_features

  Example     : $transcript->flush_supporting_features;
  Description : Removes all supporting evidence from the transcript.
  Return type : (Empty) listref
  Exceptions  : none
  Caller      : general
495
  Status      : Stable
496
497
498
499
500
501
502
503
504

=cut

sub flush_supporting_features {
  # Replaces the stored supporting evidence with a new empty list and
  # returns that list reference.
  my ($self) = @_;
  return $self->{'_supporting_evidence'} = [];
}


505
506
=head2 external_db

507
508
  Arg [1]    : (optional) String - name of external db to set
  Example    : $transcript->external_db('HGNC');
Tiago Grego's avatar
Tiago Grego committed
509
510
  Description: Getter/setter for attribute external_db. The db is the one that 
               belongs to the external_name.  
511
512
513
514
  Returntype : String
  Exceptions : none
  Caller     : general
  Status     : Stable
515
516
517
518

=cut

sub external_db {
  # Getter/setter.  A defined argument is stored and returned.  Otherwise
  # the stored value is returned if the key exists, else the dbname() of
  # the display xref, else undef.
  my ( $self, $ext_dbname ) = @_;

  return $self->{'external_db'} = $ext_dbname if defined $ext_dbname;
  return $self->{'external_db'} if exists $self->{'external_db'};

  my $xref = $self->display_xref();
  return defined($xref) ? $xref->dbname() : undef;
}

538

539
540
=head2 external_status

541
542
543
544
545
546
547
548
  Arg [1]    : (optional) String - status of the external db
  Example    : $transcript->external_status('KNOWNXREF');
  Description: Getter/setter for attribute external_status. The status of
               the external db of the one that belongs to the external_name.
  Returntype : String
  Exceptions : none
  Caller     : general
  Status     : Stable
549
550
551

=cut

Tiago Grego's avatar
Tiago Grego committed
552
sub external_status {
  # Getter/setter.  A defined argument is stored and returned.  Otherwise
  # the stored value is returned if the key exists, else the status() of
  # the display xref, else undef.
  my ( $self, $ext_status ) = @_;

  return $self->{'external_status'} = $ext_status if defined $ext_status;
  return $self->{'external_status'} if exists $self->{'external_status'};

  my $xref = $self->display_xref();
  return defined($xref) ? $xref->status() : undef;
}


573
574
=head2 external_name

575
576
577
578
579
580
581
  Arg [1]    : (optional) String - the external name to set
  Example    : $transcript->external_name('BRCA2-001');
  Description: Getter/setter for attribute external_name.
  Returntype : String or undef
  Exceptions : none
  Caller     : general
  Status     : Stable
582
583
584
585

=cut

sub external_name {
  # Getter/setter.  A defined argument is stored and returned.  Otherwise
  # the stored value is returned if the key exists, else the display_id()
  # of the display xref, else undef.
  my ( $self, $ext_name ) = @_;

  return $self->{'external_name'} = $ext_name if defined $ext_name;
  return $self->{'external_name'} if exists $self->{'external_name'};

  my $xref = $self->display_xref();
  return defined($xref) ? $xref->display_id() : undef;
}

605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
=head2 source

  Arg [1]    : (optional) String - the source to set
  Example    : $transcript->source('ensembl');
  Description: Getter/setter for attribute source
  Returntype : String
  Exceptions : none
  Caller     : general
  Status     : Stable

=cut

sub source {
  # Getter/setter.  Any argument (including undef) is stored; the return
  # value falls back to "ensembl" when the stored value is false.
  my ( $self, @new_source ) = @_;

  if (@new_source) {
    $self->{'source'} = $new_source[0];
  }

  my $current = $self->{'source'};
  return $current ? $current : "ensembl";
}
622

Alistair Rust's avatar
Alistair Rust committed
623
=head2 display_xref
624

625
626
627
  Arg [1]    : (optional) Bio::EnsEMBL::DBEntry - the display xref to set
  Example    : $transcript->display_xref($db_entry);
  Description: Getter/setter for display_xref for this transcript.
628
  Returntype : Bio::EnsEMBL::DBEntry
629
630
  Exceptions : none
  Caller     : general
631
  Status     : Stable
632
633
634

=cut

635
sub display_xref {
  # Getter/setter.  Any argument (including undef) replaces the stored
  # display xref; the stored value is returned.
  my ( $self, @new_xref ) = @_;

  if (@new_xref) {
    $self->{'display_xref'} = $new_xref[0];
  }

  return $self->{'display_xref'};
}

641
642
643
644
645
646
647
=head2 is_canonical

  Args [1]      : (optional) Boolean is_canonical

  Example       : if ($transcript->is_canonical()) { ... }

  Description : Returns true (non-zero) if the transcript is the
Andy Yates's avatar
Andy Yates committed
648
649
650
651
652
                canonical transcript of its gene, false (0) if not. If the code
                returns an undefined it is because its state is not currently
                known. Internally the code will consult the database for this
                value if it is unknown and the transcript has a dbID and an
                attached adaptor
653
654
655
656
657
658
659
660
661

  Return type   : Boolean

  Status        : Stable

=cut

sub is_canonical {
  # Getter/setter for the canonical flag.
  #
  # With a defined argument the flag is set, normalised to 1 or 0.  This is
  # checked first: previously an early "return the cached value" ran before
  # the setter branch, so a new value was ignored once any value had been
  # cached.
  #
  # Without an argument the cached value is returned.  When nothing is
  # cached and the transcript has both a dbID and an adaptor, the value is
  # fetched with the adaptor's is_Transcript_canonical() and cached.
  # Returns undef when the state is not known.
  my ( $self, $value ) = @_;

  if ( defined($value) ) {
    $self->{is_canonical} = ( $value ? 1 : 0 );
  }
  elsif (    !defined $self->{is_canonical}
          && $self->dbID()
          && $self->adaptor() )
  {
    $self->{is_canonical} =
      $self->adaptor()->is_Transcript_canonical($self);
  }

  return $self->{is_canonical};
}
Ewan Birney's avatar
Ewan Birney committed
677
678
679

=head2 translation

680
681
682
  Args       : None
  Example    : if ( $transcript->translation() ) {
                 print( $transcript->translation()->stable_id(), "\n" );
683
               } else {
684
                 print("Pseudogene\n");
685
               }
686
687
688
689
690
691
692
  Description: Getter/setter for the Translation object which
               defines the CDS (and as a result the peptide encoded
               by) this transcript.  This function will return
               undef if this transcript is a pseudogene, i.e. a
               non-translating transcript such as an ncRNA.  This
               is the accepted method of determining whether a
               transcript is a pseudogene or not.
693
  Returntype : Bio::EnsEMBL::Translation
694
695
  Exceptions : none
  Caller     : general
696
  Status     : Stable
Ewan Birney's avatar
Ewan Birney committed
697
698
699

=cut

Michele Clamp's avatar
Michele Clamp committed
700
sub translation {
  # Getter/setter for the Translation of this transcript.
  #   translation($obj)  - attach $obj (must be a Bio::EnsEMBL::Translation)
  #   translation(undef) - detach the current translation, if any
  #   translation()      - return the translation, lazy-loading it through
  #                        the TranslationAdaptor when the key is not set
  #                        and an adaptor is attached
  my ( $self, $translation ) = @_;

  # Values that are reset to undef whenever the translation changes.
  my @derived_keys = qw(
    cdna_coding_start   cdna_coding_end
    coding_region_start coding_region_end
    transcript_mapper
  );

  if ( defined $translation ) {
    assert_ref( $translation, 'Bio::EnsEMBL::Translation' );

    $self->{'translation'} = $translation;
    $translation->transcript($self);

    @{$self}{@derived_keys} = ();
  }
  elsif ( @_ > 1 ) {
    # An explicit undef was passed: remove the existing translation.
    my $current = $self->{'translation'};
    if ( defined $current ) {
      $current->transcript(undef);
      delete $self->{'translation'};

      @{$self}{@derived_keys} = ();
    }
  }
  elsif ( !exists $self->{'translation'} ) {
    my $adaptor = $self->adaptor();
    if ( defined $adaptor ) {
      $self->{'translation'} =
        $adaptor->db()->get_TranslationAdaptor()
        ->fetch_by_Transcript($self);
    }
  }

  return $self->{'translation'};
} ## end sub translation
Ewan Birney's avatar
Ewan Birney committed
742

743
744
745
746
747
748
749
750
751
752
753
=head2 get_all_alternative_translations

  Args       : None
  Example    :

    my @alt_translations =
      @{ $transcript->get_all_alternative_translations() };

  Description:  Fetches all alternative translations defined for this
                transcript.  The canonical translation is not returned.

754
  Returntype : Arrayref to Bio::EnsEMBL::Translation
Andreas Kusalananda Kähäri's avatar
Andreas Kusalananda Kähäri committed
755
  Exceptions : None
756
757
758
759
760
761
762
763
  Caller     : General
  Status     : Stable

=cut

sub get_all_alternative_translations {
  # Returns the list reference of alternative translations.  On first use,
  # when an adaptor is attached, the list is fetched through the
  # TranslationAdaptor and cached as a copy.  Returns whatever is stored
  # (possibly undef) otherwise.
  my ($self) = @_;

  if ( !exists $self->{'alternative_translations'} ) {
    my $adaptor = $self->adaptor();
    if ( defined $adaptor ) {
      my $translation_adaptor = $adaptor->db()->get_TranslationAdaptor();
      my $fetched =
        $translation_adaptor->fetch_all_alternative_by_Transcript($self);

      $self->{'alternative_translations'} = [ @{$fetched} ];
    }
  }

  return $self->{'alternative_translations'};
}
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807

=head2 add_alternative_translation

  Args       : Bio::EnsEMBL::Translation $translation
  Example    :

    $transcript->add_alternative_translation($translation);

  Description: Adds an alternative translation to this transcript.
  Returntype : None
  Exceptions : None
  Caller     : General
  Status     : Stable

=cut

sub add_alternative_translation {
  # Appends a Bio::EnsEMBL::Translation to the list of alternative
  # translations; throws when the argument is not such an object.
  my ( $self, $translation ) = @_;

  my $is_translation = defined($translation)
    && ref($translation)
    && $translation->isa('Bio::EnsEMBL::Translation');
  throw("Bio::EnsEMBL::Translation argument expected.")
    unless $is_translation;

  # Load the existing alternative translations from the database if
  # they haven't already been loaded.
  $self->get_all_alternative_translations();

  push @{ $self->{'alternative_translations'} }, $translation;
}
Arne Stabenau's avatar
Arne Stabenau committed
808

809
810
=head2 spliced_seq

811
812
  Args       : soft_mask (opt)
               if specified, will return a sequence where UTR regions are lowercased
813
814
  Description: Retrieves all Exon sequences and concats them together.
               No phase padding magic is done, even if phases do not align.
815
  Returntype : Text
816
817
  Exceptions : none
  Caller     : general
818
  Status     : Stable
819
820
821
822

=cut

sub spliced_seq {
  # Concatenates the sequences of all exons of this transcript and, when
  # edits are enabled, applies the transcript's SeqEdits to the result.
  #
  # Arg [1] : (optional) soft_mask - when true, the parts of each exon
  #           outside its coding region are lower-cased.
  # Returns : the spliced sequence as a string.  An exon whose sequence
  #           cannot be obtained contributes a run of 'N' of the exon's
  #           length, and a warning is issued.
  my ( $self, $soft_mask ) = @_;

  my $seq_string = "";
  for my $ex ( @{ $self->get_all_Exons() } ) {
    my $seq = $ex->seq();
    if ( !$seq ) {
      warning("Could not obtain seq for exon.  Transcript sequence may not " .
              "be correct.");
      $seq_string .= 'N' x $ex->length();
      next;
    }

    my $exon_seq = $seq->seq();

    if ($soft_mask) {
      my $coding_start = $ex->coding_region_start($self);

      if ( !defined $coding_start ) {
        # No coding region in this exon: mask all of it.  The partial
        # masking below is skipped so that no arithmetic is done on
        # undefined coordinates.
        $exon_seq = lc($exon_seq);
      }
      else {
        my $coding_end = $ex->coding_region_end($self);

        # Non-coding bases before the coding start (in slice coordinates):
        # the 5' end on the forward strand, the 3' end on the reverse.
        if ( $coding_start > $ex->start() ) {
          my $forward_length = $coding_start - $ex->start();
          my $reverse_length = $ex->end() - $coding_start;
          if ( $ex->strand == 1 ) {
            $exon_seq = lc( substr( $exon_seq, 0, $forward_length ) )
              . substr( $exon_seq, $forward_length );
          } else {
            $exon_seq = substr( $exon_seq, 0, $reverse_length + 1 )
              . lc( substr( $exon_seq, $reverse_length + 1 ) );
          }
        }

        # Non-coding bases after the coding end (in slice coordinates).
        if ( $coding_end < $ex->end() ) {
          my $forward_length = $coding_end - $ex->start();
          my $reverse_length = $ex->end() - $coding_end;
          if ( $ex->strand == 1 ) {
            $exon_seq = substr( $exon_seq, 0, $forward_length + 1 )
              . lc( substr( $exon_seq, $forward_length + 1 ) );
          } else {
            $exon_seq = lc( substr( $exon_seq, 0, $reverse_length ) )
              . substr( $exon_seq, $reverse_length );
          }
        }
      }
    }

    $seq_string .= $exon_seq;
  }

  # apply post transcriptional edits
  if ( $self->edits_enabled() ) {
    # sort edits in reverse order to remove complication of
    # adjusting downstream edits
    my @seqeds =
      sort { $b->start() <=> $a->start() } @{ $self->get_all_SeqEdits() };

    foreach my $se (@seqeds) {
      $se->apply_edit( \$seq_string );
    }
  }

  return $seq_string;
}


=head2 translateable_seq

  Args       : none
  Example    : print $transcript->translateable_seq(), "\n";
885
  Description: Returns a sequence string which is the translateable part
886
               of the transcripts sequence.  This is formed by splicing all
887
888
889
890
891
               Exon sequences together and apply all defined RNA edits.
               Then the coding part of the sequence is extracted and returned.
               The code will not support monkey exons any more. If you want to
               have non phase matching exons, define appropriate _rna_edit
               attributes!
892
893
894

               An empty string is returned if this transcript is a pseudogene
               (i.e. is non-translateable).
895
  Returntype : Text
896
897
  Exceptions : none
  Caller     : general
898
  Status     : Stable
899
900
901
902
903
904

=cut

sub translateable_seq {
  # Returns the coding part of the spliced transcript sequence, i.e. the
  # substring from cdna_coding_start to cdna_coding_end, prefixed with one
  # 'N' per base of the start exon's phase when that phase is positive.
  #
  # Returns an empty string when the transcript has no translation or when
  # either cDNA coding coordinate is missing.
  my ($self) = @_;

  if ( !$self->translation() ) {
    return '';
  }

  my $start = $self->cdna_coding_start();
  my $end   = $self->cdna_coding_end();

  # Check the coordinates before they are used: this guard previously ran
  # only after substr() had been called with them, and after the full
  # spliced sequence had been built.
  if ( !$start || !$end ) {
    return "";
  }

  my $mrna = substr( $self->spliced_seq(), $start - 1, $end - $start + 1 );

  my $start_phase = $self->translation->start_Exon->phase();
  if ( $start_phase > 0 ) {
    $mrna = "N" x $start_phase . $mrna;
  }

  return $mrna;
}

Arne Stabenau's avatar
Arne Stabenau committed
927

928
929
930
931
932
933
934
=head2 cdna_coding_start

  Arg [1]    : (optional) $value
  Example    : $relative_coding_start = $transcript->cdna_coding_start;
  Description: Retrieves the position of the coding start of this transcript
               in cdna coordinates (relative to the start of the 5prime end of
               the transcript, excluding introns, including utrs).
935
936
937

               This will return undef if this is a pseudogene (i.e. a
               transcript with no translation).
938
939
940
  Returntype : int
  Exceptions : none
  Caller     : five_prime_utr, get_all_snps, general
941
  Status     : Stable
942
943
944
945

=cut

sub cdna_coding_start {
  my $self = shift;

  # Setter form: an explicitly supplied value is stored and returned as is.
  if ( @_ ) {
    $self->{'cdna_coding_start'} = shift;
  }

  # Compute lazily, and only for transcripts that have a translation; for
  # others the stored value stays undefined and undef is returned.
  if ( !defined $self->{'cdna_coding_start'} && defined $self->translation() ) {
    my $translation = $self->translation();

    # Looked up once instead of on every loop iteration.
    my $start_exon = $translation->start_Exon();

    # Walk the exons in transcript order, accumulating the length of every
    # exon that precedes the one in which translation starts.
    my $start = 0;
    foreach my $exon ( @{ $self->get_all_Exons() } ) {
      # Numeric comparison of references: true only for the very same object.
      if ( $exon == $start_exon ) {
        # add the offset of the translation start within the start exon
        $start += $translation->start();
        last;
      }

      # add the entire length of this non-coding exon
      $start += $exon->length();
    }

    # adjust cdna coords if sequence edits are enabled
    if ( $self->edits_enabled() ) {
      my @seqeds = @{ $self->get_all_SeqEdits() };

      if ( @seqeds ) {
        my $transl_start = $self->get_all_Attributes('_transl_start');

        if ( @{$transl_start} ) {
          # An explicit '_transl_start' attribute replaces the computed value.
          $start = $transl_start->[0]->value();
        } else {
          # Process edits in reverse positional order to avoid adjustment of
          # downstream edits.
          foreach my $se ( sort { $b->start() <=> $a->start() } @seqeds ) {
            # use less than start so that start of CDS can be extended
            if ( $se->start() < $start ) {
              $start += $se->length_diff();
            }
          }
        }
      }
    }

    $self->{'cdna_coding_start'} = $start;
  }

  return $self->{'cdna_coding_start'};
}


=head2 cdna_coding_end

  Arg [1]    : (optional) $value
1001
  Example    : $cdna_coding_end = $transcript->cdna_coding_end;
1002
1003
  Description: Retrieves the end of the coding region of this transcript in
               cdna coordinates (relative to the five prime end of the
Graham McVicker's avatar
Graham McVicker committed
1004
               transcript, excluding introns, including utrs).
1005
1006
1007
1008

               This will return undef if this transcript is a pseudogene
               (i.e. a transcript with no translation and therefore no CDS).
  Returntype : int
1009
1010
  Exceptions : none
  Caller     : general
1011
  Status     : Stable
1012
1013
1014
1015

=cut

sub cdna_coding_end {
  my $self = shift;

  # Setter form: an explicitly supplied value is stored and returned as is.
  if ( @_ ) {
    $self->{'cdna_coding_end'} = shift;
  }

  # Compute lazily, and only for transcripts that have a translation; for
  # others the stored value stays undefined and undef is returned.
  if ( !defined $self->{'cdna_coding_end'} && defined $self->translation() ) {
    my $translation = $self->translation();

    # Looked up once instead of on every loop iteration.
    my $end_exon = $translation->end_Exon();

    # Walk the exons in transcript order, accumulating whole exon lengths
    # until the exon in which translation ends is reached.
    my $end = 0;
    foreach my $exon ( @{ $self->get_all_Exons() } ) {
      # Numeric comparison of references: true only for the very same object.
      if ( $exon == $end_exon ) {
        # add coding portion of the final coding exon
        $end += $translation->end();
        last;
      }

      # add entire exon
      $end += $exon->length();
    }

    # adjust cdna coords if sequence edits are enabled
    if ( $self->edits_enabled() ) {
      my @seqeds = @{ $self->get_all_SeqEdits() };

      if ( @seqeds ) {
        my $transl_end = $self->get_all_Attributes('_transl_end');

        if ( @{$transl_end} ) {
          # An explicit '_transl_end' attribute replaces the computed value.
          $end = $transl_end->[0]->value();
        } else {
          # Process edits in reverse positional order to avoid adjustment of
          # downstream edits.
          foreach my $se ( sort { $b->start() <=> $a->start() } @seqeds ) {
            # use less than or equal to end+1 so end of the CDS can be extended
            if ( $se->start() <= $end + 1 ) {
              $end += $se->length_diff();
            }
          }
        }
      }
    }

    $self->{'cdna_coding_end'} = $end;
  }

  return $self->{'cdna_coding_end'};
}