Transcript.pm 87.8 KB
Newer Older
1
2
=head1 LICENSE

Magali Ruffier's avatar
Magali Ruffier committed
3
Copyright [1999-2015] Wellcome Trust Sanger Institute and the EMBL-European Bioinformatics Institute
premanand17's avatar
premanand17 committed
4
Copyright [2016-2018] EMBL-European Bioinformatics Institute
5

6
7
8
9
10
11
12
13
14
15
16
17
18
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

     http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.

=cut
19
20
21
22
23


=head1 CONTACT

  Please email comments or questions to the public Ensembl
Magali Ruffier's avatar
Magali Ruffier committed
24
  developers list at <http://lists.ensembl.org/mailman/listinfo/dev>.
25
26

  Questions may also be sent to the Ensembl help desk at
Magali Ruffier's avatar
Magali Ruffier committed
27
  <http://www.ensembl.org/Help/Contact>.
28
29

=cut
Ewan Birney's avatar
Ewan Birney committed
30
31
32

=head1 NAME

33
Bio::EnsEMBL::Transcript - object representing an Ensembl transcript
Ewan Birney's avatar
Ewan Birney committed
34
35
36

=head1 SYNOPSIS

37
Creation:
38

39
40
  my $tran = new Bio::EnsEMBL::Transcript();
  my $tran = new Bio::EnsEMBL::Transcript( -EXONS => \@exons );
41
42
43

Manipulation:

44
45
  # Returns an array of Exon objects
  my @exons = @{ $tran->get_all_Exons() };
46

47
48
49
50
51
52
  # Returns the peptide translation of the exons as a Bio::Seq
  if ( $tran->translation() ) {
    my $pep = $tran->translate();
  } else {
    print "Transcript ", $tran->stable_id(), " is non-coding\n";
  }
53

54
55
=head1 DESCRIPTION

56
A representation of a transcript within the Ensembl system.  A transcript
57
58
consists of a set of Exons and (possibly) a Translation which defines the
coding and non-coding regions of the exons.
Ewan Birney's avatar
Ewan Birney committed
59

60
=cut
61

62
63
package Bio::EnsEMBL::Transcript;

Ewan Birney's avatar
Ewan Birney committed
64
use strict;
65
use warnings;
Ewan Birney's avatar
Ewan Birney committed
66

67
use Bio::EnsEMBL::Feature;
68
use Bio::EnsEMBL::UTR;
Ian Longden's avatar
Ian Longden committed
69
use Bio::EnsEMBL::Intron;
70
71
use Bio::EnsEMBL::ExonTranscript;
use Bio::EnsEMBL::CDS;
72
use Bio::EnsEMBL::TranscriptMapper;
73
use Bio::EnsEMBL::SeqEdit;
74

75
76
use Bio::EnsEMBL::Utils::Argument qw( rearrange );
use Bio::EnsEMBL::Utils::Exception qw( deprecate warning throw );
77
use Bio::EnsEMBL::Utils::Scalar qw( assert_ref );
78

79
use base qw(Bio::EnsEMBL::Feature);
Ewan Birney's avatar
Ewan Birney committed
80

Graham McVicker's avatar
Graham McVicker committed
81
82
83
84

=head2 new

  Arg [-EXONS] :
85
        reference to list of Bio::EnsEMBL::Exon objects - exons which make up
Graham McVicker's avatar
Graham McVicker committed
86
87
88
89
90
91
92
93
94
95
96
97
98
99
        this transcript
  Arg [-STABLE_ID] :
        string - the stable identifier of this transcript
  Arg [-VERSION] :
        int - the version of the stable identifier of this transcript
  Arg [-EXTERNAL_NAME] :
        string - the external database name associated with this transcript
  Arg [-EXTERNAL_DB] :
        string - the name of the database the external name is from
  Arg [-EXTERNAL_STATUS]:
        string - the status of the external identifier
  Arg [-DISPLAY_XREF]:
        Bio::EnsEMBL::DBEntry - The external database entry that is used
        to label this transcript when it is displayed.
100
101
102
103
104
  Arg [-CREATED_DATE]:
        string - the date the transcript was created
  Arg [-MODIFIED_DATE]:
        string - the date the transcript was last modified
  Arg [-DESCRIPTION]:
105
        string - the transcript's description
106
  Arg [-BIOTYPE]:
107
        string - the biotype e.g. "protein_coding"
108
109
  Arg [-IS_CURRENT]:
        Boolean - specifies if this is the current version of the transcript
110
111
112
  Arg [-SOURCE]:
        string - the transcript source, e.g. "ensembl"

Graham McVicker's avatar
Graham McVicker committed
113
114
115
116
117
  Example    : $tran = new Bio::EnsEMBL::Transcript(-EXONS => \@exons);
  Description: Constructor. Instantiates a Transcript object.
  Returntype : Bio::EnsEMBL::Transcript
  Exceptions : throw on bad arguments
  Caller     : general
118
  Status     : Stable
Graham McVicker's avatar
Graham McVicker committed
119
120
121

=cut

122
sub new {
  my $caller = shift;
  my $class  = ref($caller) || $caller;

  # The Feature base class consumes the generic feature arguments.
  my $self = $class->SUPER::new(@_);

  # Constructor keys, in the order rearrange() returns their values.
  # CONFIDENCE and EXTERNAL_DB_NAME are still accepted, but their values
  # are not used anywhere below.
  my @arg_keys = qw(
    EXONS            STABLE_ID
    VERSION          EXTERNAL_NAME
    EXTERNAL_DB      EXTERNAL_STATUS
    DISPLAY_XREF     CREATED_DATE
    MODIFIED_DATE    DESCRIPTION
    BIOTYPE          CONFIDENCE
    EXTERNAL_DB_NAME
    IS_CURRENT       SOURCE
  );
  my %arg;

  if ( @_ && ref( $_[0] ) ) {
    # Old style constructor call: the argument list is the exon list.
    $arg{'EXONS'} = [@_];
    deprecate( "Transcript constructor should use named arguments.\n"
        . "Use Bio::EnsEMBL::Transcript->new(-EXONS => \@exons);\n"
        . "instead of Bio::EnsEMBL::Transcript->new(\@exons);" );
  }
  else {
    @arg{@arg_keys} = rearrange( [@arg_keys], @_ );
  }

  if ( $arg{'EXONS'} ) {
    $self->{'_trans_exon_array'} = $arg{'EXONS'};
    $self->recalculate_coordinates();
  }

  $self->stable_id( $arg{'STABLE_ID'} );
  $self->{'created_date'}  = $arg{'CREATED_DATE'};
  $self->{'modified_date'} = $arg{'MODIFIED_DATE'};

  # The external_* accessors and display_xref are only called when a
  # value was actually supplied.
  if ( defined $arg{'EXTERNAL_NAME'} ) {
    $self->external_name( $arg{'EXTERNAL_NAME'} );
  }
  if ( defined $arg{'EXTERNAL_DB'} ) {
    $self->external_db( $arg{'EXTERNAL_DB'} );
  }
  if ( defined $arg{'EXTERNAL_STATUS'} ) {
    $self->external_status( $arg{'EXTERNAL_STATUS'} );
  }
  if ( defined $arg{'DISPLAY_XREF'} ) {
    $self->display_xref( $arg{'DISPLAY_XREF'} );
  }
  $self->edits_enabled(1);

  $self->description( $arg{'DESCRIPTION'} );

  # keep legacy behaviour of defaulting to 'protein_coding' biotype
  $self->{'biotype_id'} = $arg{'BIOTYPE'} // 'protein_coding';

  $self->source( $arg{'SOURCE'} );

  # Version defaults to 1 and is_current to true when not given.
  $self->{'version'}    = $arg{'VERSION'}    // 1;
  $self->{'is_current'} = $arg{'IS_CURRENT'} // 1;

  return $self;
} ## end sub new
199

Graham McVicker's avatar
Graham McVicker committed
200
=head2 get_all_DBLinks
201

202
203
204
205
  Arg [1]    : String database name (optional)
               SQL wildcard characters (_ and %) can be used to
               specify patterns.

206
207
208
209
210
  Arg [2]    : (optional) String, external database type, can be one of
               ('ARRAY','ALT_TRANS','ALT_GENE','MISC','LIT','PRIMARY_DB_SYNONYM','ENSEMBL'),
               SQL wildcard characters (_ and %) can be used to
               specify patterns.

211
  Example    : my @dblinks = @{ $transcript->get_all_DBLinks() };
212
213
               @dblinks = @{ $transcript->get_all_DBLinks('Uniprot%') };
               @dblinks = @{ $transcript->get_all_DBLinks('%', 'ENSEMBL') };
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229

  Description: Retrieves *all* related DBEntries for this
               transcript.  This includes all DBEntries that are
               associated with the corresponding translation.

               If you only want to retrieve the DBEntries associated
               with the transcript (and not the translation) then
               you should use the get_all_DBEntries() call instead.

               Note: Each entry may be listed more than once.  No
               uniqueness checks are done.  Also if you put in an
               incorrect external database name no checks are done
               to see if this exists, you will just get an empty
               list.

  Return type: Listref of Bio::EnsEMBL::DBEntry objects
230
231
  Exceptions : none
  Caller     : general
232
  Status     : Stable
233
234
235

=cut

Graham McVicker's avatar
Graham McVicker committed
236
sub get_all_DBLinks {
  my ( $self, $db_name_exp, $ex_db_type ) = @_;

  # Start with the xrefs attached directly to the transcript ...
  my @xrefs = @{ $self->get_all_DBEntries( $db_name_exp, $ex_db_type ) };

  # ... then append those of the translation, if there is one.
  if ( defined( my $translation = $self->translation() ) ) {
    push @xrefs,
      @{ $translation->get_all_DBEntries( $db_name_exp, $ex_db_type ) };
  }

  # _compare_xrefs() takes no arguments here; it is called from inside
  # the sort block, where $a and $b are set.
  return [ sort { _compare_xrefs() } @xrefs ];
}

255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
=head2 get_all_xrefs

  Arg [1]    : String database name (optional)
               SQL wildcard characters (_ and %) can be used to
               specify patterns.

  Example    : @xrefs = @{ $transcript->get_all_xrefs() };
               @xrefs = @{ $transcript->get_all_xrefs('Uniprot%') };

  Description: Retrieves *all* related xrefs for this transcript.
               This includes all xrefs that are associated with the
               corresponding translation of this transcript.

               If you want to retrieve the xrefs associated with
               only the transcript (and not the translation) then
               you should use the get_all_object_xrefs() method
               instead.

               Note: Each entry may be listed more than once.  No
               uniqueness checks are done.  Also if you put in an
               incorrect external database name no checks are done
               to see if this exists, you will just get an empty
               list.

                NB: This method is an alias for the
                    get_all_DBLinks() method.

  Return type: Listref of Bio::EnsEMBL::DBEntry objects

  Status     : Stable

=cut

288
289
290
291
sub get_all_xrefs {
  my ( $self, @filters ) = @_;

  # Pure alias: every argument is forwarded untouched.
  return $self->get_all_DBLinks(@filters);
}
Graham McVicker's avatar
Graham McVicker committed
292

293
=head2 get_all_DBEntries
294

295
296
297
  Arg [1]    : (optional) String, external database name,
               SQL wildcard characters (_ and %) can be used to
               specify patterns.
298

299
300
301
302
  Arg [2]    : (optional) String, external database type, can be one of
               ('ARRAY','ALT_TRANS','ALT_GENE','MISC','LIT','PRIMARY_DB_SYNONYM','ENSEMBL'),
               SQL wildcard characters (_ and %) can be used to
               specify patterns.
303

304
  Example    : my @dbentries = @{ $transcript->get_all_DBEntries() };
305
306
               @dbentries = @{ $transcript->get_all_DBEntries('Uniprot%') };
               @dbentries = @{ $transcript->get_all_DBEntries('%', 'ENSEMBL') };
307
308
309
310
311
312
313
314
315
316

  Description: Retrieves DBEntries (xrefs) for this transcript.
               This does *not* include the corresponding
               translations DBEntries (see get_all_DBLinks()).

               This method will attempt to lazy-load DBEntries
               from a database if an adaptor is available and no
               DBEntries are present on the transcript (i.e. they
               have not already been added or loaded).

317
  Returntype : Listref of Bio::EnsEMBL::DBEntry objects
318
319
  Exceptions : none
  Caller     : get_all_DBLinks, TranscriptAdaptor::store
320
  Status     : Stable
321
322
323

=cut

324
sub get_all_DBEntries {
  my ( $self, $ex_db_exp, $ex_db_type ) = @_;

  # The cache slot is 'dbentries' followed by whichever filters were
  # given, concatenated without a separator.
  my $cache_name =
    join( '', 'dbentries', grep { defined($_) } ( $ex_db_exp, $ex_db_type ) );

  # Nothing cached yet: fetch from the database when an adaptor is attached.
  unless ( defined $self->{$cache_name} ) {
    my $adaptor = $self->adaptor();
    if ( defined $adaptor ) {
      my $dbentry_adaptor = $adaptor->db()->get_DBEntryAdaptor();
      $self->{$cache_name} =
        $dbentry_adaptor->fetch_all_by_Transcript( $self, $ex_db_exp,
                                                   $ex_db_type );
    }
  }

  # Fall back to (and remember) an empty list.
  return $self->{$cache_name} ||= [];
} ## end sub get_all_DBEntries
349

350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
=head2 get_all_object_xrefs

  Arg [1]    : (optional) String, external database name

  Arg [2]    : (optional) String, external_db type

  Example    : @oxrefs = @{ $transcript->get_all_object_xrefs() };

  Description: Retrieves xrefs for this transcript.  This does
               *not* include xrefs that are associated with the
               corresponding translations of this transcript (see
               get_all_xrefs()).

               This method will attempt to lazy-load xrefs from a
               database if an adaptor is available and no xrefs are
               present on the transcript (i.e. they have not already
               been added or loaded).

368
369
                NB: This method is an alias for the
                    get_all_DBEntries() method.
370
371
372
373
374
375
376

  Return type: Listref of Bio::EnsEMBL::DBEntry objects

  Status     : Stable

=cut

377
378
379
380
sub get_all_object_xrefs {
  my ( $self, @filters ) = @_;

  # Pure alias: every argument is forwarded untouched.
  return $self->get_all_DBEntries(@filters);
}
381
382

=head2 add_DBEntry
383

384
385
  Arg [1]    : Bio::EnsEMBL::DBEntry $dbe
               The dbEntry to be added
386
387
388
389
390
  Example    : my $dbe = Bio::EnsEMBL::DBEntry->new(...);
               $transcript->add_DBEntry($dbe);
  Description: Associates a DBEntry with this transcript. Note that adding
               DBEntries will prevent future lazy-loading of DBEntries for this
               transcript (see get_all_DBEntries).
391
392
393
  Returntype : none
  Exceptions : thrown on incorrect argument type
  Caller     : general
394
  Status     : Stable
395
396
397
398
399
400
401
402

=cut

sub add_DBEntry {
  my ( $self, $dbe ) = @_;

  # Reject anything that is not a blessed DBEntry (or subclass).
  if ( !$dbe || !ref($dbe) || !$dbe->isa('Bio::EnsEMBL::DBEntry') ) {
    throw('Expected DBEntry argument');
  }

  # Create the unfiltered cache slot on first use, then append to it.
  my $entries = ( $self->{'dbentries'} ||= [] );
  push @{$entries}, $dbe;
}

410

411
412
=head2 get_all_supporting_features

413
  Example    : my @evidence = @{ $transcript->get_all_supporting_features };
414
  Description: Retrieves any supporting features added manually by
415
               calls to add_supporting_features.
416
  Returntype : Listref of Bio::EnsEMBL::FeaturePair objects
417
418
  Exceptions : none
  Caller     : general
419
  Status     : Stable
420
421
422
423
424

=cut

sub get_all_supporting_features {
  my ($self) = @_;

  # Lazy-load once: only when nothing has been added or loaded yet and an
  # adaptor is available.
  if ( !exists( $self->{_supporting_evidence} ) && $self->adaptor ) {
    my $tsfa = $self->adaptor->db->get_TranscriptSupportingFeatureAdaptor();
    $self->{_supporting_evidence} = $tsfa->fetch_all_by_Transcript($self);
  }

  # Without stored evidence, return a fresh empty list (not cached).
  return $self->{_supporting_evidence} || [];
}


=head2 add_supporting_features

439
440
441
  Arg [1-N]  : Bio::EnsEMBL::FeaturePair $feature
               The supporting features to add
  Example    : $transcript->add_supporting_features(@features);
442
  Description: Adds a list of supporting features to this Transcript.
443
444
               The added features can be retrieved by
               get_all_supporting_features().
445
446
447
448
449
  Returntype : none
  Exceptions : throw if any of the features are not FeaturePairs
               throw if any of the features are not in the same coordinate
               system as the Transcript
  Caller     : general
450
  Status     : Stable
451

452
=cut
453

454
# Adds FeaturePair objects to this transcript's supporting evidence,
# skipping any object that is already present.
#
# Throws if an element is undefined, is an array reference, is not a
# Bio::EnsEMBL::FeaturePair, or is attached to a slice whose name differs
# from the transcript's slice name.
sub add_supporting_features {
  my ( $self, @features ) = @_;

  return unless @features;

  $self->{_supporting_evidence} ||= [];

  FEATURE: foreach my $feature (@features) {

    if ( !defined($feature) || ref($feature) eq "ARRAY" ) {
      throw("Element in transcript supporting features array is undefined or is an ARRAY for " . $self->dbID);
    }

    # ->isa() dies on unblessed references, so it is wrapped in an eval:
    # any non-FeaturePair value then reaches the throw() below instead of
    # an unrelated Perl error.  $@ is localised so the caller's error
    # state is left untouched.  (A debugging print to STDOUT used to sit
    # here and has been removed.)
    my $is_feature_pair = ref($feature) && do {
      local $@;
      eval { $feature->isa("Bio::EnsEMBL::FeaturePair") };
    };
    if ( !$is_feature_pair ) {
      throw("Supporting feat [$feature] not a " .
            "Bio::EnsEMBL::FeaturePair");
    }

    # Slices are only compared (by name) when both objects have one.
    if ( ( defined $self->slice() && defined $feature->slice() ) &&
         ( $self->slice()->name() ne $feature->slice()->name() ) ) {
      throw("Supporting feat not in same coord system as transcript\n" .
            "transcript is attached to [".$self->slice()->name()."]\n" .
            "feat is attached to [".$feature->slice()->name()."]");
    }

    # Skip objects that were added before (identity, not content, is
    # compared).
    foreach my $added_feature ( @{ $self->{_supporting_evidence} } ) {
      next FEATURE if $feature == $added_feature;
    }

    # no duplicate was found, add the feature
    push( @{ $self->{_supporting_evidence} }, $feature );
  }
}


494
495
496
497
498
499
500
=head2 flush_supporting_features

  Example     : $transcript->flush_supporting_features;
  Description : Removes all supporting evidence from the transcript.
  Return type : (Empty) listref
  Exceptions  : none
  Caller      : general
501
  Status      : Stable
502
503
504
505
506
507
508
509
510

=cut

sub flush_supporting_features {
  my ($self) = @_;

  # Replace the stored evidence with a fresh empty list and return it.
  return $self->{'_supporting_evidence'} = [];
}


511
512
=head2 external_db

513
514
  Arg [1]    : (optional) String - name of external db to set
  Example    : $transcript->external_db('HGNC');
515
516
  Description: Getter/setter for attribute external_db. The db is the one that
               belongs to the external_name.
517
518
519
520
  Returntype : String
  Exceptions : none
  Caller     : general
  Status     : Stable
521
522
523
524

=cut

sub external_db {
  my ( $self, $ext_dbname ) = @_;

  # Setter: only a defined value is stored.
  return ( $self->{'external_db'} = $ext_dbname ) if defined $ext_dbname;

  # An explicitly stored value wins over the display xref.
  return $self->{'external_db'} if exists $self->{'external_db'};

  # Otherwise fall back to the db name of the display xref, if any.
  my $xref = $self->display_xref();
  return ( defined($xref) ? $xref->dbname() : undef );
}

544

545
546
=head2 external_status

547
548
549
550
551
552
553
554
  Arg [1]    : (optional) String - status of the external db
  Example    : $transcript->external_status('KNOWNXREF');
  Description: Getter/setter for attribute external_status. The status of
               the external db of the one that belongs to the external_name.
  Returntype : String
  Exceptions : none
  Caller     : general
  Status     : Stable
555
556
557

=cut

558
sub external_status {
  my ( $self, $ext_status ) = @_;

  # Setter: only a defined value is stored.
  return ( $self->{'external_status'} = $ext_status ) if defined $ext_status;

  # An explicitly stored value wins over the display xref.
  return $self->{'external_status'} if exists $self->{'external_status'};

  # Otherwise fall back to the status of the display xref, if any.
  my $xref = $self->display_xref();
  return ( defined($xref) ? $xref->status() : undef );
}


579
580
=head2 external_name

581
582
583
584
585
586
587
  Arg [1]    : (optional) String - the external name to set
  Example    : $transcript->external_name('BRCA2-001');
  Description: Getter/setter for attribute external_name.
  Returntype : String or undef
  Exceptions : none
  Caller     : general
  Status     : Stable
588
589
590
591

=cut

sub external_name {
  my ( $self, $ext_name ) = @_;

  # Setter: only a defined value is stored.
  return ( $self->{'external_name'} = $ext_name ) if defined $ext_name;

  # An explicitly stored value wins over the display xref.
  return $self->{'external_name'} if exists $self->{'external_name'};

  # Otherwise fall back to the display id of the display xref, if any.
  my $xref = $self->display_xref();
  return ( defined($xref) ? $xref->display_id() : undef );
}

611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
=head2 source

  Arg [1]    : (optional) String - the source to set
  Example    : $transcript->source('ensembl');
  Description: Getter/setter for attribute source
  Returntype : String
  Exceptions : none
  Caller     : general
  Status     : Stable

=cut

sub source {
  my ( $self, @new_source ) = @_;

  # Any argument, even undef, overwrites the stored source.
  if (@new_source) {
    $self->{'source'} = $new_source[0];
  }

  # A missing or false source reads back as "ensembl".
  return $self->{'source'} || "ensembl";
}
628

Alistair Rust's avatar
Alistair Rust committed
629
=head2 display_xref
630

631
632
633
  Arg [1]    : (optional) Bio::EnsEMBL::DBEntry - the display xref to set
  Example    : $transcript->display_xref($db_entry);
  Description: Getter/setter for display_xref for this transcript.
634
  Returntype : Bio::EnsEMBL::DBEntry
635
636
  Exceptions : none
  Caller     : general
637
  Status     : Stable
638
639
640

=cut

641
sub display_xref {
  my ( $self, @new_xref ) = @_;

  # Any argument, even undef, overwrites the stored display xref.
  if (@new_xref) {
    $self->{'display_xref'} = $new_xref[0];
  }

  return $self->{'display_xref'};
}

647
648
649
650
651
652
653
=head2 is_canonical

  Args [1]      : (optional) Boolean is_canonical

  Example       : if ($transcript->is_canonical()) { ... }

  Description : Returns true (non-zero) if the transcript is the
Andy Yates's avatar
Andy Yates committed
654
655
656
657
658
                canonical transcript of its gene, false (0) if not. If the code
                returns an undefined it is because its state is not currently
                known. Internally the code will consult the database for this
                value if it is unknown and the transcript has a dbID and an
                attached adaptor
659
660
661
662
663
664
665
666
667

  Return type   : Boolean

  Status        : Stable

=cut

# Getter/setter for the canonical flag.
#
# With a defined argument the flag is set (normalised to 1 or 0) and
# returned.  Without one, the cached flag is returned; if nothing is
# cached and the transcript has both a dbID and an adaptor, the adaptor
# is asked once and its answer is cached.  Returns undef when the state
# is unknown.
sub is_canonical {
  my ( $self, $value ) = @_;

  if ( defined($value) ) {
    # The setter is handled first: returning the cached value before
    # looking at $value made an explicit update a silent no-op once a
    # value had been cached.
    $self->{is_canonical} = ( $value ? 1 : 0 );
  }
  elsif (    !defined( $self->{is_canonical} )
          && $self->dbID()
          && $self->adaptor() )
  {
    $self->{is_canonical} =
      $self->adaptor()->is_Transcript_canonical($self);
  }

  return $self->{is_canonical};
}
Ewan Birney's avatar
Ewan Birney committed
683
684
685

=head2 translation

686
687
688
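  Arg [1]    : (optional) Bio::EnsEMBL::Translation - the translation to
               set; passing an explicit undef removes the current one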
  Example    : if ( $transcript->translation() ) {
                 print( $transcript->translation()->stable_id(), "\n" );
689
               } else {
690
                 print("Pseudogene\n");
691
               }
692
693
694
695
696
697
698
  Description: Getter/setter for the Translation object which
               defines the CDS (and as a result the peptide encoded
               by) this transcript.  This function will return
               undef if this transcript is a pseudogene, i.e. a
               non-translating transcript such as an ncRNA.  This
               is the accepted method of determining whether a
               transcript is a pseudogene or not.
699
  Returntype : Bio::EnsEMBL::Translation
700
701
  Exceptions : none
  Caller     : general
702
  Status     : Stable
Ewan Birney's avatar
Ewan Birney committed
703
704
705

=cut

Michele Clamp's avatar
Michele Clamp committed
706
sub translation {
  my ( $self, $translation ) = @_;

  # Cached values that are reset whenever the translation is replaced
  # or removed.
  my @derived_keys = qw(
    cdna_coding_start   cdna_coding_end
    coding_region_start coding_region_end
    transcript_mapper
  );

  if ( defined $translation ) {
    # Setter: attach the new translation and link it back to us.
    assert_ref( $translation, 'Bio::EnsEMBL::Translation' );

    $self->{'translation'} = $translation;
    $translation->transcript($self);

    $self->{$_} = undef for @derived_keys;
  }
  elsif ( @_ > 1 ) {
    # An explicit undef was passed: remove any existing translation.
    if ( defined $self->{'translation'} ) {
      $self->{'translation'}->transcript(undef);
      delete $self->{'translation'};

      $self->{$_} = undef for @derived_keys;
    }
  }
  elsif ( !exists $self->{'translation'} && defined $self->adaptor() ) {
    # Plain getter with nothing loaded yet: fetch via the adaptor.
    my $translation_adaptor = $self->adaptor()->db()->get_TranslationAdaptor();
    $self->{'translation'} = $translation_adaptor->fetch_by_Transcript($self);
  }

  return $self->{'translation'};
} ## end sub translation
Ewan Birney's avatar
Ewan Birney committed
748

749
750
751
752
753
754
755
756
757
758
759
=head2 get_all_alternative_translations

  Args       : None
  Example    :

    my @alt_translations =
      @{ $transcript->get_all_alternative_translations() };

  Description:  Fetches all alternative translations defined for this
                transcript.  The canonical translation is not returned.

760
  Returntype : Arrayref to Bio::EnsEMBL::Translation
Andreas Kusalananda Kähäri's avatar
Andreas Kusalananda Kähäri committed
761
  Exceptions : None
762
763
764
765
766
767
768
769
  Caller     : General
  Status     : Stable

=cut

# Returns an array reference of the alternative translations of this
# transcript.  On the first call, if none have been added or loaded and
# an adaptor is attached, they are fetched from the database and cached.
sub get_all_alternative_translations {
  my ($self) = @_;

  if (   !exists( $self->{'alternative_translations'} )
       && defined( $self->adaptor() ) )
  {
    my $pa = $self->adaptor()->db()->get_TranslationAdaptor();
    my @translations =
      @{ $pa->fetch_all_alternative_by_Transcript($self) };

    $self->{'alternative_translations'} = \@translations;
  }

  # Always return an array reference, so the documented
  # "@{ $transcript->get_all_alternative_translations() }" usage also
  # works when there is no adaptor and nothing has been added.  The
  # empty list is not cached, so a later lazy load still happens.
  return $self->{'alternative_translations'} || [];
}
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813

=head2 add_alternative_translation

  Args       : Bio::EnsEMBL::Translation $translation
  Example    :

    $transcript->add_alternative_translation($translation);

  Description: Adds an alternative translation to this transcript.
  Returntype : None
  Exceptions : None
  Caller     : General
  Status     : Stable

=cut

sub add_alternative_translation {
  my ( $self, $translation ) = @_;

  my $is_translation = defined($translation)
    && ref($translation)
    && $translation->isa('Bio::EnsEMBL::Translation');

  throw("Bio::EnsEMBL::Translation argument expected.")
    unless $is_translation;

  # Load the existing alternative translations from the database if
  # they haven't already been loaded.
  $self->get_all_alternative_translations();

  push( @{ $self->{'alternative_translations'} }, $translation );
}
Arne Stabenau's avatar
Arne Stabenau committed
814

815
816
=head2 spliced_seq

817
818
  Args       : soft_mask (opt)
               if specified, will return a sequence where UTR regions are lowercased
819
820
  Description: Retrieves all Exon sequences and concats them together.
               No phase padding magic is done, even if phases do not align.
821
  Returntype : Text
822
823
  Exceptions : none
  Caller     : general
824
  Status     : Stable
825
826
827
828

=cut

# Concatenates the sequences of all exons of this transcript and, when
# edits are enabled, applies the transcript's SeqEdits to the result.
#
# Arg [1] : (optional) soft_mask - if true, the parts of each exon that
#           lie outside its coding region are lowercased.
# Returns : the spliced sequence as a string.  An exon whose sequence
#           cannot be obtained contributes a run of 'N' of the exon's
#           length, and a warning is issued.
sub spliced_seq {
  my ( $self, $soft_mask ) = @_;

  my $seq_string = "";
  for my $ex ( @{ $self->get_all_Exons() } ) {
    my $seq = $ex->seq();
    if ( !$seq ) {
      warning("Could not obtain seq for exon.  Transcript sequence may not " .
              "be correct.");
      $seq_string .= 'N' x $ex->length();
    } else {
      my $exon_seq = $seq->seq();
      if ($soft_mask) {
        my $coding_start = $ex->coding_region_start($self);

        if ( !defined($coding_start) ) {
          # Wholly non-coding exon: lowercase all of it and stop here.
          # Falling through to the partial masking below would do
          # arithmetic on undefined coordinates.
          $exon_seq = lc($exon_seq);
        } else {
          my $coding_end = $ex->coding_region_end($self);

          # Non-coding stretch before the coding region (in slice
          # coordinates): the head of the string on the forward strand,
          # the tail on the reverse strand.
          if ( $coding_start > $ex->start() ) {
            my $forward_length = $coding_start - $ex->start();
            my $reverse_length = $ex->end() - $coding_start;
            if ( $ex->strand == 1 ) {
              $exon_seq = lc( substr( $exon_seq, 0, $forward_length ) ) . substr( $exon_seq, $forward_length );
            } else {
              $exon_seq = substr( $exon_seq, 0, $reverse_length + 1 ) . lc( substr( $exon_seq, $reverse_length + 1 ) );
            }
          }

          # Non-coding stretch after the coding region (in slice
          # coordinates): the tail of the string on the forward strand,
          # the head on the reverse strand.
          if ( $coding_end < $ex->end() ) {
            my $forward_length = $coding_end - $ex->start();
            my $reverse_length = $ex->end() - $coding_end;
            if ( $ex->strand == 1 ) {
              $exon_seq = substr( $exon_seq, 0, $forward_length + 1 ) . lc( substr( $exon_seq, $forward_length + 1 ) );
            } else {
              $exon_seq = lc( substr( $exon_seq, 0, $reverse_length ) ) . substr( $exon_seq, $reverse_length );
            }
          }
        }
      }
      $seq_string .= $exon_seq;
    }
  }

  # apply post transcriptional edits
  if ( $self->edits_enabled() ) {
    my @seqeds = @{ $self->get_all_SeqEdits() };

    # sort edits in reverse order to remove complication of
    # adjusting downstream edits
    @seqeds = sort { $b->start() <=> $a->start() } @seqeds;

    foreach my $se (@seqeds) {
      $se->apply_edit( \$seq_string );
    }
  }

  return $seq_string;
}


=head2 translateable_seq

  Args       : none
  Example    : print $transcript->translateable_seq(), "\n";
891
  Description: Returns a sequence string which is the translateable part
892
               of the transcripts sequence.  This is formed by splicing all
893
894
895
896
897
               Exon sequences together and apply all defined RNA edits.
               Then the coding part of the sequence is extracted and returned.
               The code will not support monkey exons any more. If you want to
               have non phase matching exons, define appropriate _rna_edit
               attributes!
898
899
900

               An empty string is returned if this transcript is a pseudogene
               (i.e. is non-translateable).
901
  Returntype : Text
902
903
  Exceptions : none
  Caller     : general
904
  Status     : Stable
905
906
907
908
909
910

=cut

# Returns the coding part of the spliced (and edited) transcript
# sequence as a string, left-padded with one 'N' per phase position when
# the translation's start exon has a phase greater than zero.
#
# Returns an empty string when the transcript has no translation or when
# the cDNA coding start/end are not available.
sub translateable_seq {
  my ($self) = @_;

  if ( !$self->translation() ) {
    return '';
  }

  my $start = $self->cdna_coding_start();
  my $end   = $self->cdna_coding_end();

  # Bail out before the coordinates are used: this check used to run
  # only after the substr() arithmetic and the start_Exon dereference.
  if ( !$start || !$end ) {
    return "";
  }

  my $mrna = substr( $self->spliced_seq(), $start - 1, $end - $start + 1 );

  my $start_phase = $self->translation->start_Exon->phase();
  if ( $start_phase > 0 ) {
    $mrna = "N" x $start_phase . $mrna;
  }

  return $mrna;
}

Arne Stabenau's avatar
Arne Stabenau committed
933

934
935
936
937
938
939
940
=head2 cdna_coding_start

  Arg [1]    : (optional) $value
  Example    : $relative_coding_start = $transcript->cdna_coding_start;
  Description: Retrieves the position of the coding start of this transcript
               in cdna coordinates (relative to the start of the 5prime end of
               the transcript, excluding introns, including utrs).
941
942
943

               This will return undef if this is a pseudogene (i.e. a
               transcript with no translation).
944
945
946
  Returntype : int
  Exceptions : none
  Caller     : five_prime_utr, get_all_snps, general
947
  Status     : Stable
948
949
950
951

=cut

sub cdna_coding_start {
952
953
954
955
  my $self = shift;

  if( @_ ) {
    $self->{'cdna_coding_start'} = shift;
956
  }
957

958
  if(!defined $self->{'cdna_coding_start'} && defined $self->translation){
959
    # calc coding start relative from the start of translation (in cdna coords)
960
961
962
    my $start = 0;

    my @exons = @{$self->get_all_Exons};
963
964
    my $exon;

965
    while($exon = shift @exons) {
966
      if($exon == $self->translation->start_Exon) {
967
968
969
        #add the utr portion of the start exon
        $start += $self->translation->start;
        last;
970
      } else {
971
972
        #add the entire length of this non-coding exon
        $start += $exon->length;