Transcript.pm 88 KB
Newer Older
1
2
=head1 LICENSE

Magali Ruffier's avatar
Magali Ruffier committed
3
Copyright [1999-2015] Wellcome Trust Sanger Institute and the EMBL-European Bioinformatics Institute
premanand17's avatar
premanand17 committed
4
Copyright [2016-2018] EMBL-European Bioinformatics Institute
5

6
7
8
9
10
11
12
13
14
15
16
17
18
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

     http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.

=cut
19
20
21
22
23


=head1 CONTACT

  Please email comments or questions to the public Ensembl
Magali Ruffier's avatar
Magali Ruffier committed
24
  developers list at <http://lists.ensembl.org/mailman/listinfo/dev>.
25
26

  Questions may also be sent to the Ensembl help desk at
Magali Ruffier's avatar
Magali Ruffier committed
27
  <http://www.ensembl.org/Help/Contact>.
28
29

=cut
Ewan Birney's avatar
Ewan Birney committed
30
31
32

=head1 NAME

33
Bio::EnsEMBL::Transcript - object representing an Ensembl transcript
Ewan Birney's avatar
Ewan Birney committed
34
35
36

=head1 SYNOPSIS

37
Creation:
38

39
40
  my $tran = new Bio::EnsEMBL::Transcript();
  my $tran = new Bio::EnsEMBL::Transcript( -EXONS => \@exons );
41
42
43

Manipulation:

44
45
  # Returns an array of Exon objects
  my @exons = @{ $tran->get_all_Exons() };
46

47
48
49
50
51
52
  # Returns the peptide translation of the exons as a Bio::Seq
  if ( $tran->translation() ) {
    my $pep = $tran->translate();
  } else {
    print "Transcript ", $tran->stable_id(), " is non-coding\n";
  }
53

54
55
=head1 DESCRIPTION

56
A representation of a transcript within the Ensembl system.  A transcript
57
58
consists of a set of Exons and (possibly) a Translation which defines the
coding and non-coding regions of the exons.
Ewan Birney's avatar
Ewan Birney committed
59

60
=cut
61

62
63
package Bio::EnsEMBL::Transcript;

Ewan Birney's avatar
Ewan Birney committed
64
65
use strict;

66
use Bio::EnsEMBL::Feature;
67
use Bio::EnsEMBL::UTR;
Ian Longden's avatar
Ian Longden committed
68
use Bio::EnsEMBL::Intron;
69
70
use Bio::EnsEMBL::ExonTranscript;
use Bio::EnsEMBL::CDS;
71
use Bio::EnsEMBL::TranscriptMapper;
72
use Bio::EnsEMBL::SeqEdit;
73

74
75
use Bio::EnsEMBL::Utils::Argument qw( rearrange );
use Bio::EnsEMBL::Utils::Exception qw( deprecate warning throw );
76
use Bio::EnsEMBL::Utils::Scalar qw( assert_ref );
77

78
use parent qw(Bio::EnsEMBL::Feature);
Ewan Birney's avatar
Ewan Birney committed
79

Graham McVicker's avatar
Graham McVicker committed
80
81
82
83

=head2 new

  Arg [-EXONS] :
84
        reference to list of Bio::EnsEMBL::Exon objects - exons which make up
Graham McVicker's avatar
Graham McVicker committed
85
86
87
88
89
90
91
92
93
94
95
96
97
98
        this transcript
  Arg [-STABLE_ID] :
        string - the stable identifier of this transcript
  Arg [-VERSION] :
        int - the version of the stable identifier of this transcript
  Arg [-EXTERNAL_NAME] :
        string - the external database name associated with this transcript
  Arg [-EXTERNAL_DB] :
        string - the name of the database the external name is from
  Arg [-EXTERNAL_STATUS]:
        string - the status of the external identifier
  Arg [-DISPLAY_XREF]:
        Bio::EnsEMBL::DBEntry - The external database entry that is used
        to label this transcript when it is displayed.
99
100
101
102
103
  Arg [-CREATED_DATE]:
        string - the date the transcript was created
  Arg [-MODIFIED_DATE]:
        string - the date the transcript was last modified
  Arg [-DESCRIPTION]:
104
        string - the transcripts description
105
  Arg [-BIOTYPE]:
106
        string - the biotype e.g. "protein_coding"
107
108
  Arg [-IS_CURRENT]:
        Boolean - specifies if this is the current version of the transcript
109
110
111
  Arg [-SOURCE]:
        string - the transcript source, e.g. "ensembl"

Graham McVicker's avatar
Graham McVicker committed
112
113
114
115
116
  Example    : $tran = new Bio::EnsEMBL::Transcript(-EXONS => \@exons);
  Description: Constructor. Instantiates a Transcript object.
  Returntype : Bio::EnsEMBL::Transcript
  Exceptions : throw on bad arguments
  Caller     : general
117
  Status     : Stable
Graham McVicker's avatar
Graham McVicker committed
118
119
120

=cut

121
# Constructor.  Generic feature construction is delegated to the parent
# class; the transcript-specific attributes are then filled in from the
# named arguments.  A positional list whose first element is a reference
# is still accepted as the exon list (old calling style), with a
# deprecation notice.
sub new {
  my $proto = shift;
  my $class = ref($proto) || $proto;

  my $self = $class->SUPER::new(@_);

  # Named arguments parsed here, in the order their values are handed
  # back by rearrange().  CONFIDENCE and EXTERNAL_DB_NAME are parsed but
  # not stored on the object.
  my @arg_names = qw(
    EXONS            STABLE_ID     VERSION
    EXTERNAL_NAME    EXTERNAL_DB   EXTERNAL_STATUS
    DISPLAY_XREF     CREATED_DATE  MODIFIED_DATE
    DESCRIPTION      BIOTYPE       CONFIDENCE
    EXTERNAL_DB_NAME IS_CURRENT    SOURCE
  );
  my %arg = map { $_ => undef } @arg_names;

  if ( @_ > 0 && ref( $_[0] ) ) {
    # Catch for old style constructor calling: the arguments are the
    # exons themselves.
    $arg{'EXONS'} = [@_];
    deprecate( "Transcript constructor should use named arguments.\n"
        . "Use Bio::EnsEMBL::Transcript->new(-EXONS => \@exons);\n"
        . "instead of Bio::EnsEMBL::Transcript->new(\@exons);" );
  }
  else {
    @arg{@arg_names} = rearrange( [@arg_names], @_ );
  }

  if ( $arg{'EXONS'} ) {
    $self->{'_trans_exon_array'} = $arg{'EXONS'};
    $self->recalculate_coordinates();
  }

  $self->stable_id( $arg{'STABLE_ID'} );
  $self->{'created_date'}  = $arg{'CREATED_DATE'};
  $self->{'modified_date'} = $arg{'MODIFIED_DATE'};

  # The external annotation setters are only invoked for values that
  # were actually supplied.
  for my $attr (qw( external_name external_db external_status display_xref )) {
    my $value = $arg{ uc $attr };
    $self->$attr($value) if defined $value;
  }

  $self->edits_enabled(1);

  $self->description( $arg{'DESCRIPTION'} );

  # The biotype is stored exactly as given (possibly undef); no default
  # is applied at construction time.
  $self->{'biotype'} = $arg{'BIOTYPE'};

  $self->source( $arg{'SOURCE'} );

  # Both version and is_current default to 1 when not supplied.
  $self->{'version'} =
    defined $arg{'VERSION'} ? $arg{'VERSION'} : 1;
  $self->{'is_current'} =
    defined $arg{'IS_CURRENT'} ? $arg{'IS_CURRENT'} : 1;

  return $self;
} ## end sub new
198

Graham McVicker's avatar
Graham McVicker committed
199
=head2 get_all_DBLinks
200

201
202
203
204
  Arg [1]    : String database name (optional)
               SQL wildcard characters (_ and %) can be used to
               specify patterns.

205
206
207
208
209
  Arg [2]    : (optional) String, external database type, can be one of
               ('ARRAY','ALT_TRANS','ALT_GENE','MISC','LIT','PRIMARY_DB_SYNONYM','ENSEMBL'),
               SQL wildcard characters (_ and %) can be used to
               specify patterns.

210
  Example    : my @dblinks = @{ $transcript->get_all_DBLinks() };
211
212
               @dblinks = @{ $transcript->get_all_DBLinks('Uniprot%') };
               @dblinks = @{ $transcript->get_all_DBLinks('%', 'ENSEMBL') };
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228

  Description: Retrieves *all* related DBEntries for this
               transcript.  This includes all DBEntries that are
               associated with the corresponding translation.

               If you only want to retrieve the DBEntries associated
               with the transcript (and not the translation) then
               you should use the get_all_DBEntries() call instead.

               Note: Each entry may be listed more than once.  No
               uniqueness checks are done.  Also if you put in an
               incorrect external database name no checks are done
               to see if this exists, you will just get an empty
               list.

  Return type: Listref of Bio::EnsEMBL::DBEntry objects
229
230
  Exceptions : none
  Caller     : general
231
  Status     : Stable
232
233
234

=cut

Graham McVicker's avatar
Graham McVicker committed
235
# Collects the xrefs of this transcript together with those of its
# translation (when there is one) and returns them, sorted with
# _compare_xrefs(), as a new list reference.
sub get_all_DBLinks {
  my ( $self, $db_name_exp, $ex_db_type ) = @_;

  # Copy the transcript's own entries so the list held by
  # get_all_DBEntries() is never modified.
  my @links = @{ $self->get_all_DBEntries( $db_name_exp, $ex_db_type ) };

  # Append the xrefs attached to the translation, if any.
  if ( defined( my $translation = $self->translation() ) ) {
    push @links,
      @{ $translation->get_all_DBEntries( $db_name_exp, $ex_db_type ) };
  }

  return [ sort { _compare_xrefs() } @links ];
}

254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
=head2 get_all_xrefs

  Arg [1]    : String database name (optional)
               SQL wildcard characters (_ and %) can be used to
               specify patterns.

  Example    : @xrefs = @{ $transcript->get_all_xrefs() };
               @xrefs = @{ $transcript->get_all_xrefs('Uniprot%') };

  Description: Retrieves *all* related xrefs for this transcript.
               This includes all xrefs that are associated with the
               corresponding translation of this transcript.

               If you want to retrieve the xrefs associated with
               only the transcript (and not the translation) then
               you should use the get_all_object_xrefs() method
               instead.

               Note: Each entry may be listed more than once.  No
               uniqueness checks are done.  Also if you put in an
               incorrect external database name no checks are done
               to see if this exists, you will just get an empty
               list.

                NB: This method is an alias for the
                    get_all_DBLinks() method.

  Return type: Listref of Bio::EnsEMBL::DBEntry objects

  Status     : Stable

=cut

287
288
289
290
# Alias: forwards all arguments unchanged to get_all_DBLinks().
sub get_all_xrefs {
  my ( $self, @args ) = @_;
  return $self->get_all_DBLinks(@args);
}
Graham McVicker's avatar
Graham McVicker committed
291

292
=head2 get_all_DBEntries
293

294
295
296
  Arg [1]    : (optional) String, external database name,
               SQL wildcard characters (_ and %) can be used to
               specify patterns.
297

298
299
300
301
  Arg [2]    : (optional) String, external database type, can be one of
               ('ARRAY','ALT_TRANS','ALT_GENE','MISC','LIT','PRIMARY_DB_SYNONYM','ENSEMBL'),
               SQL wildcard characters (_ and %) can be used to
               specify patterns.
302

303
  Example    : my @dbentries = @{ $transcript->get_all_DBEntries() };
304
305
               @dbentries = @{ $transcript->get_all_DBEntries('Uniprot%') };
               @dbentries = @{ $transcript->get_all_DBEntries('%', 'ENSEMBL') };
306
307
308
309
310
311
312
313
314
315

  Description: Retrieves DBEntries (xrefs) for this transcript.
               This does *not* include the corresponding
               translations DBEntries (see get_all_DBLinks()).

               This method will attempt to lazy-load DBEntries
               from a database if an adaptor is available and no
               DBEntries are present on the transcript (i.e. they
               have not already been added or loaded).

316
  Returntype : Listref of Bio::EnsEMBL::DBEntry objects
317
318
  Exceptions : none
  Caller     : get_all_DBLinks, TranscriptAdaptor::store
319
  Status     : Stable
320
321
322

=cut

323
# Returns the (cached) list of xrefs for this transcript.  Results are
# cached on the object under a key built from 'dbentries' followed by
# whichever of the two filter arguments are defined, so every filter
# combination gets its own cache slot.
sub get_all_DBEntries {
  my ( $self, $ex_db_exp, $ex_db_type ) = @_;

  my $cache_name =
    join( '', 'dbentries', grep { defined } ( $ex_db_exp, $ex_db_type ) );

  # Nothing cached yet: load from the database when an adaptor exists.
  unless ( defined $self->{$cache_name} ) {
    my $adaptor = $self->adaptor();
    if ( defined $adaptor ) {
      my $dbentry_adaptor = $adaptor->db()->get_DBEntryAdaptor();
      $self->{$cache_name} =
        $dbentry_adaptor->fetch_all_by_Transcript( $self, $ex_db_exp,
                                                   $ex_db_type );
    }
  }

  # Always hand back (and remember) a list reference.
  $self->{$cache_name} ||= [];

  return $self->{$cache_name};
} ## end sub get_all_DBEntries
348

349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
=head2 get_all_object_xrefs

  Arg [1]    : (optional) String, external database name

  Arg [2]    : (optional) String, external_db type

  Example    : @oxrefs = @{ $transcript->get_all_object_xrefs() };

  Description: Retrieves xrefs for this transcript.  This does
               *not* include xrefs that are associated with the
               corresponding translations of this transcript (see
               get_all_xrefs()).

               This method will attempt to lazy-load xrefs from a
               database if an adaptor is available and no xrefs are
               present on the transcript (i.e. they have not already
               been added or loaded).

367
368
                NB: This method is an alias for the
                    get_all_DBEntries() method.
369
370
371
372
373
374
375

  Return type: Listref of Bio::EnsEMBL::DBEntry objects

  Status     : Stable

=cut

376
377
378
379
# Alias: forwards all arguments unchanged to get_all_DBEntries().
sub get_all_object_xrefs {
  my ( $self, @args ) = @_;
  return $self->get_all_DBEntries(@args);
}
380
381

=head2 add_DBEntry
382

383
384
  Arg [1]    : Bio::EnsEMBL::DBEntry $dbe
               The dbEntry to be added
385
386
387
388
389
  Example    : my $dbe = Bio::EnsEMBL::DBEntry->new(...);
               $transcript->add_DBEntry($dbe);
  Description: Associates a DBEntry with this transcript. Note that adding
               DBEntries will prevent future lazy-loading of DBEntries for this
               transcript (see get_all_DBEntries).
390
391
392
  Returntype : none
  Exceptions : thrown on incorrect argument type
  Caller     : general
393
  Status     : Stable
394
395
396
397
398
399
400
401

=cut

# Appends a DBEntry to the unfiltered 'dbentries' list of this
# transcript, creating the list on first use.  Throws unless the
# argument is a Bio::EnsEMBL::DBEntry.
sub add_DBEntry {
  my ( $self, $dbe ) = @_;

  my $is_dbentry =
    $dbe && ref($dbe) && $dbe->isa('Bio::EnsEMBL::DBEntry');

  if ( !$is_dbentry ) {
    throw('Expected DBEntry argument');
  }

  $self->{'dbentries'} = [] unless $self->{'dbentries'};
  push @{ $self->{'dbentries'} }, $dbe;
}

409

410
411
=head2 get_all_supporting_features

412
  Example    : my @evidence = @{ $transcript->get_all_supporting_features };
413
  Description: Retrieves any supporting features added manually by
414
               calls to add_supporting_features.
415
  Returntype : Listref of Bio::EnsEMBL::FeaturePair objects
416
417
  Exceptions : none
  Caller     : general
418
  Status     : Stable
419
420
421
422
423

=cut

# Returns the supporting features of this transcript.  When none have
# been set on the object yet and an adaptor is attached, they are
# fetched from the database and cached first.  Without any features an
# empty list reference is returned (and nothing is cached).
sub get_all_supporting_features {
  my ($self) = @_;

  if ( !exists $self->{_supporting_evidence} && $self->adaptor ) {
    my $tsfa =
      $self->adaptor->db->get_TranscriptSupportingFeatureAdaptor();
    $self->{_supporting_evidence} =
      $tsfa->fetch_all_by_Transcript($self);
  }

  return $self->{_supporting_evidence} || [];
}


=head2 add_supporting_features

438
439
440
  Arg [1-N]  : Bio::EnsEMBL::FeaturePair $feature
               The supporting features to add
  Example    : $transcript->add_supporting_features(@features);
441
  Description: Adds a list of supporting features to this Transcript.
442
443
               The added features can be retrieved by
               get_all_supporting_features().
444
445
446
447
448
  Returntype : none
  Exceptions : throw if any of the features are not FeaturePairs
               throw if any of the features are not in the same coordinate
               system as the Transcript
  Caller     : general
449
  Status     : Stable
450

451
=cut
452

453
# Adds supporting features to this transcript, skipping any feature
# object that is already present.
#
# Each feature must be a Bio::EnsEMBL::FeaturePair object.  When both the
# transcript and the feature are attached to a slice, the slice names
# must match.  Violations are reported through throw().
#
# Returns nothing.
sub add_supporting_features {
  my ( $self, @features ) = @_;

  return unless @features;

  $self->{_supporting_evidence} ||= [];

  FEATURE: foreach my $feature (@features) {

    if ( !defined($feature) || ref($feature) eq "ARRAY" ) {
      # dbID may be undefined for an unstored transcript; avoid an
      # "uninitialized" warning while building the message.
      my $db_id = $self->dbID;
      $db_id = '' unless defined $db_id;
      throw( "Element in transcript supporting features array is "
           . "undefined or is an ARRAY for "
           . $db_id );
    }

    # Only references can be feature objects.  The eval keeps an
    # unblessed reference (on which ->isa cannot be called) from dying
    # with a raw Perl error, so the throw below reports it properly.
    my $is_feature_pair = ref($feature) && do {
      local $@;
      eval { $feature->isa("Bio::EnsEMBL::FeaturePair") };
    };
    if ( !$is_feature_pair ) {
      throw(   "Supporting feat [$feature] not a "
             . "Bio::EnsEMBL::FeaturePair" );
    }

    if (    defined $self->slice()
         && defined $feature->slice()
         && $self->slice()->name() ne $feature->slice()->name() )
    {
      throw(   "Supporting feat not in same coord system as transcript\n"
             . "transcript is attached to ["
             . $self->slice()->name() . "]\n"
             . "feat is attached to ["
             . $feature->slice()->name()
             . "]" );
    }

    # Skip objects that have been added already (object identity).
    next FEATURE
      if grep { $feature == $_ } @{ $self->{_supporting_evidence} };

    push( @{ $self->{_supporting_evidence} }, $feature );
  }

  return;
}


493
494
495
496
497
498
499
=head2 flush_supporting_features

  Example     : $transcript->flush_supporting_features;
  Description : Removes all supporting evidence from the transcript.
  Return type : (Empty) listref
  Exceptions  : none
  Caller      : general
500
  Status      : Stable
501
502
503
504
505
506
507
508
509

=cut

# Replaces the supporting evidence of this transcript with a fresh empty
# list and returns that list reference.
sub flush_supporting_features {
  my ($self) = @_;
  return $self->{'_supporting_evidence'} = [];
}


510
511
=head2 external_db

512
513
  Arg [1]    : (optional) String - name of external db to set
  Example    : $transcript->external_db('HGNC');
514
515
  Description: Getter/setter for attribute external_db. The db is the one that
               belongs to the external_name.
516
517
518
519
  Returntype : String
  Exceptions : none
  Caller     : general
  Status     : Stable
520
521
522
523

=cut

# Getter/setter for the external database name.  An explicitly stored
# value wins; otherwise the name is taken from the display xref, and
# undef is returned when there is no display xref either.
sub external_db {
  my ( $self, $ext_dbname ) = @_;

  # Setter: store and return the new value.
  return $self->{'external_db'} = $ext_dbname if defined $ext_dbname;

  # Explicitly stored value (even an undefined one).
  return $self->{'external_db'} if exists $self->{'external_db'};

  # Fall back on the display xref.
  my $display_xref = $self->display_xref();
  return defined $display_xref ? $display_xref->dbname() : undef;
}

543

544
545
=head2 external_status

546
547
548
549
550
551
552
553
  Arg [1]    : (optional) String - status of the external db
  Example    : $transcript->external_status('KNOWNXREF');
  Description: Getter/setter for attribute external_status. The status of
               the external db of the one that belongs to the external_name.
  Returntype : String
  Exceptions : none
  Caller     : general
  Status     : Stable
554
555
556

=cut

557
# Getter/setter for the external status.  An explicitly stored value
# wins; otherwise the status is taken from the display xref, and undef
# is returned when there is no display xref either.
sub external_status {
  my ( $self, $ext_status ) = @_;

  # Setter: store and return the new value.
  return $self->{'external_status'} = $ext_status if defined $ext_status;

  # Explicitly stored value (even an undefined one).
  return $self->{'external_status'} if exists $self->{'external_status'};

  # Fall back on the display xref.
  my $display_xref = $self->display_xref();
  return defined $display_xref ? $display_xref->status() : undef;
}


578
579
=head2 external_name

580
581
582
583
584
585
586
  Arg [1]    : (optional) String - the external name to set
  Example    : $transcript->external_name('BRCA2-001');
  Description: Getter/setter for attribute external_name.
  Returntype : String or undef
  Exceptions : none
  Caller     : general
  Status     : Stable
587
588
589
590

=cut

# Getter/setter for the external name.  An explicitly stored value wins;
# otherwise the display id of the display xref is used, and undef is
# returned when there is no display xref either.
sub external_name {
  my ( $self, $ext_name ) = @_;

  # Setter: store and return the new value.
  return $self->{'external_name'} = $ext_name if defined $ext_name;

  # Explicitly stored value (even an undefined one).
  return $self->{'external_name'} if exists $self->{'external_name'};

  # Fall back on the display xref.
  my $display_xref = $self->display_xref();
  return defined $display_xref ? $display_xref->display_id() : undef;
}

610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
=head2 source

  Arg [1]    : (optional) String - the source to set
  Example    : $transcript->source('ensembl');
  Description: Getter/setter for attribute source
  Returntype : String
  Exceptions : none
  Caller     : general
  Status     : Stable

=cut

# Getter/setter for the source.  Any argument (even undef) replaces the
# stored value; an unset or false value reads back as "ensembl".
sub source {
  my ( $self, @new_value ) = @_;

  if (@new_value) {
    $self->{'source'} = $new_value[0];
  }

  return ( $self->{'source'} || "ensembl" );
}
627

Alistair Rust's avatar
Alistair Rust committed
628
=head2 display_xref
629

630
631
632
  Arg [1]    : (optional) Bio::EnsEMBL::DBEntry - the display xref to set
  Example    : $transcript->display_xref($db_entry);
  Description: Getter/setter for display_xref for this transcript.
633
  Returntype : Bio::EnsEMBL::DBEntry
634
635
  Exceptions : none
  Caller     : general
636
  Status     : Stable
637
638
639

=cut

640
# Getter/setter for the display xref.  Any argument (even undef)
# replaces the stored value.
sub display_xref {
  my ( $self, @new_value ) = @_;

  if (@new_value) {
    $self->{'display_xref'} = $new_value[0];
  }

  return $self->{'display_xref'};
}

646
647
648
649
650
651
652
=head2 is_canonical

  Args [1]      : (optional) Boolean is_canonical

  Example       : if ($transcript->is_canonical()) { ... }

  Description : Returns true (non-zero) if the transcript is the
Andy Yates's avatar
Andy Yates committed
653
654
655
656
657
                canonical transcript of its gene, false (0) if not. If the code
                returns an undefined it is because its state is not currently
                known. Internally the code will consult the database for this
                value if it is unknown and the transcript has a dbID and an
                attached adaptor
658
659
660
661
662
663
664
665
666

  Return type   : Boolean

  Status        : Stable

=cut

# Getter/setter for the canonical flag of this transcript.
#
# Arg [1] : (optional) Boolean.  When defined, the flag is set to 1 or 0
#           according to its truth value, replacing any cached state.
#
# Without an argument the cached flag is returned.  If no flag is cached
# and the transcript has both a dbID and an adaptor, the adaptor is asked
# (is_Transcript_canonical) and the answer is cached.  Returns undef when
# the state cannot be determined.
sub is_canonical {
  my ( $self, $value ) = @_;

  if ( defined $value ) {
    # The setter is handled before any cached value is consulted, so an
    # already known state can be changed.
    $self->{is_canonical} = ( $value ? 1 : 0 );
  }
  elsif (    !defined $self->{is_canonical}
          && $self->dbID()
          && $self->adaptor() )
  {
    $self->{is_canonical} =
      $self->adaptor()->is_Transcript_canonical($self);
  }

  return $self->{is_canonical};
}
Ewan Birney's avatar
Ewan Birney committed
682
683
684

=head2 translation

685
686
687
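  Arg [1]    : (optional) Bio::EnsEMBL::Translation - the translation to set;
               pass undef explicitly to remove the current translation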
  Example    : if ( $transcript->translation() ) {
                 print( $transcript->translation()->stable_id(), "\n" );
688
               } else {
689
                 print("Pseudogene\n");
690
               }
691
692
693
694
695
696
697
  Description: Getter/setter for the Translation object which
               defines the CDS (and as a result the peptide encoded
               by) this transcript.  This function will return
               undef if this transcript is a pseudogene, i.e. a
               non-translating transcript such as an ncRNA.  This
               is the accepted method of determining whether a
               transcript is a pseudogene or not.
698
  Returntype : Bio::EnsEMBL::Translation
699
700
  Exceptions : none
  Caller     : general
701
  Status     : Stable
Ewan Birney's avatar
Ewan Birney committed
702
703
704

=cut

Michele Clamp's avatar
Michele Clamp committed
705
# Getter/setter for the Translation of this transcript.
#
#  * A defined argument must be a Bio::EnsEMBL::Translation; it is
#    stored and linked back to this transcript.
#  * An explicit undef argument removes the current translation.
#  * Without arguments the stored translation is returned; when none has
#    been stored yet and an adaptor is attached it is fetched first.
#
# Setting or removing a translation resets the cached coding coordinates
# and the transcript mapper.
sub translation {
  my ( $self, $translation ) = @_;

  # Cached values that depend on the translation.
  my @dependent_keys = qw(
    cdna_coding_start   cdna_coding_end
    coding_region_start coding_region_end
    transcript_mapper
  );

  if ( defined $translation ) {
    assert_ref( $translation, 'Bio::EnsEMBL::Translation' );

    $self->{'translation'} = $translation;
    $translation->transcript($self);

    $self->{$_} = undef for @dependent_keys;
  }
  elsif ( @_ > 1 ) {
    # Called with an explicit undef: remove the existing translation,
    # if there is one.
    if ( defined $self->{'translation'} ) {
      $self->{'translation'}->transcript(undef);
      delete $self->{'translation'};

      $self->{$_} = undef for @dependent_keys;
    }
  }
  elsif ( !exists $self->{'translation'} && defined $self->adaptor() ) {
    # Nothing stored yet: lazy-load through the adaptor.
    my $translation_adaptor =
      $self->adaptor()->db()->get_TranslationAdaptor();
    $self->{'translation'} =
      $translation_adaptor->fetch_by_Transcript($self);
  }

  return $self->{'translation'};
} ## end sub translation
Ewan Birney's avatar
Ewan Birney committed
747

748
749
750
751
752
753
754
755
756
757
758
=head2 get_all_alternative_translations

  Args       : None
  Example    :

    my @alt_translations =
      @{ $transcript->get_all_alternative_translations() };

  Description:  Fetches all alternative translations defined for this
                transcript.  The canonical translation is not returned.

759
  Returntype : Arrayref to Bio::EnsEMBL::Translation
Andreas Kusalananda Kähäri's avatar
Andreas Kusalananda Kähäri committed
760
  Exceptions : None
761
762
763
764
765
766
767
768
  Caller     : General
  Status     : Stable

=cut

# Returns a list reference of the alternative translations of this
# transcript.
#
# When none are held on the object yet and an adaptor is attached, they
# are fetched through the TranslationAdaptor and cached.  When there is
# nothing to return (nothing stored and no adaptor) an empty list
# reference is returned, so callers can always dereference the result.
# The empty fallback is not cached, which leaves lazy loading possible
# once an adaptor becomes available.
sub get_all_alternative_translations {
  my ($self) = @_;

  if (   !exists( $self->{'alternative_translations'} )
       && defined( $self->adaptor() ) )
  {
    my $pa = $self->adaptor()->db()->get_TranslationAdaptor();
    my @translations =
      @{ $pa->fetch_all_alternative_by_Transcript($self) };

    $self->{'alternative_translations'} = \@translations;
  }

  return $self->{'alternative_translations'} || [];
}
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812

=head2 add_alternative_translation

  Args       : Bio::EnsEMBL::Translation $translation
  Example    :

    $transcript->add_alternative_translation($translation);

  Description: Adds an alternative translation to this transcript.
  Returntype : None
  Exceptions : None
  Caller     : General
  Status     : Stable

=cut

# Appends an alternative translation to this transcript.  Throws unless
# the argument is a Bio::EnsEMBL::Translation object.
sub add_alternative_translation {
  my ( $self, $translation ) = @_;

  my $is_translation =
       defined($translation)
    && ref($translation)
    && $translation->isa('Bio::EnsEMBL::Translation');

  throw("Bio::EnsEMBL::Translation argument expected.")
    unless $is_translation;

  # Load the existing alternative translations from the database if
  # they haven't already been loaded, so the new one is appended to the
  # complete list.
  $self->get_all_alternative_translations();

  push( @{ $self->{'alternative_translations'} }, $translation );
}
Arne Stabenau's avatar
Arne Stabenau committed
813

814
815
=head2 spliced_seq

816
817
  Args       : soft_mask (opt)
               if specified, will return a sequence where UTR regions are lowercased
818
819
  Description: Retrieves all Exon sequences and concats them together.
               No phase padding magic is done, even if phases do not align.
820
  Returntype : Text
821
822
  Exceptions : none
  Caller     : general
823
  Status     : Stable
824
825
826
827

=cut

# Concatenates the sequences of all exons of this transcript and, when
# edits are enabled, applies the transcript's sequence edits.
#
# Arg [1] : (optional) soft_mask.  When true, the parts of each exon
#           that lie outside its coding region are lower-cased; an exon
#           without a coding region is lower-cased completely.
#
# An exon whose sequence cannot be obtained contributes a run of 'N' of
# the exon's length (and a warning).
#
# Returns the spliced sequence as a string.
sub spliced_seq {
  my ( $self, $soft_mask ) = @_;

  my $seq_string = "";
  for my $ex ( @{ $self->get_all_Exons() } ) {
    my $seq = $ex->seq();
    if ( !$seq ) {
      warning( "Could not obtain seq for exon.  Transcript sequence may not "
             . "be correct." );
      $seq_string .= 'N' x $ex->length();
      next;
    }

    my $exon_seq = $seq->seq();

    if ($soft_mask) {
      my $coding_start = $ex->coding_region_start($self);

      if ( !defined $coding_start ) {
        # Exon without a coding region: mask all of it.  The partial
        # masking below is skipped, as it would compare and slice with
        # undefined coordinates.
        $exon_seq = lc($exon_seq);
      }
      else {
        my $coding_end = $ex->coding_region_end($self);
        my $is_forward = ( $ex->strand == 1 );

        # Non-coding bases before the coding region (in genomic
        # coordinates): the head of the string on the forward strand,
        # the tail on the reverse strand.
        if ( $coding_start > $ex->start() ) {
          my $forward_length = $coding_start - $ex->start();
          my $reverse_length = $ex->end() - $coding_start;
          if ($is_forward) {
            $exon_seq = lc( substr( $exon_seq, 0, $forward_length ) )
                      . substr( $exon_seq, $forward_length );
          }
          else {
            $exon_seq = substr( $exon_seq, 0, $reverse_length + 1 )
                      . lc( substr( $exon_seq, $reverse_length + 1 ) );
          }
        }

        # Non-coding bases after the coding region (in genomic
        # coordinates): the tail of the string on the forward strand,
        # the head on the reverse strand.
        if ( defined $coding_end && $coding_end < $ex->end() ) {
          my $forward_length = $coding_end - $ex->start();
          my $reverse_length = $ex->end() - $coding_end;
          if ($is_forward) {
            $exon_seq = substr( $exon_seq, 0, $forward_length + 1 )
                      . lc( substr( $exon_seq, $forward_length + 1 ) );
          }
          else {
            $exon_seq = lc( substr( $exon_seq, 0, $reverse_length ) )
                      . substr( $exon_seq, $reverse_length );
          }
        }
      }
    }

    $seq_string .= $exon_seq;
  }

  # apply post transcriptional edits
  if ( $self->edits_enabled() ) {
    # Apply edits from the highest start downwards so that an edit never
    # shifts the coordinates of the edits still to be applied.
    my @seqeds =
      sort { $b->start() <=> $a->start() } @{ $self->get_all_SeqEdits() };

    foreach my $se (@seqeds) {
      $se->apply_edit( \$seq_string );
    }
  }

  return $seq_string;
}


=head2 translateable_seq

  Args       : none
  Example    : print $transcript->translateable_seq(), "\n";
890
  Description: Returns a sequence string which is the translateable part
891
               of the transcripts sequence.  This is formed by splicing all
892
893
894
895
896
               Exon sequences together and apply all defined RNA edits.
               Then the coding part of the sequence is extracted and returned.
               The code will not support monkey exons any more. If you want to
               have non phase matching exons, define appropriate _rna_edit
               attributes!
897
898
899

               An empty string is returned if this transcript is a pseudogene
               (i.e. is non-translateable).
900
  Returntype : Text
901
902
  Exceptions : none
  Caller     : general
903
  Status     : Stable
904
905
906
907
908
909

=cut

# Returns the coding part of the spliced transcript sequence as a
# string.
#
# The coding region is cut out of spliced_seq() using the cDNA coding
# start and end.  When the start exon of the translation has a phase
# greater than zero, that many 'N' characters are prepended.
#
# An empty string is returned when the transcript has no translation or
# when either cDNA coding coordinate is missing (or zero).
sub translateable_seq {
  my ($self) = @_;

  if ( !$self->translation() ) {
    return '';
  }

  my $start = $self->cdna_coding_start();
  my $end   = $self->cdna_coding_end();

  # Check the coordinates before using them, so that no arithmetic or
  # substr() is done with undefined values.
  if ( !$start || !$end ) {
    return "";
  }

  my $mrna = substr( $self->spliced_seq(), $start - 1, $end - $start + 1 );

  my $start_phase = $self->translation->start_Exon->phase();
  if ( $start_phase > 0 ) {
    $mrna = ( "N" x $start_phase ) . $mrna;
  }

  return $mrna;
}

Arne Stabenau's avatar
Arne Stabenau committed
932

933
934
935
936
937
938
939
=head2 cdna_coding_start

  Arg [1]    : (optional) $value
  Example    : $relative_coding_start = $transcript->cdna_coding_start;
  Description: Retrieves the position of the coding start of this transcript
               in cdna coordinates (relative to the start of the 5prime end of
               the transcript, excluding introns, including utrs).
940
941
942

               This will return undef if this is a pseudogene (i.e. a
               transcript with no translation).
943
944
945
  Returntype : int
  Exceptions : none
  Caller     : five_prime_utr, get_all_snps, general
946
  Status     : Stable
947
948
949
950

=cut

sub cdna_coding_start {
951
952
953
954
  my $self = shift;

  if( @_ ) {
    $self->{'cdna_coding_start'} = shift;
955
  }
956

957
  if(!defined $self->{'cdna_coding_start'} && defined $self->translation){
958
    # calc coding start relative from the start of translation (in cdna coords)
959
960
961
    my $start = 0;

    my @exons = @{$self->get_all_Exons};
962
963
    my $exon;

964
    while($exon = shift @exons) {
965
      if($exon == $self->translation->start_Exon) {
966
967
968
        #add the utr portion of the start exon
        $start += $self->translation->start;
        last;
969
      } else {
970
971
        #add the entire length of this non-coding exon
        $start += $exon->length;