Transcript.pm 86.9 KB
Newer Older
1
2
=head1 LICENSE

Magali Ruffier's avatar
Magali Ruffier committed
3
Copyright [1999-2015] Wellcome Trust Sanger Institute and the EMBL-European Bioinformatics Institute
Magali Ruffier's avatar
Magali Ruffier committed
4
Copyright [2016-2017] EMBL-European Bioinformatics Institute
5

6
7
8
9
10
11
12
13
14
15
16
17
18
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

     http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.

=cut
19
20
21
22
23


=head1 CONTACT

  Please email comments or questions to the public Ensembl
Magali Ruffier's avatar
Magali Ruffier committed
24
  developers list at <http://lists.ensembl.org/mailman/listinfo/dev>.
25
26

  Questions may also be sent to the Ensembl help desk at
Magali Ruffier's avatar
Magali Ruffier committed
27
  <http://www.ensembl.org/Help/Contact>.
28
29

=cut
Ewan Birney's avatar
Ewan Birney committed
30
31
32

=head1 NAME

33
Bio::EnsEMBL::Transcript - object representing an Ensembl transcript
Ewan Birney's avatar
Ewan Birney committed
34
35
36

=head1 SYNOPSIS

37
Creation:
38

39
40
  my $tran = new Bio::EnsEMBL::Transcript();
  my $tran = new Bio::EnsEMBL::Transcript( -EXONS => \@exons );
41
42
43

Manipulation:

44
45
  # Returns an array of Exon objects
  my @exons = @{ $tran->get_all_Exons() };
46

47
48
49
50
51
52
  # Returns the peptide translation of the exons as a Bio::Seq
  if ( $tran->translation() ) {
    my $pep = $tran->translate();
  } else {
    print "Transcript ", $tran->stable_id(), " is non-coding\n";
  }
53

54
55
=head1 DESCRIPTION

56
A representation of a transcript within the Ensembl system.  A transcript
57
58
consists of a set of Exons and (possibly) a Translation which defines the
coding and non-coding regions of the exons.
Ewan Birney's avatar
Ewan Birney committed
59

60
=cut
61

62
63
package Bio::EnsEMBL::Transcript;

Ewan Birney's avatar
Ewan Birney committed
64
65
use strict;

66
use Bio::EnsEMBL::Feature;
67
use Bio::EnsEMBL::UTR;
Ian Longden's avatar
Ian Longden committed
68
use Bio::EnsEMBL::Intron;
69
70
use Bio::EnsEMBL::ExonTranscript;
use Bio::EnsEMBL::CDS;
71
use Bio::EnsEMBL::TranscriptMapper;
72
use Bio::EnsEMBL::SeqEdit;
73

74
75
use Bio::EnsEMBL::Utils::Argument qw( rearrange );
use Bio::EnsEMBL::Utils::Exception qw( deprecate warning throw );
76
use Bio::EnsEMBL::Utils::Scalar qw( assert_ref );
77

78
use vars qw(@ISA);
79
@ISA = qw(Bio::EnsEMBL::Feature);
Ewan Birney's avatar
Ewan Birney committed
80

Graham McVicker's avatar
Graham McVicker committed
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99

=head2 new

  Arg [-EXONS] :
        reference to list of Bio::EnsEMBL::Exon objects - exons which make up 
        this transcript
  Arg [-STABLE_ID] :
        string - the stable identifier of this transcript
  Arg [-VERSION] :
        int - the version of the stable identifier of this transcript
  Arg [-EXTERNAL_NAME] :
        string - the external database name associated with this transcript
  Arg [-EXTERNAL_DB] :
        string - the name of the database the external name is from
  Arg [-EXTERNAL_STATUS]:
        string - the status of the external identifier
  Arg [-DISPLAY_XREF]:
        Bio::EnsEMBL::DBEntry - The external database entry that is used
        to label this transcript when it is displayed.
100
101
102
103
104
  Arg [-CREATED_DATE]:
        string - the date the transcript was created
  Arg [-MODIFIED_DATE]:
        string - the date the transcript was last modified
  Arg [-DESCRIPTION]:
105
        string - the transcripts description
106
107
  Arg [-BIOTYPE]: 
        string - the biotype e.g. "protein_coding"
108
109
  Arg [-STATUS]:
        string - the transcripts status i.e. "KNOWN","NOVEL"
110
111
  Arg [-IS_CURRENT]:
        Boolean - specifies if this is the current version of the transcript
112
113
114
  Arg [-SOURCE]:
        string - the transcript source, e.g. "ensembl"

Graham McVicker's avatar
Graham McVicker committed
115
116
117
118
119
  Example    : $tran = new Bio::EnsEMBL::Transcript(-EXONS => \@exons);
  Description: Constructor. Instantiates a Transcript object.
  Returntype : Bio::EnsEMBL::Transcript
  Exceptions : throw on bad arguments
  Caller     : general
120
  Status     : Stable
Graham McVicker's avatar
Graham McVicker committed
121
122
123

=cut

124
# Constructor.  Generic arguments are handled by the parent class via
# SUPER::new(); the transcript-specific named arguments listed in the POD
# above are then unpacked and applied through the accessors.
sub new {
  my $proto = shift;
  my $class = ref($proto) || $proto;

  my $self = $class->SUPER::new(@_);

  my (
    $exons,            $stable_id,    $version,
    $external_name,    $external_db,  $external_status,
    $display_xref,     $created_date, $modified_date,
    $description,      $biotype,      $confidence,
    $external_db_name, $status,       $is_current,
    $source
  );

  # Catch for old style constructor calling: a leading reference means the
  # caller passed a plain list of exons rather than named arguments.
  if ( ( @_ > 0 ) && ref( $_[0] ) ) {
    $exons = [@_];
    deprecate( "Transcript constructor should use named arguments.\n"
        . "Use Bio::EnsEMBL::Transcript->new(-EXONS => \@exons);\n"
        . "instead of Bio::EnsEMBL::Transcript->new(\@exons);" );
  } else {
    (
      $exons,            $stable_id,    $version,
      $external_name,    $external_db,  $external_status,
      $display_xref,     $created_date, $modified_date,
      $description,      $biotype,      $confidence,
      $external_db_name, $status,       $is_current,
      $source
      )
      = rearrange( [
        'EXONS',            'STABLE_ID',
        'VERSION',          'EXTERNAL_NAME',
        'EXTERNAL_DB',      'EXTERNAL_STATUS',
        'DISPLAY_XREF',     'CREATED_DATE',
        'MODIFIED_DATE',    'DESCRIPTION',
        'BIOTYPE',          'CONFIDENCE',
        'EXTERNAL_DB_NAME', 'STATUS',
        'IS_CURRENT',       'SOURCE'
      ],
      @_
      );
  }

  if ($exons) {
    $self->{'_trans_exon_array'} = $exons;
    $self->recalculate_coordinates();
  }

  $self->stable_id($stable_id);
  $self->version($version);
  $self->{'created_date'}  = $created_date;
  $self->{'modified_date'} = $modified_date;

  # The external_* accessors fall back to the display xref when nothing
  # has been stored, so only store values that were actually supplied.
  $self->external_name($external_name) if ( defined $external_name );
  $self->external_db($external_db)     if ( defined $external_db );
  $self->external_status($external_status)
    if ( defined $external_status );
  $self->display_xref($display_xref) if ( defined $display_xref );
  $self->edits_enabled(1);

  $self->description($description);

  # -STATUS is the new style name and -CONFIDENCE the old style one.
  # Setting both in sequence would let an absent -STATUS (undef) wipe out
  # a supplied -CONFIDENCE, so pick the value once: -STATUS wins when given.
  $self->status( defined($status) ? $status : $confidence );

  $self->biotype($biotype);
  $self->source($source);

  # default is_current
  $is_current = 1 unless ( defined($is_current) );
  $self->{'is_current'} = $is_current;

  return $self;
} ## end sub new
197

Graham McVicker's avatar
Graham McVicker committed
198
=head2 get_all_DBLinks
199

200
201
202
203
  Arg [1]    : String database name (optional)
               SQL wildcard characters (_ and %) can be used to
               specify patterns.

204
  Example    : my @dblinks = @{ $transcript->get_all_DBLinks() };
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
               my @dblinks = @{ $transcript->get_all_DBLinks('Uniprot%') };

  Description: Retrieves *all* related DBEntries for this
               transcript.  This includes all DBEntries that are
               associated with the corresponding translation.

               If you only want to retrieve the DBEntries associated
               with the transcript (and not the translation) then
               you should use the get_all_DBEntries() call instead.

               Note: Each entry may be listed more than once.  No
               uniqueness checks are done.  Also if you put in an
               incorrect external database name no checks are done
               to see if this exists, you will just get an empty
               list.

  Return type: Listref of Bio::EnsEMBL::DBEntry objects
222
223
  Exceptions : none
  Caller     : general
224
  Status     : Stable
225
226
227

=cut

Graham McVicker's avatar
Graham McVicker committed
228
# Collects the transcript's own DBEntries plus those of its translation
# (when there is one) and returns them sorted with _compare_xrefs().
sub get_all_DBLinks {
  my ( $self, $db_name_exp, $ex_db_type ) = @_;

  # Start with the xrefs attached directly to the transcript.
  my @xrefs = @{ $self->get_all_DBEntries( $db_name_exp, $ex_db_type ) };

  # Then append the xrefs of the translation, if any.
  my $peptide = $self->translation();
  if ( defined $peptide ) {
    my $peptide_xrefs =
      $peptide->get_all_DBEntries( $db_name_exp, $ex_db_type );
    push @xrefs, @{$peptide_xrefs};
  }

  my @ordered = sort { _compare_xrefs() } @xrefs;

  return \@ordered;
}

247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
=head2 get_all_xrefs

  Arg [1]    : String database name (optional)
               SQL wildcard characters (_ and %) can be used to
               specify patterns.

  Example    : @xrefs = @{ $transcript->get_all_xrefs() };
               @xrefs = @{ $transcript->get_all_xrefs('Uniprot%') };

  Description: Retrieves *all* related xrefs for this transcript.
               This includes all xrefs that are associated with the
               corresponding translation of this transcript.

               If you want to retrieve the xrefs associated with
               only the transcript (and not the translation) then
               you should use the get_all_object_xrefs() method
               instead.

               Note: Each entry may be listed more than once.  No
               uniqueness checks are done.  Also if you put in an
               incorrect external database name no checks are done
               to see if this exists, you will just get an empty
               list.

                NB: This method is an alias for the
                    get_all_DBLinks() method.

  Return type: Listref of Bio::EnsEMBL::DBEntry objects

  Status     : Stable

=cut

280
281
282
283
# Alias: forwards every argument unchanged to get_all_DBLinks().
sub get_all_xrefs {
  my ( $self, @filters ) = @_;
  return $self->get_all_DBLinks(@filters);
}
Graham McVicker's avatar
Graham McVicker committed
284

285
=head2 get_all_DBEntries
286

287
  Arg [1]    : (optional) String, external database name
288

289
  Arg [2]    : (optional) String, external database type
290

291
  Example    : my @dbentries = @{ $transcript->get_all_DBEntries() };
292
293
294
295
296
297
298
299
300
301

  Description: Retrieves DBEntries (xrefs) for this transcript.
               This does *not* include the corresponding
               translations DBEntries (see get_all_DBLinks()).

               This method will attempt to lazy-load DBEntries
               from a database if an adaptor is available and no
               DBEntries are present on the transcript (i.e. they
               have not already been added or loaded).

302
  Returntype : Listref of Bio::EnsEMBL::DBEntry objects
303
304
  Exceptions : none
  Caller     : get_all_DBLinks, TranscriptAdaptor::store
305
  Status     : Stable
306
307
308

=cut

309
# Returns the (cached) list of DBEntries for this transcript, loading it
# through the DBEntryAdaptor on first use when an adaptor is attached.
sub get_all_DBEntries {
  my ( $self, $ex_db_exp, $ex_db_type ) = @_;

  # Cache key: 'dbentries' followed by whichever filters were supplied,
  # so each filter combination is cached separately.
  my $cache_name =
    join( '', 'dbentries', grep { defined } ( $ex_db_exp, $ex_db_type ) );

  # if not cached, retrieve all of the xrefs for this transcript
  unless ( defined( $self->{$cache_name} ) ) {
    if ( defined( $self->adaptor() ) ) {
      my $dbentry_adaptor = $self->adaptor()->db()->get_DBEntryAdaptor();
      $self->{$cache_name} =
        $dbentry_adaptor->fetch_all_by_Transcript( $self, $ex_db_exp,
                                                   $ex_db_type );
    }
  }

  # Nothing cached and nothing loaded: remember an empty list.
  $self->{$cache_name} = [] unless $self->{$cache_name};

  return $self->{$cache_name};
} ## end sub get_all_DBEntries
334

335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
=head2 get_all_object_xrefs

  Arg [1]    : (optional) String, external database name

  Arg [2]    : (optional) String, external_db type

  Example    : @oxrefs = @{ $transcript->get_all_object_xrefs() };

  Description: Retrieves xrefs for this transcript.  This does
               *not* include xrefs that are associated with the
               corresponding translations of this transcript (see
               get_all_xrefs()).

               This method will attempt to lazy-load xrefs from a
               database if an adaptor is available and no xrefs are
               present on the transcript (i.e. they have not already
               been added or loaded).

353
354
                NB: This method is an alias for the
                    get_all_DBEntries() method.
355
356
357
358
359
360
361

  Return type: Listref of Bio::EnsEMBL::DBEntry objects

  Status     : Stable

=cut

362
363
364
365
# Alias: forwards every argument unchanged to get_all_DBEntries().
sub get_all_object_xrefs {
  my ( $self, @filters ) = @_;
  return $self->get_all_DBEntries(@filters);
}
366
367

=head2 add_DBEntry
368

369
370
  Arg [1]    : Bio::EnsEMBL::DBEntry $dbe
               The dbEntry to be added
371
372
373
374
375
  Example    : my $dbe = Bio::EnsEMBL::DBEntry->new(...);
               $transcript->add_DBEntry($dbe);
  Description: Associates a DBEntry with this transcript. Note that adding
               DBEntries will prevent future lazy-loading of DBEntries for this
               transcript (see get_all_DBEntries).
376
377
378
  Returntype : none
  Exceptions : thrown on incorrect argument type
  Caller     : general
379
  Status     : Stable
380
381
382
383
384
385
386
387

=cut

# Appends one DBEntry to the unfiltered 'dbentries' list of this
# transcript; throws unless the argument is a Bio::EnsEMBL::DBEntry.
sub add_DBEntry {
  my ( $self, $dbe ) = @_;

  my $is_dbentry =
    $dbe && ref($dbe) && $dbe->isa('Bio::EnsEMBL::DBEntry');
  if ( !$is_dbentry ) {
    throw('Expected DBEntry argument');
  }

  # Create the list on first use, then append.
  $self->{'dbentries'} = [] unless $self->{'dbentries'};
  push @{ $self->{'dbentries'} }, $dbe;
}

395

396
397
=head2 get_all_supporting_features

398
  Example    : my @evidence = @{ $transcript->get_all_supporting_features };
399
  Description: Retrieves any supporting features added manually by 
400
               calls to add_supporting_features.
401
  Returntype : Listref of Bio::EnsEMBL::FeaturePair objects
402
403
  Exceptions : none
  Caller     : general
404
  Status     : Stable
405
406
407
408
409

=cut

# Returns the supporting features of this transcript.  When none have
# been stored yet and an adaptor is attached, they are fetched through
# the TranscriptSupportingFeatureAdaptor and cached on the object.
sub get_all_supporting_features {
  my ($self) = @_;

  if ( !exists( $self->{_supporting_evidence} ) && $self->adaptor ) {
    my $sf_adaptor =
      $self->adaptor->db->get_TranscriptSupportingFeatureAdaptor();
    $self->{_supporting_evidence} =
      $sf_adaptor->fetch_all_by_Transcript($self);
  }

  # Fall back to a fresh empty list when nothing is stored.
  my $evidence = $self->{_supporting_evidence};
  return $evidence || [];
}


=head2 add_supporting_features

424
425
426
  Arg [1-N]  : Bio::EnsEMBL::FeaturePair $feature
               The supporting features to add
  Example    : $transcript->add_supporting_features(@features);
427
  Description: Adds a list of supporting features to this Transcript.
428
429
               The added features can be retrieved by
               get_all_supporting_features().
430
431
432
433
434
  Returntype : none
  Exceptions : throw if any of the features are not FeaturePairs
               throw if any of the features are not in the same coordinate
               system as the Transcript
  Caller     : general
435
  Status     : Stable
436
437
438
439
 
=cut
 
# Adds the given FeaturePair objects to this transcript's supporting
# evidence.  Each feature is validated first; a feature object that is
# already in the list is skipped.
sub add_supporting_features {
  my ( $self, @features ) = @_;

  return unless @features;

  $self->{_supporting_evidence} ||= [];

  FEATURE: foreach my $feature (@features) {

    if ( !defined($feature) || ref($feature) eq "ARRAY" ) {
      throw("Element in transcript supporting features array is undefined or is an ARRAY for " . $self->dbID);
    }

    # Calling ->isa() on an unblessed reference (e.g. a plain hashref)
    # dies with a raw Perl error, so probe inside eval and report every
    # kind of wrong argument through throw() instead.  $@ is localised
    # so the caller's error state is left untouched.
    my $is_feature_pair = do {
      local $@;
      $feature && eval { $feature->isa("Bio::EnsEMBL::FeaturePair") };
    };
    if ( !$is_feature_pair ) {
      throw("Supporting feat [$feature] not a " .
            "Bio::EnsEMBL::FeaturePair");
    }

    # Only comparable when both the transcript and the feature have a slice.
    if ((defined $self->slice() && defined $feature->slice())&&
      ( $self->slice()->name() ne $feature->slice()->name())){
      throw("Supporting feat not in same coord system as transcript\n" .
            "transcript is attached to [".$self->slice()->name()."]\n" .
            "feat is attached to [".$feature->slice()->name()."]");
    }

    # check whether this feature object has been added already
    foreach my $added_feature ( @{ $self->{_supporting_evidence} } ) {
      # compare objects (reference identity)
      next FEATURE if ( $feature == $added_feature );
    }

    # no duplicate was found, add the feature
    push( @{ $self->{_supporting_evidence} }, $feature );
  }

  return;
}


479
480
481
482
483
484
485
=head2 flush_supporting_features

  Example     : $transcript->flush_supporting_features;
  Description : Removes all supporting evidence from the transcript.
  Return type : (Empty) listref
  Exceptions  : none
  Caller      : general
486
  Status      : Stable
487
488
489
490
491
492
493
494
495

=cut

# Replaces the supporting evidence list with a new empty list and
# returns that list.
sub flush_supporting_features {
  my ($self) = @_;
  return $self->{'_supporting_evidence'} = [];
}


496
497
=head2 external_db

498
499
500
501
502
503
504
505
  Arg [1]    : (optional) String - name of external db to set
  Example    : $transcript->external_db('HGNC');
  Description: Getter/setter for attribute external_db. The db is the one that 
               belongs to the external_name.  
  Returntype : String
  Exceptions : none
  Caller     : general
  Status     : Stable
506
507
508
509

=cut

# Getter/setter for external_db.  When no value has been stored, the
# dbname() of the display xref (if any) is returned instead.
sub external_db {
  my ( $self, $ext_dbname ) = @_;

  # Setter: store and return the new value.
  return ( $self->{'external_db'} = $ext_dbname ) if defined $ext_dbname;

  # A stored value (the key merely has to exist) takes precedence.
  return $self->{'external_db'} if exists $self->{'external_db'};

  # Otherwise derive the value from the display xref.
  my $display_xref = $self->display_xref();
  return defined($display_xref) ? $display_xref->dbname() : undef;
}

529

530
531
=head2 external_status

532
533
534
535
536
537
538
539
  Arg [1]    : (optional) String - status of the external db
  Example    : $transcript->external_status('KNOWNXREF');
  Description: Getter/setter for attribute external_status. The status of
               the external db of the one that belongs to the external_name.
  Returntype : String
  Exceptions : none
  Caller     : general
  Status     : Stable
540
541
542
543
544
545
546

=cut

# Getter/setter for external_status.  When no value has been stored, the
# status() of the display xref (if any) is returned instead.
sub external_status {
  my ( $self, $ext_status ) = @_;

  # Setter: store and return the new value.
  return ( $self->{'external_status'} = $ext_status )
    if defined $ext_status;

  # A stored value (the key merely has to exist) takes precedence.
  return $self->{'external_status'} if exists $self->{'external_status'};

  # Otherwise derive the value from the display xref.
  my $display_xref = $self->display_xref();
  return defined($display_xref) ? $display_xref->status() : undef;
}


564
565
=head2 external_name

566
567
568
569
570
571
572
  Arg [1]    : (optional) String - the external name to set
  Example    : $transcript->external_name('BRCA2-001');
  Description: Getter/setter for attribute external_name.
  Returntype : String or undef
  Exceptions : none
  Caller     : general
  Status     : Stable
573
574
575
576

=cut

# Getter/setter for external_name.  When no value has been stored, the
# display_id() of the display xref (if any) is returned instead.
sub external_name {
  my ( $self, $ext_name ) = @_;

  # Setter: store and return the new value.
  return ( $self->{'external_name'} = $ext_name ) if defined $ext_name;

  # A stored value (the key merely has to exist) takes precedence.
  return $self->{'external_name'} if exists $self->{'external_name'};

  # Otherwise derive the value from the display xref.
  my $display_xref = $self->display_xref();
  return defined($display_xref) ? $display_xref->display_id() : undef;
}


597
598
=head2 is_known

599
600
  Example    : print "Transcript ".$transcript->stable_id." is KNOWN\n" if
                  $transcript->is_known;
601
  Description: DEPRECATED. Returns TRUE if this transcript has a status of 'KNOWN'
602
  Returntype : TRUE if known, FALSE otherwise
603
604
  Exceptions : none
  Caller     : general
605
  Status     : Stable
606
607
608

=cut

609
610
# DEPRECATED.  True when the stored status is "KNOWN" or
# "KNOWN_BY_PROJECTION"; emits a deprecation notice on every call.
sub is_known {
  my ($self) = @_;

  deprecate("is_known is deprecated and will be removed in e90. Please consider checking supporting features instead");

  my $status = $self->{'status'};
  return ( $status eq "KNOWN" || $status eq "KNOWN_BY_PROJECTION" );
}


616
617
618
=head2 status

  Arg [1]    : string $status
619
  Description: DEPRECATED. get/set for attribute status
620
621
622
623
624
625
626
627
628
  Returntype : string
  Exceptions : none
  Caller     : general
  Status     : Medium Risk

=cut

# DEPRECATED getter/setter for the status attribute; emits a deprecation
# notice on every call.  Any passed value, including undef, is stored.
sub status {
  my ( $self, @new_value ) = @_;

  deprecate("status is deprecated and will be removed in e90. Please consider checking supporting features instead");

  if (@new_value) {
    $self->{'status'} = $new_value[0];
  }

  return $self->{'status'};
}

634
635
636
637
638
639
640
=head2 biotype

  Arg [1]    : string $biotype
  Description: get/set for attribute biotype
  Returntype : string
  Exceptions : none
  Caller     : general
641
  Status     : Stable
642
643
644
645
646
647
648

=cut

# Getter/setter for the biotype attribute.  A false stored value reads
# back as "protein_coding".
sub biotype {
  my ( $self, @new_value ) = @_;

  if (@new_value) {
    $self->{'biotype'} = $new_value[0];
  }

  return ( $self->{'biotype'} || "protein_coding" );
}

651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
=head2 source

  Arg [1]    : (optional) String - the source to set
  Example    : $transcript->source('ensembl');
  Description: Getter/setter for attribute source
  Returntype : String
  Exceptions : none
  Caller     : general
  Status     : Stable

=cut

# Getter/setter for the source attribute.  A false stored value reads
# back as "ensembl".
sub source {
  my ( $self, @new_value ) = @_;

  if (@new_value) {
    $self->{'source'} = $new_value[0];
  }

  return ( $self->{'source'} || "ensembl" );
}
668

Alistair Rust's avatar
Alistair Rust committed
669
=head2 display_xref
670

671
672
673
  Arg [1]    : (optional) Bio::EnsEMBL::DBEntry - the display xref to set
  Example    : $transcript->display_xref($db_entry);
  Description: Getter/setter for display_xref for this transcript.
674
  Returntype : Bio::EnsEMBL::DBEntry
675
676
  Exceptions : none
  Caller     : general
677
  Status     : Stable
678
679
680

=cut

681
# Getter/setter for the display xref.  Any passed value, including
# undef, is stored as given.
sub display_xref {
  my ( $self, @new_value ) = @_;

  if (@new_value) {
    $self->{'display_xref'} = $new_value[0];
  }

  return $self->{'display_xref'};
}

687
688
689
690
691
692
693
=head2 is_canonical

  Args [1]      : (optional) Boolean is_canonical

  Example       : if ($transcript->is_canonical()) { ... }

  Description : Returns true (non-zero) if the transcript is the
Andy Yates's avatar
Andy Yates committed
694
695
696
697
698
                canonical transcript of its gene, false (0) if not. If the code
                returns an undefined it is because its state is not currently
                known. Internally the code will consult the database for this
                value if it is unknown and the transcript has a dbID and an
                attached adaptor
699
700
701
702
703
704
705
706
707

  Return type   : Boolean

  Status        : Stable

=cut

# Getter/setter for the canonical flag.
#   - With a defined argument: stores it, normalised to 1 or 0.
#   - Without one: returns the stored flag; if none is stored and the
#     transcript has both a dbID and an adaptor, the adaptor is asked
#     (is_Transcript_canonical) and the answer is cached.
# The setter is checked first so that a flag which is already known can
# still be changed; returning the cached value up front would silently
# ignore the new value.
sub is_canonical {
  my ( $self, $value ) = @_;

  if ( defined($value) ) {
    $self->{is_canonical} = ( $value ? 1 : 0 );
  }
  elsif (    !defined( $self->{is_canonical} )
          && $self->dbID()
          && $self->adaptor() )
  {
    # State unknown: look it up once and cache the result.
    $self->{is_canonical} =
      $self->adaptor()->is_Transcript_canonical($self);
  }

  return $self->{is_canonical};
}
Ewan Birney's avatar
Ewan Birney committed
723
724
725

=head2 translation

726
727
728
  Args       : None
  Example    : if ( $transcript->translation() ) {
                 print( $transcript->translation()->stable_id(), "\n" );
729
               } else {
730
                 print("Pseudogene\n");
731
               }
732
733
734
735
736
737
738
  Description: Getter/setter for the Translation object which
               defines the CDS (and as a result the peptide encoded
               by) this transcript.  This function will return
               undef if this transcript is a pseudogene, i.e. a
               non-translating transcript such as an ncRNA.  This
               is the accepted method of determining whether a
               transcript is a pseudogene or not.
739
  Returntype : Bio::EnsEMBL::Translation
740
741
  Exceptions : none
  Caller     : general
742
  Status     : Stable
Ewan Birney's avatar
Ewan Birney committed
743
744
745

=cut

Michele Clamp's avatar
Michele Clamp committed
746
# Getter/setter for the Translation of this transcript.
#   - Defined argument: attach it (after a type check) and link it back
#     to this transcript.
#   - Explicit undef argument: detach the current translation, if any.
#   - No argument: return the stored translation, loading it through the
#     TranslationAdaptor when the key is absent and an adaptor exists.
# Attaching or detaching resets the cached coding coordinates and the
# transcript mapper.
sub translation {
  my ( $self, $translation ) = @_;

  # Cached values that depend on the translation.
  my @dependent_keys = (
    'cdna_coding_start',   'cdna_coding_end',
    'coding_region_start', 'coding_region_end',
    'transcript_mapper'
  );

  if ( defined($translation) ) {
    assert_ref( $translation, 'Bio::EnsEMBL::Translation' );

    $self->{'translation'} = $translation;
    $translation->transcript($self);

    foreach my $key (@dependent_keys) {
      $self->{$key} = undef;
    }
  }
  elsif ( @_ > 1 ) {
    # Called with an explicit undef: remove the existing translation.
    if ( defined( $self->{'translation'} ) ) {
      $self->{'translation'}->transcript(undef);
      delete( $self->{'translation'} );

      foreach my $key (@dependent_keys) {
        $self->{$key} = undef;
      }
    }
  }
  elsif ( !exists( $self->{'translation'} ) && defined( $self->adaptor() ) ) {
    my $translation_adaptor =
      $self->adaptor()->db()->get_TranslationAdaptor();
    $self->{'translation'} =
      $translation_adaptor->fetch_by_Transcript($self);
  }

  return $self->{'translation'};
} ## end sub translation
Ewan Birney's avatar
Ewan Birney committed
788

789
790
791
792
793
794
795
796
797
798
799
=head2 get_all_alternative_translations

  Args       : None
  Example    :

    my @alt_translations =
      @{ $transcript->get_all_alternative_translations() };

  Description:  Fetches all alternative translations defined for this
                transcript.  The canonical translation is not returned.

800
  Returntype : Arrayref to Bio::EnsEMBL::Translation
Andreas Kusalananda Kähäri's avatar
Andreas Kusalananda Kähäri committed
801
  Exceptions : None
802
803
804
805
806
807
808
809
  Caller     : General
  Status     : Stable

=cut

# Returns an arrayref of the alternative translations of this transcript.
# On first use, when an adaptor is attached, they are loaded through the
# TranslationAdaptor and cached.  Always returns an arrayref, as the POD
# promises: when nothing is stored and nothing can be loaded, a fresh
# empty list is returned so callers can dereference the result safely.
sub get_all_alternative_translations {
  my ($self) = @_;

  if (   !exists( $self->{'alternative_translations'} )
       && defined( $self->adaptor() ) )
  {
    my $pa = $self->adaptor()->db()->get_TranslationAdaptor();
    my @translations =
      @{ $pa->fetch_all_alternative_by_Transcript($self) };

    $self->{'alternative_translations'} = \@translations;
  }

  # The empty list is deliberately not stored: the key must stay absent
  # so that a later call can still lazy-load once an adaptor is attached.
  return $self->{'alternative_translations'} || [];
}
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853

=head2 add_alternative_translation

  Args       : Bio::EnsEMBL::Translation $translation
  Example    :

    $transcript->add_alternative_translation($translation);

  Description: Adds an alternative translation to this transcript.
  Returntype : None
  Exceptions : None
  Caller     : General
  Status     : Stable

=cut

# Appends one alternative translation to this transcript; throws unless
# the argument is a Bio::EnsEMBL::Translation object.
sub add_alternative_translation {
  my ( $self, $translation ) = @_;

  if (    !defined($translation)
       || !ref($translation)
       || !$translation->isa('Bio::EnsEMBL::Translation') )
  {
    throw("Bio::EnsEMBL::Translation argument expected.");
  }

  # Make sure any alternative translations held in the database are
  # loaded first, so that the new one is added to the complete list.
  $self->get_all_alternative_translations();

  my $alternatives = \@{ $self->{'alternative_translations'} };
  push( @{$alternatives}, $translation );
}
Arne Stabenau's avatar
Arne Stabenau committed
854

855
856
=head2 spliced_seq

857
858
  Args       : soft_mask (opt)
               if specified, will return a sequence where UTR regions are lowercased
859
860
  Description: Retrieves all Exon sequences and concats them together.
               No phase padding magic is done, even if phases do not align.
861
  Returntype : Text
862
863
  Exceptions : none
  Caller     : general
864
  Status     : Stable
865
866
867
868

=cut

# Concatenates the sequences of all exons, in the order returned by
# get_all_Exons(), and applies the sequence edits when edits are enabled.
#
# Arg [1] : (optional) $soft_mask - when true, bases outside the coding
#           region of each exon are returned in lower case.
# Returns : the sequence string.  An exon whose sequence cannot be
#           obtained contributes a run of 'N' of the exon's length
#           (with a warning).
sub spliced_seq {
  my ( $self, $soft_mask ) = @_;

  my $seq_string = "";
  for my $ex ( @{ $self->get_all_Exons() } ) {
    my $seq = $ex->seq();

    if ( !$seq ) {
      warning("Could not obtain seq for exon.  Transcript sequence may not " .
              "be correct.");
      $seq_string .= 'N' x $ex->length();
      next;
    }

    my $exon_seq = $seq->seq();

    if ($soft_mask) {
      my $coding_start = $ex->coding_region_start($self);

      if ( !defined($coding_start) ) {
        # No coding region in this exon: mask all of it.
        $exon_seq = lc($exon_seq);
      } else {
        # The two ends are handled independently, so an exon with
        # non-coding sequence on both sides is masked on both sides.

        # Non-coding bases below coding_region_start.
        if ( $coding_start > $ex->start() ) {
          my $forward_length = $coding_start - $ex->start();
          my $reverse_length = $ex->end() - $coding_start;
          if ( $ex->strand == 1 ) {
            $exon_seq = lc( substr( $exon_seq, 0, $forward_length ) )
              . substr( $exon_seq, $forward_length );
          } else {
            # The string runs from end() down to start() here, so the
            # base at coding_region_start sits at offset $reverse_length
            # and is coding: keep $reverse_length + 1 bases unmasked.
            $exon_seq = substr( $exon_seq, 0, $reverse_length + 1 )
              . lc( substr( $exon_seq, $reverse_length + 1 ) );
          }
        }

        # Non-coding bases above coding_region_end.
        my $coding_end = $ex->coding_region_end($self);
        if ( defined($coding_end) && $coding_end < $ex->end() ) {
          my $forward_length = $coding_end - $ex->start();
          my $reverse_length = $ex->end() - $coding_end;
          if ( $ex->strand == 1 ) {
            $exon_seq = substr( $exon_seq, 0, $forward_length + 1 )
              . lc( substr( $exon_seq, $forward_length + 1 ) );
          } else {
            $exon_seq = lc( substr( $exon_seq, 0, $reverse_length ) )
              . substr( $exon_seq, $reverse_length );
          }
        }
      }
    }

    $seq_string .= $exon_seq;
  }

  # apply post transcriptional edits
  if ( $self->edits_enabled() ) {
    my @seqeds = @{ $self->get_all_SeqEdits() };

    # sort edits in reverse order to remove complication of
    # adjusting downstream edits
    @seqeds = sort { $b->start() <=> $a->start() } @seqeds;

    foreach my $se (@seqeds) {
      $se->apply_edit( \$seq_string );
    }
  }

  return $seq_string;
}


=head2 translateable_seq

  Args       : none
  Example    : print $transcript->translateable_seq(), "\n";
928
  Description: Returns a sequence string which is the translateable part
929
               of the transcripts sequence.  This is formed by splicing all
930
931
932
933
934
               Exon sequences together and applying all defined RNA edits.
               Then the coding part of the sequence is extracted and returned.
               The code will not support monkey exons any more. If you want to
               have non phase matching exons, define appropriate _rna_edit
               attributes!
935
936
937

               An empty string is returned if this transcript is a pseudogene
               (i.e. is non-translateable).
938
  Returntype : Text
939
940
  Exceptions : none
  Caller     : general
941
  Status     : Stable
942
943
944
945
946
947

=cut

# Returns the coding part of the spliced sequence: the stretch from
# cdna_coding_start() to cdna_coding_end(), prefixed with one 'N' per
# unit of the start exon's phase when that phase is positive.
# Returns an empty string when there is no translation, or when either
# coding coordinate is missing.
sub translateable_seq {
  my ($self) = @_;

  if ( !$self->translation() ) {
    return '';
  }

  my $start = $self->cdna_coding_start();
  my $end   = $self->cdna_coding_end();

  # Check the coordinates before they are used: slicing with an
  # undefined offset only produces warnings and a value that is thrown
  # away, and the phase lookup below is pointless in that case.
  if ( !$start || !$end ) {
    return "";
  }

  my $mrna = $self->spliced_seq();

  # cDNA coordinates are 1-based and inclusive; substr is 0-based.
  $mrna = substr( $mrna, $start - 1, $end - $start + 1 );

  my $start_phase = $self->translation->start_Exon->phase();
  if ( $start_phase > 0 ) {
    $mrna = ( "N" x $start_phase ) . $mrna;
  }

  return $mrna;
}

Arne Stabenau's avatar
Arne Stabenau committed
970

971
972
973
974
975
976
977
=head2 cdna_coding_start

  Arg [1]    : (optional) $value
  Example    : $relative_coding_start = $transcript->cdna_coding_start;
  Description: Retrieves the position of the coding start of this transcript
               in cdna coordinates (relative to the start of the 5prime end of
               the transcript, excluding introns, including utrs).
978
979
980

               This will return undef if this is a pseudogene (i.e. a
               transcript with no translation).
981
982
983
  Returntype : int
  Exceptions : none
  Caller     : five_prime_utr, get_all_snps, general
984
  Status     : Stable
985
986
987
988

=cut

# Getter/setter for the coding start in cDNA coordinates.  When no value
# is stored and the transcript has a translation, the value is computed
# from the exons (and from the sequence edits when edits are enabled)
# and then cached.
sub cdna_coding_start {
  my ( $self, @new_value ) = @_;

  if (@new_value) {
    $self->{'cdna_coding_start'} = $new_value[0];
  }

  if ( !defined( $self->{'cdna_coding_start'} )
       && defined( $self->translation ) )
  {
    # calc coding start relative from the start of translation (in cdna coords)
    my $coding_start = 0;

    foreach my $exon ( @{ $self->get_all_Exons } ) {
      # Stop scanning at the first false entry in the exon list.
      last unless $exon;

      if ( $exon == $self->translation->start_Exon ) {
        # add the utr portion of the start exon
        $coding_start += $self->translation->start;
        last;
      }

      # add the entire length of this non-coding exon
      $coding_start += $exon->length;
    }

    # adjust cdna coords if sequence edits are enabled
    if ( $self->edits_enabled() ) {
      # sort in reverse order to avoid adjustment of downstream edits
      my @edits = sort { $b->start() <=> $a->start() }
        @{ $self->get_all_SeqEdits() };

      foreach my $edit (@edits) {
        # use less than start so that start of CDS can be extended
        next unless $edit->start() < $coding_start;
        $coding_start += $edit->length_diff();
      }
    }

    $self->{'cdna_coding_start'} = $coding_start;
  }

  return $self->{'cdna_coding_start'};
}


=head2 cdna_coding_end

  Arg [1]    : (optional) $value