diff --git a/modules/Bio/EnsEMBL/PredictionTranscript.pm b/modules/Bio/EnsEMBL/PredictionTranscript.pm index 9ce0ab4b7dfd55ccefdccfd2730e824e377fefb4..041247253d4c89ee8e82a6341c0ebcffb07c5d8b 100644 --- a/modules/Bio/EnsEMBL/PredictionTranscript.pm +++ b/modules/Bio/EnsEMBL/PredictionTranscript.pm @@ -331,7 +331,8 @@ sub translation { =head2 translate - Args : none + Arg [1] : Boolean, emulate the behavior of old bioperl versions where + an incomplete final codon of 2 characters is padded and guessed Function : Give a peptide translation of all exons currently in the PT. Gives empty string when none is in. Returntype: a Bio::Seq as in transcript->translate() @@ -343,7 +344,7 @@ sub translation { sub translate { - my ($self) = @_; + my ($self, $complete_codon) = @_; my $dna = $self->translateable_seq(); @@ -358,6 +359,11 @@ sub translate { } $codon_table_id ||= 1; #default will be vertebrates + # Remove the final stop codon from the mrna + # sequence produced if it is present, this is so any peptide produced + # won't have a terminal stop codon + # if you want to have a terminal stop codon either comment this line out + # or call translatable seq directly and produce a translation from it if( CORE::length( $dna ) % 3 == 0 ) { # $dna =~ s/TAG$|TGA$|TAA$//i; my $codon_table = Bio::Tools::CodonTable->new( -id => $codon_table_id ); @@ -365,12 +371,21 @@ sub translate { if ( $codon_table->is_ter_codon( substr( $dna, -3, 3 ) ) ) { substr( $dna, -3, 3, '' ); } + } elsif ( CORE::length($dna) % 3 == 2 ) { + # If we have a partial codon of 2 bp we need to decide if we + # trim it or not to fix some bad behaviour in older bioperl + # versions + if ( $complete_codon ) { + # If we want to do the bad behavior of bioperl 1.6.1 and older + # where we guess the last codon if incomplete, pad an N + # to the mrna sequence + $dna .= 'N'; + } else { + # Otherwise trim those last two bp off so the behavior is + # consistent across bioperl versions + substr( $dna, -2, 2, '' ); + } } - # 
the above line will remove the final stop codon from the mrna - # sequence produced if it is present, this is so any peptide produced - # won't have a terminal stop codon - # if you want to have a terminal stop codon either comment this line out - # or call translatable seq directly and produce a translation from it my $bioseq = new Bio::Seq( -id => $self->display_id, -seq => $dna, diff --git a/modules/Bio/EnsEMBL/Transcript.pm b/modules/Bio/EnsEMBL/Transcript.pm index e41d0ffd818ce44016cfe892cc6ea2bd29561aae..9da16ac06bfdc2627f1ea1e808e3a233a37d8303 100755 --- a/modules/Bio/EnsEMBL/Transcript.pm +++ b/modules/Bio/EnsEMBL/Transcript.pm @@ -2092,7 +2092,8 @@ sub get_all_translateable_Exons { =head2 translate - Args : none + Arg [1] : Boolean, emulate the behavior of old bioperl versions where + an incomplete final codon of 2 characters is padded and guessed Example : none Description: Return the peptide (plus eventual stop codon) for this transcript. Does N-padding of non-phase @@ -2107,7 +2108,7 @@ sub get_all_translateable_Exons { =cut sub translate { - my ($self) = @_; + my ($self, $complete_codon) = @_; if ( !defined( $self->translation() ) ) { return undef } @@ -2152,6 +2153,20 @@ sub translate { if ( $codon_table->is_ter_codon( substr( $mrna, -3, 3 ) ) ) { substr( $mrna, -3, 3, '' ); } + } elsif ( CORE::length($mrna) % 3 == 2 ) { + # If we have a partial codon of 2 bp we need to decide if we + # trim it or not to fix some bad behaviour in older bioperl + # versions + if ( $complete_codon ) { + # If we want to do the bad behavior of bioperl 1.6.1 and older + # where we guess the last codon if incomplete, pad an N + # to the mrna sequence + $mrna .= 'N'; + } else { + # Otherwise trim those last two bp off so the behavior is + # consistent across bioperl versions + substr( $mrna, -2, 2, '' ); + } } if ( CORE::length($mrna) < 1 ) { return undef } diff --git a/modules/t/transcript.t b/modules/t/transcript.t index 
77061d6a64a2880ea060527dae4eb3d8510dfdee..9b5cf356f30f12ff27deb56f08a65f6257991d9c 100644 --- a/modules/t/transcript.t +++ b/modules/t/transcript.t @@ -210,6 +210,12 @@ $tr->flush_Exons(); is( scalar( @{$tr->get_all_Exons()} ), 0, 'No exons left after flushing' ); +# Fetch a fresh tr, check incomplete codon behavior +$tr = $ta->fetch_by_stable_id( "ENST00000300425" ); + +# By default the incomplete codon should be dropped +is( $tr->translate()->seq() =~ /P$/, 1, "Incomplete codon is not translated"); +is( $tr->translate(1)->seq() =~ /PL$/, 1, "Incomplete codon is padded then translated"); # get a fresh tr to check the update method $tr = $ta->fetch_by_stable_id( "ENST00000217347" );