Skip to content
Snippets Groups Projects
Commit d322d934 authored by Matthew Laird's avatar Matthew Laird
Browse files

Fix for ENSCORESW-1816, bioperl's inconsistent handling of incomplete codons...

Fix for ENSCORESW-1816, bioperl's inconsistent handling of incomplete codons across versions. Trim any partial codons when translating unless told not to.
parent 54f99ebf
No related branches found
No related tags found
No related merge requests found
...@@ -331,7 +331,8 @@ sub translation { ...@@ -331,7 +331,8 @@ sub translation {
=head2 translate =head2 translate
Args : none Arg [1] : Boolean, emulate the behavior of old bioperl versions where
an incomplete final codon of 2 characters is padded and guessed
Function : Give a peptide translation of all exons currently in Function : Give a peptide translation of all exons currently in
the PT. Gives empty string when none is in. the PT. Gives empty string when none is in.
Returntype: a Bio::Seq as in transcript->translate() Returntype: a Bio::Seq as in transcript->translate()
...@@ -343,7 +344,7 @@ sub translation { ...@@ -343,7 +344,7 @@ sub translation {
sub translate { sub translate {
my ($self) = @_; my ($self, $complete_codon) = @_;
my $dna = $self->translateable_seq(); my $dna = $self->translateable_seq();
...@@ -358,6 +359,11 @@ sub translate { ...@@ -358,6 +359,11 @@ sub translate {
} }
$codon_table_id ||= 1; #default will be vertebrates $codon_table_id ||= 1; #default will be vertebrates
# Remove the final stop codon from the mrna
# sequence produced if it is present, this is so any peptide produced
# won't have a terminal stop codon
# if you want to have a terminal stop codon either comment this line out
# or call translatable seq directly and produce a translation from it
if( CORE::length( $dna ) % 3 == 0 ) { if( CORE::length( $dna ) % 3 == 0 ) {
# $dna =~ s/TAG$|TGA$|TAA$//i; # $dna =~ s/TAG$|TGA$|TAA$//i;
my $codon_table = Bio::Tools::CodonTable->new( -id => $codon_table_id ); my $codon_table = Bio::Tools::CodonTable->new( -id => $codon_table_id );
...@@ -365,12 +371,21 @@ sub translate { ...@@ -365,12 +371,21 @@ sub translate {
if ( $codon_table->is_ter_codon( substr( $dna, -3, 3 ) ) ) { if ( $codon_table->is_ter_codon( substr( $dna, -3, 3 ) ) ) {
substr( $dna, -3, 3, '' ); substr( $dna, -3, 3, '' );
} }
} elsif ( CORE::length($dna) % 3 == 2 ) {
# If we have a partial codon of 2 bp we need to decide if we
# trim it or not to fix some bad behaviour in older bioperl
# versions
if ( $complete_codon ) {
# If we want to do the bad behavior of bioperl 1.6.1 and older
# where we guess the last codon if incomplete, pad an N
# to the mrna sequence
$dna .= 'N';
} else {
# Otherwise trim those last two bp off so the behavior is
# consistent across bioperl versions
substr( $dna, -2, 2, '' );
}
} }
# the above line will remove the final stop codon from the mrna
# sequence produced if it is present, this is so any peptide produced
# won't have a terminal stop codon
# if you want to have a terminal stop codon either comment this line out
# or call translatable seq directly and produce a translation from it
my $bioseq = new Bio::Seq( -id => $self->display_id, my $bioseq = new Bio::Seq( -id => $self->display_id,
-seq => $dna, -seq => $dna,
......
...@@ -2092,7 +2092,8 @@ sub get_all_translateable_Exons { ...@@ -2092,7 +2092,8 @@ sub get_all_translateable_Exons {
=head2 translate =head2 translate
Args : none Arg [1] : Boolean, emulate the behavior of old bioperl versions where
an incomplete final codon of 2 characters is padded and guessed
Example : none Example : none
Description: Return the peptide (plus eventual stop codon) for Description: Return the peptide (plus eventual stop codon) for
this transcript. Does N-padding of non-phase this transcript. Does N-padding of non-phase
...@@ -2107,7 +2108,7 @@ sub get_all_translateable_Exons { ...@@ -2107,7 +2108,7 @@ sub get_all_translateable_Exons {
=cut =cut
sub translate { sub translate {
my ($self) = @_; my ($self, $complete_codon) = @_;
if ( !defined( $self->translation() ) ) { return undef } if ( !defined( $self->translation() ) ) { return undef }
...@@ -2152,6 +2153,20 @@ sub translate { ...@@ -2152,6 +2153,20 @@ sub translate {
if ( $codon_table->is_ter_codon( substr( $mrna, -3, 3 ) ) ) { if ( $codon_table->is_ter_codon( substr( $mrna, -3, 3 ) ) ) {
substr( $mrna, -3, 3, '' ); substr( $mrna, -3, 3, '' );
} }
} elsif ( CORE::length($mrna) % 3 == 2 ) {
# If we have a partial codon of 2 bp we need to decide if we
# trim it or not to fix some bad behaviour in older bioperl
# versions
if ( $complete_codon ) {
# If we want to do the bad behavior of bioperl 1.6.1 and older
# where we guess the last codon if incomplete, pad an N
# to the mrna sequence
$mrna .= 'N';
} else {
# Otherwise trim those last two bp off so the behavior is
# consistent across bioperl versions
substr( $mrna, -2, 2, '' );
}
} }
if ( CORE::length($mrna) < 1 ) { return undef } if ( CORE::length($mrna) < 1 ) { return undef }
......
...@@ -210,6 +210,12 @@ $tr->flush_Exons(); ...@@ -210,6 +210,12 @@ $tr->flush_Exons();
is( scalar( @{$tr->get_all_Exons()} ), 0, 'No exons left after flushing' ); is( scalar( @{$tr->get_all_Exons()} ), 0, 'No exons left after flushing' );
# Fetch a fresh tr, check incomplete codon behavior
$tr = $ta->fetch_by_stable_id( "ENST00000300425" );
# By default the incomplete codon should be dropped
is( $tr->translate()->seq() =~ /P$/, 1, "Incomplete codon is not translated");
is( $tr->translate(1)->seq() =~ /PL$/, 1, "Incomplete codon is padded then translated");
# get a fresh tr to check the update method # get a fresh tr to check the update method
$tr = $ta->fetch_by_stable_id( "ENST00000217347" ); $tr = $ta->fetch_by_stable_id( "ENST00000217347" );
......
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment