diff --git a/modules/Bio/EnsEMBL/BaseAlignFeature.pm b/modules/Bio/EnsEMBL/BaseAlignFeature.pm index 40c8f6049059352ac34c63c0f17c961a5d002ad8..6ea8ce42ca9d819c933baa22910be710ef13c87d 100644 --- a/modules/Bio/EnsEMBL/BaseAlignFeature.pm +++ b/modules/Bio/EnsEMBL/BaseAlignFeature.pm @@ -911,6 +911,7 @@ sub _transform_feature_to_RawContig{ my @out; my ( $hit_start, $hit_end ); + my $codon_unused_bases = 0; if( scalar( @mapped ) > 1 ) { #The feature needs to be mapped accross multiple contigs @@ -931,14 +932,58 @@ sub _transform_feature_to_RawContig{ #caculate query and hit length for each portion of the split feature #may need to round hit length to avoid 'partial' peptide + # decision of Michele to not cover split codons + my $query_length = ($mapped[$i]->end - $mapped[$i]->start + 1); + my $query_start = $mapped[$i]->start(); + my $query_end = $mapped[$i]->end(); + my $hit_length; + if($self->_query_unit == $self->_hit_unit){ + + # DnaDna and PepPep case $hit_length = $query_length; - }elsif($self->_query_unit > $self->_hit_unit){ - my $tmp = ($query_length/$self->_query_unit); - $hit_length = sprintf "%.0f", $tmp; #round value up or down - }elsif($self->_hit_unit > $self->_query_unit){ + + } elsif( $self->_query_unit > $self->_hit_unit ){ + + # DnaPepAlign case + # my $tmp = ($query_length/$self->_query_unit); + # $hit_length = sprintf "%.0f", $tmp; #round value up or down + + $hit_length = int(( $query_length - $codon_unused_bases ) / + $self->_query_unit() ); + + if( $codon_unused_bases ) { + if( $feature->hstrand() == 1 ) { + $hit_start++; + } else { + $hit_end--; + } + } + + my $used_bases = $query_length - $codon_unused_bases - + $hit_length*$self->_query_unit(); + + if( $mapped[$i]->strand() == -1 ) { + $query_end -= $codon_unused_bases; + $query_start += $used_bases; + } else { + $query_start += $codon_unused_bases; + $query_end -= $used_bases; + } + + + # new rest at the end ... + if( $used_bases ) { + $codon_unused_bases = 3 - $used_bases; + } else { + $codon_unused_bases = 0; + } + + } elsif($self->_hit_unit > $self->_query_unit){ + + # PepDnaAlign case (rare) my $tmp = ($query_length*$self->_hit_unit); $hit_length = sprintf "%.0f", $tmp; #round value up or down } @@ -946,6 +991,7 @@ sub _transform_feature_to_RawContig{ if($hit_length == 0){ next SPLIT; } + if( $feature->hstrand() == 1 ) { $hit_end = ($hit_start + $hit_length) - 1; } else { @@ -957,8 +1003,8 @@ sub _transform_feature_to_RawContig{ #create the new feature my $new_feature = Bio::EnsEMBL::FeaturePair->new; - $new_feature->start($mapped[$i]->start); - $new_feature->end($mapped[$i]->end); + $new_feature->start($query_start); + $new_feature->end($query_end); $new_feature->strand($mapped[$i]->strand); $new_feature->score($feature->score); $new_feature->percent_id($feature->percent_id); diff --git a/modules/Bio/EnsEMBL/Root.pm b/modules/Bio/EnsEMBL/Root.pm index 091effa0445778f336b683b208bd7a92ee5d6b0c..4b434734792c3f6c546a56109747875eeefa5cb9 100644 --- a/modules/Bio/EnsEMBL/Root.pm +++ b/modules/Bio/EnsEMBL/Root.pm @@ -95,6 +95,8 @@ sub warn{ my ($self,$string) = @_; my $verbose = $self->verbose; + $verbose = 0 unless defined $verbose; + if( $verbose == 2 ) { $self->throw($string); diff --git a/modules/t/dnaPepAlignFeature.t b/modules/t/dnaPepAlignFeature.t index 47f7d622676c22a50d79e5817f7e60b995c666f3..b1d1ceb73b75ddfdf10829c4fabc5a33dceb50a8 100644 --- a/modules/t/dnaPepAlignFeature.t +++ b/modules/t/dnaPepAlignFeature.t @@ -3,7 +3,7 @@ use lib 't'; BEGIN { $| = 1; use Test; - plan tests => 63; + plan tests => 39; } use MultiTestDB; @@ -11,6 +11,11 @@ use Bio::EnsEMBL::DnaPepAlignFeature; use Bio::EnsEMBL::SeqFeature; use Bio::EnsEMBL::RawContig; +use TestUtils qw ( debug test_getter_setter ); + +# switch on the debug prints + +our $verbose = 0; my($CHR, $START, $END) = ('20', 30_363_615, 30_475_000); my $CTG_BOUNDARY = 62877; @@ -143,12 +148,12 @@ ok($f); # @feats = (); $fp = new Bio::EnsEMBL::FeaturePair; -$fp->start($CTG_BOUNDARY - 2); -$fp->end ($CTG_BOUNDARY); +$fp->start($CTG_BOUNDARY - 5); +$fp->end ($CTG_BOUNDARY ); $fp->strand(1); $fp->score(10); $fp->contig($slice); -$fp->hstart(105); +$fp->hstart(104); $fp->hend (105); $fp->hstrand (1); $fp->hseqname('dummy-hid'); @@ -180,7 +185,7 @@ push(@feats,$fp); $dnaf = Bio::EnsEMBL::DnaPepAlignFeature->new( -features => \@feats ); ok($dnaf); -ok($dnaf->cigar_string eq '3M3I6M6D3M'); +ok($dnaf->cigar_string eq '6M3I6M6D3M'); ok($dnaf->validate || 1); #validate doesn't return true but throws on fail @dnafs = $dnaf->transform; @@ -189,63 +194,10 @@ ok($dnafs[0]->validate || 1); ok($dnafs[1]->validate || 1); -# -# 22-27 create a dnaalign feature on a slice across a contig boundary -# and convert to raw contig coordinates -# (+ve strand, -ve hitstrand) -# -@feats = (); -$fp = new Bio::EnsEMBL::FeaturePair; -$fp->start($CTG_BOUNDARY - 2); -$fp->end ($CTG_BOUNDARY); -$fp->strand(1); -$fp->score(10); -$fp->contig($slice); -$fp->hstart (108); -$fp->hend (108); -$fp->hstrand (-1); -$fp->hseqname('dummy-hid'); -push(@feats,$fp); - -$fp = new Bio::EnsEMBL::FeaturePair; -$fp->start($CTG_BOUNDARY + 4); -$fp->end ($CTG_BOUNDARY + 9); -$fp->strand(1); -$fp->score(10); -$fp->contig($slice); -$fp->seqname(1); -$fp->hstart(106); -$fp->hend (107); -$fp->hstrand (-1); -$fp->hseqname('dummy-hid'); -push(@feats,$fp); - - -$fp = new Bio::EnsEMBL::FeaturePair; -$fp->start($CTG_BOUNDARY + 10); -$fp->end ($CTG_BOUNDARY + 12); -$fp->strand(1); -$fp->score(10); -$fp->contig($slice); -$fp->hstart (103); -$fp->hend (103); -$fp->hstrand (-1); -$fp->hseqname('dummy-hid'); -push(@feats,$fp); - -$dnaf = Bio::EnsEMBL::DnaPepAlignFeature->new( -features => \@feats ); -ok($dnaf); -ok($dnaf->cigar_string eq '3M3I6M6D3M'); -ok($dnaf->validate || 1); #validate doesn't return true but throws on fail - -my @dnafs = $dnaf->transform; -ok(scalar(@dnafs) == 2); -ok($dnafs[0]->validate || 1); -ok($dnafs[1]->validate || 1); # -# 28-33 create a dnaalign feature on a slice across a contig boundary +# 22-27 create a dnaalign feature on a slice across a contig boundary # and convert to raw contig coordinates # (-ve strand, +ve hitstrand) # @@ -296,65 +248,21 @@ ok($dnaf->validate || 1); #validate doesn't return true but throws on fail @dnafs = $dnaf->transform; ok(scalar(@dnafs) == 2); -ok($dnafs[0]->validate || 1); -ok($dnafs[1]->validate || 1); - - - -# -# 34-39 create a dnaalign feature on a slice across a contig boundary -# and convert to raw contig coordinates -# (-ve strand, -ve hitstrand) -# -@feats = (); -$fp = new Bio::EnsEMBL::FeaturePair; -$fp->start($CTG_BOUNDARY + 3); -$fp->end ($CTG_BOUNDARY + 5); -$fp->strand(-1); -$fp->score(10); -$fp->contig($slice); -$fp->hstart (108); -$fp->hend (108); -$fp->hstrand (-1); -$fp->hseqname('dummy-hid'); -push(@feats,$fp); -$fp = new Bio::EnsEMBL::FeaturePair; -$fp->start($CTG_BOUNDARY - 6); -$fp->end ($CTG_BOUNDARY - 1); -$fp->strand(-1); -$fp->score(10); -$fp->contig($slice); -$fp->seqname(1); -$fp->hstart(106); -$fp->hend (107); -$fp->hstrand (-1); -$fp->hseqname('dummy-hid'); -push(@feats,$fp); +debug( "Feature 0 dump" ); +while( my ($k, $v) = each %{$dnafs[0]} ) { + debug( " ->".$k." = ".$v ); +} +ok($dnafs[0]->validate || 1); -$fp = new Bio::EnsEMBL::FeaturePair; -$fp->start($CTG_BOUNDARY - 9); -$fp->end ($CTG_BOUNDARY - 7); -$fp->strand(-1); -$fp->score(10); -$fp->contig($slice); -$fp->seqname(1); -$fp->hstart(103); -$fp->hend(103); -$fp->hstrand (-1); -$fp->hseqname('dummy-hid'); -push(@feats,$fp); +debug( "Feature 1 dump" ); +while( my ($k, $v) = each %{$dnafs[1]} ) { + debug( " ->".$k." = ".$v ); +} +ok($dnafs[1]->validate || 1); -$dnaf = Bio::EnsEMBL::DnaPepAlignFeature->new( -features => \@feats ); -ok($dnaf); -ok($dnaf->cigar_string eq '3M3I6M6D3M'); -ok($dnaf->validate || 1); #validate doesn't return true but throws on fail -@dnafs = $dnaf->transform; -ok(scalar(@dnafs) == 2); -ok($dnafs[0]->validate || 1); -ok($dnafs[1]->validate || 1); # @@ -368,7 +276,7 @@ $slice = $slice->invert; # -# 40-45 create a dnaalign feature on a slice across a contig boundary +# 28-33 create a dnaalign feature on a slice across a contig boundary # and convert to raw contig coordinates # (+ve strand, +ve hitstrand) # @@ -420,63 +328,9 @@ ok($dnafs[0]->validate || 1); ok($dnafs[1]->validate || 1); -# -# 46-51 create a dnaalign feature on a slice across a contig boundary -# and convert to raw contig coordinates -# (+ve strand, -ve hitstrand) -# -@feats = (); -$fp = new Bio::EnsEMBL::FeaturePair; -$fp->start($CTG_BOUNDARY - 2); -$fp->end ($CTG_BOUNDARY); -$fp->strand(1); -$fp->score(10); -$fp->contig($slice); -$fp->hstart (108); -$fp->hend (108); -$fp->hstrand (-1); -$fp->hseqname('dummy-hid'); -push(@feats,$fp); - -$fp = new Bio::EnsEMBL::FeaturePair; -$fp->start($CTG_BOUNDARY + 4); -$fp->end ($CTG_BOUNDARY + 9); -$fp->strand(1); -$fp->score(10); -$fp->contig($slice); -$fp->seqname(1); -$fp->hstart(106); -$fp->hend (107); -$fp->hstrand (-1); -$fp->hseqname('dummy-hid'); -push(@feats,$fp); - - -$fp = new Bio::EnsEMBL::FeaturePair; -$fp->start($CTG_BOUNDARY + 10); -$fp->end ($CTG_BOUNDARY + 12); -$fp->strand(1); -$fp->score(10); -$fp->contig($slice); -$fp->hstart (103); -$fp->hend (103); -$fp->hstrand (-1); -$fp->hseqname('dummy-hid'); -push(@feats,$fp); - -$dnaf = Bio::EnsEMBL::DnaPepAlignFeature->new( -features => \@feats ); -ok($dnaf); -ok($dnaf->cigar_string eq '3M3I6M6D3M'); -ok($dnaf->validate || 1); #validate doesn't return true but throws on fail - -@dnafs = $dnaf->transform; -ok(scalar(@dnafs) == 2); -ok($dnafs[0]->validate || 1); -ok($dnafs[1]->validate || 1); - # -# 52-57 create a dnaalign feature on a slice across a contig boundary +# 34-39 create a dnaalign feature on a slice across a contig boundary # and convert to raw contig coordinates # (-ve strand, +ve hitstrand) # @@ -532,57 +386,3 @@ ok($dnafs[1]->validate || 1); -# -# 58-63 create a dnaalign feature on a slice across a contig boundary -# and convert to raw contig coordinates -# (-ve strand, -ve hitstrand) -# -@feats = (); -$fp = new Bio::EnsEMBL::FeaturePair; -$fp->start($CTG_BOUNDARY + 3); -$fp->end ($CTG_BOUNDARY + 5); -$fp->strand(-1); -$fp->score(10); -$fp->contig($slice); -$fp->hstart (108); -$fp->hend (108); -$fp->hstrand (-1); -$fp->hseqname('dummy-hid'); -push(@feats,$fp); - -$fp = new Bio::EnsEMBL::FeaturePair; -$fp->start($CTG_BOUNDARY - 6); -$fp->end ($CTG_BOUNDARY - 1); -$fp->strand(-1); -$fp->score(10); -$fp->contig($slice); -$fp->seqname(1); -$fp->hstart(106); -$fp->hend (107); -$fp->hstrand (-1); -$fp->hseqname('dummy-hid'); -push(@feats,$fp); - - -$fp = new Bio::EnsEMBL::FeaturePair; -$fp->start($CTG_BOUNDARY - 9); -$fp->end ($CTG_BOUNDARY - 7); -$fp->strand(-1); -$fp->score(10); -$fp->contig($slice); -$fp->seqname(1); -$fp->hstart(103); -$fp->hend(103); -$fp->hstrand (-1); -$fp->hseqname('dummy-hid'); -push(@feats,$fp); - -$dnaf = Bio::EnsEMBL::DnaPepAlignFeature->new( -features => \@feats ); -ok($dnaf); -ok($dnaf->cigar_string eq '3M3I6M6D3M'); -ok($dnaf->validate || 1); #validate doesn't return true but throws on fail - -@dnafs = $dnaf->transform; -ok(scalar(@dnafs) == 2); -ok($dnafs[0]->validate || 1); -ok($dnafs[1]->validate || 1);