Skip to content
Snippets Groups Projects
Commit 95c3a80a authored by Kieron Taylor's avatar Kieron Taylor :angry:
Browse files

Attempt to speed up. Previous use of Algorithm::Diff was prohibitively slow,...

Attempt to speed up. The previous use of Algorithm::Diff was prohibitively slow, so this commit uses bitmasking for the simple cases (sequences of equal length), which is roughly 10x faster there.
parent 751902ad
No related branches found
No related tags found
No related merge requests found
...@@ -212,10 +212,46 @@ foreach my $chr ($support->sort_chromosomes) { ...@@ -212,10 +212,46 @@ foreach my $chr ($support->sort_chromosomes) {
$i++; $i++;
# Bin every difference between the reference sequence ($R_seq) and the
# alternative sequence ($A_seq) into @global_diff_bins, indexed by the
# length of the change.
if (length($R_seq) == length($A_seq)) {
    # Fast path: with equal lengths, XOR-ing the two strings yields a NUL
    # byte wherever they agree and a non-NUL byte wherever they differ.
    # The original author measured this as roughly 10x faster than
    # Algorithm::Diff, provided there are no InDels.
    # NOTE(review): the comparison is byte-wise, so upper- vs lower-case
    # bases count as differences - confirm that is intended.
    my $mask = $R_seq ^ $A_seq;

    # Each maximal run of non-NUL bytes is one change; its length is the
    # number of consecutive mismatching positions. Matching on runs also
    # counts a change that extends to the very end of the sequence.
    while ($mask =~ /([^\0]+)/g) {
        $global_diff_bins[length($1)]++;
    }
}
else {
    # Slow path: the lengths differ, so at least one InDel is present and
    # a real diff is needed.
    my @Ref = split(//, $R_seq);
    my @Alt = split(//, $A_seq);

    foreach my $hunk (diff(\@Ref, \@Alt)) {
        # Net length change of this hunk: +1 per inserted element,
        # -1 per deleted element.
        my $length = 0;
        foreach my $desc (@{$hunk}) {
            if    ($desc->[0] eq '+') { $length++; }
            elsif ($desc->[0] eq '-') { $length--; }
        }

        # A net deletion gives a negative value; used directly as an array
        # subscript it would index from the END of @global_diff_bins and
        # corrupt an unrelated bin, so bin by magnitude instead.
        # NOTE(review): insertions and deletions of equal net size now
        # share a bin - confirm this is the desired histogram.
        $global_diff_bins[abs($length)]++;
    }
}
} }
$k++; $k++;
......
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment