From 07dbf49d6d4d6ecda8a482e57dcde76abba1644f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andreas=20Kusalananda=20K=C3=A4h=C3=A4ri?= <ak4@sanger.ac.uk> Date: Tue, 26 Aug 2003 16:36:21 +0000 Subject: [PATCH] Percent-identity-bug hopefully fixed. --- .../protein_match/new_pmatch_wrapper.pl | 58 +++++++++++-------- 1 file changed, 34 insertions(+), 24 deletions(-) diff --git a/misc-scripts/protein_match/new_pmatch_wrapper.pl b/misc-scripts/protein_match/new_pmatch_wrapper.pl index 174d933377..ff43613d56 100755 --- a/misc-scripts/protein_match/new_pmatch_wrapper.pl +++ b/misc-scripts/protein_match/new_pmatch_wrapper.pl @@ -127,10 +127,39 @@ while (defined(my $line = <PMATCH>)) { QEND => $qend, TSTART => $tstart, TEND => $tend }); + + push(@{ $hits{$qid}{$tid}{QRANGES} }, + new Bio::Range(-start => $qstart, -end => $qend)); + push(@{ $hits{$qid}{$tid}{TRANGES} }, + new Bio::Range(-start => $tstart, -end => $tend)); } close(PMATCH); +# Stitch Q and T ranges +foreach my $query (values(%hits)) { + foreach my $target (values(%{ $query })) { + foreach my $c ('Q', 'T') { + my @stitched; + foreach my $range (@{ $target->{$c . 'RANGES'} }) { + + my @new; + for (my $i = 0; $i < scalar @stitched; ++$i) { + if (defined ($stitched[$i]) && $range->overlaps($stitched[$i])) { + $range = $stitched[$i]->union($range); + } else { + push(@new, $stitched[$i]); + } + } + push(@new, $range); + @stitched = @new; + + } + } + } +} + + if (!$opts{'k'}) { unlink($pmatch_out); # Get rid of pmatch output file } else { @@ -146,36 +175,17 @@ foreach my $query (values(%hits)) { foreach my $c ('Q', 'T') { - my $overlap = 0; # Total query overlap length my $totlen = 0; # Total hit length - my @pair; - foreach my $hit ( - sort { $a->{$c . 'START'} <=> - $b->{$c . 'START'} } - @{ $target->{HITS} }) { - - $totlen += $hit->{$c . 'END'} - - $hit->{$c . 'START'} + 1; - - shift(@pair) if (scalar(@pair) == 2); - push(@pair, $hit); - next if (scalar(@pair) != 2); - - $r1->start($pair[0]{$c . 'START'}); - $r1->end($pair[0]{$c . 'END'}); - - $r2->start($pair[1]{$c . 'START'}); - $r2->end($pair[1]{$c . 'END'}); - - if ($r1->overlaps($r2)) { - $overlap += $r1->intersection($r2)->length; - } + foreach my $range (@{ $target->{$c . 'STITCH'} }) { + next unless defined $range; + $totlen += $range->length; } # Calculate the query and target identities $target->{$c . 'IDENT'} = - 100*($totlen - $overlap)/$target->{$c . 'LEN'}; + 100 * $totlen / $target->{$c . 'LEN'}; + } } } -- GitLab