conservation_statistics.pl 2.11 KB
Newer Older
1
#!/usr/bin/env perl
2
# Copyright [1999-2016] Wellcome Trust Sanger Institute and the EMBL-European Bioinformatics Institute
3 4 5 6 7 8 9 10 11 12 13 14 15
# 
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# 
#      http://www.apache.org/licenses/LICENSE-2.0
# 
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

16 17 18 19 20 21 22 23 24 25 26 27 28 29 30

## Script used in conjunction with exon_conservation_check.pl. Requires
## Tie::IxHash for key ordering.

## RUN:
##     conservation_statistics.pl my_conservation.log

use strict;
use warnings;
use Tie::IxHash;

tie my %exon_states, 'Tie::IxHash', (
  '!!' => 'Protein Coding MisMatch',
  '%%' => 'Non-Coding MisMatch',
  '??' => 'Missed mapping',
31
  '&&' => 'Eval error'
32 33 34
);
tie my %transcript_states, 'Tie::IxHash', (
  '@@' => 'Protein Coding Protein MisMatch',
35
  ';;' => 'Protein Coding cDNA MisMatch',
36 37
  '**' => 'Non-Coding MisMatch',
  'XX' => 'Missed mapping',
38
  '^^' => 'Eval error'
39 40 41 42 43 44 45 46 47
);

my $file = $ARGV[0];
die "Cannot find $file" if ! -f $file;
my %states = map { $_, 0 } (keys %exon_states, keys %transcript_states);
my %totals;
open my $fh, '<', $file or die "Cannot open file '$file': $!";
while(my $line = <$fh>) {
  chomp $line;
48
  if($line =~ /(^[!%?&@;*X^]{2})/xms) {
49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71
    $states{$1}++;
    next;
  }
  if($line =~ /^Total ((?:exons|transcripts)) : (\d+)$/) {
    $totals{$1} = $2;
  }
}
close $fh;

my $format = "%s : %d (%.2f%%)\n";

print "Exon summary\n";
foreach my $key (keys %exon_states) {
  my $count = $states{$key};
  my $percentage = ($count / $totals{exons})*100;
  printf $format, $exon_states{$key}, $count, $percentage;
}

print "\nTranscript summary\n";
foreach my $key (keys %transcript_states) {
  my $count = $states{$key};
  my $percentage = ($count / $totals{transcripts})*100;
  printf $format, $transcript_states{$key}, $count, $percentage;
72
}