Skip to content
Snippets Groups Projects
Commit 0f5f78d2 authored by Patrick Meidl's avatar Patrick Meidl
Browse files

script to compare scores from two stable ID mapping runs

parent ec74ab6a
No related branches found
No related tags found
No related merge requests found
#!/software/bin/perl
=head1 NAME
=head1 SYNOPSIS
.pl [arguments]
Required arguments:
--dumppath=PATH base directory of ID mapping results
Optional arguments:
--conffile, --conf=FILE read parameters from FILE
(default: conf/Conversion.ini)
--logfile, --log=FILE log to FILE (default: *STDOUT)
--logpath=PATH write logfile to PATH (default: .)
--logappend, --log_append append to logfile (default: truncate)
--loglevel=LEVEL define log level (default: INFO)
-i, --interactive=0|1 run script interactively (default: true)
-h, --help, -? print help (this message)
=head1 DESCRIPTION
=head1 LICENCE
This code is distributed under an Apache style licence. Please see
http://www.ensembl.org/info/about/code_licence.html for details.
=head1 AUTHOR
Patrick Meidl <meidl@ebi.ac.uk>, Ensembl core API team
=head1 CONTACT
Please post comments/questions to the Ensembl development list
<ensembl-dev@ebi.ac.uk>
=cut
use strict;
use warnings;
no warnings 'uninitialized';
use FindBin qw($Bin);
use Bio::EnsEMBL::Utils::ConfParser;
use Bio::EnsEMBL::Utils::Logger;
use Bio::EnsEMBL::Utils::ScriptUtils qw(path_append);
use Bio::EnsEMBL::Utils::Exception qw(throw warning);
# parse configuration and commandline arguments
my $conf = new Bio::EnsEMBL::Utils::ConfParser(
-SERVERROOT => "$Bin/../../../..",
-DEFAULT_CONF => "$Bin/default.conf"
);
$conf->parse_options(
'path1|p1=s' => 1,
'path2|p2=s' => 1,
'type|t=s@' => 0,
);
# set default logpath
unless ($conf->param('logpath')) {
$conf->param('logpath', path_append($conf->param('dumppath'), 'log'));
}
# get log filehandle and print heading and parameters to logfile
my $logger = new Bio::EnsEMBL::Utils::Logger(
-LOGFILE => $conf->param('logfile'),
-LOGAUTO => $conf->param('logauto'),
-LOGAUTOBASE => 'compare_scores',
-LOGAUTOID => $conf->param('logautoid'),
-LOGPATH => $conf->param('logpath'),
-LOGAPPEND => $conf->param('logappend'),
-LOGLEVEL => $conf->param('loglevel'),
);
# initialise log
$logger->init_log($conf->list_param_values);
my @types = $conf->param('type') || qw(exon transcript gene);
foreach my $type (@types) {
&compare_scores($type);
}
# finish logfile
$logger->finish_log;
### END main ###
sub compare_scores {
my $type = shift;
# read scores from file
$logger->info("Reading $type scores...\n", 0, 'stamped');
my $scores1 = &parse_file($conf->param('path1')."/${type}_scores.txt");
my $scores2 = &parse_file($conf->param('path2')."/${type}_scores.txt");
$logger->info("Done.\n\n", 0, 'stamped');
# look for pairs with scores in both result sets
my %stats;
my @both = ();
my @only1 = ();
my @only2 = ();
foreach my $key (keys %$scores1) {
if ($scores2->{$key}) {
push @both, $key;
} else {
push @only1, $key;
}
}
foreach my $key (keys %$scores2) {
unless ($scores1->{$key}) {
push @only2, $key;
}
}
$stats{'TOT1'} = keys %$scores1;
$stats{'TOT2'} = keys %$scores2;
$stats{'BOTH'} = @both;
$stats{'ONLY1'} = @only1;
$stats{'ONLY2'} = @only2;
$logger->info("Only in set 1 (first 10 shown):\n");
my $i;
foreach my $key (sort @only1) {
$logger->info(sprintf("%-20s%-10s\n", $key, $scores1->{$key}), 1);
last if ($i++ == 10);
}
$logger->info("\nOnly in set 2 (first 10 shown):\n");
my $j;
foreach my $key (sort @only2) {
$logger->info(sprintf("%-20s%-10s\n", $key, $scores2->{$key}), 1);
last if ($j++ == 10);
}
# compare scores which are present in both result sets
$logger->info("\nScores different (first 10 shown):\n");
foreach my $key (@both) {
my $s1 = $scores1->{$key};
my $s2 = $scores2->{$key};
my $diff = $s1 - $s2;
$diff = -$diff if ($diff < 0);
unless ($diff < 0.000002) {
$stats{'BOTH_DIFF'}++;
if ($stats{'BOTH_DIFF'} <= 10) {
$logger->info(sprintf("%-20s%-10s%-10s\n", $key, $s1, $s2), 1);
}
}
}
$logger->info("\nStats:\n");
foreach my $t (qw(TOT1 TOT2 BOTH ONLY1 ONLY2 BOTH_DIFF)) {
$logger->info(sprintf("%-10s%8d\n", $t, $stats{$t}), 1);
}
}
sub parse_file {
my $file = shift;
open(my $fh, '<', $file) or
throw("Unable to open $file for reading: $!");
my %scores = ();
while (my $line = <$fh>) {
chomp $line;
my ($old_id, $new_id, $score) = split(/\s+/, $line);
$scores{"$old_id:$new_id"} = sprintf("%.6f", $score);
}
close($fh);
return \%scores;
}
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment