Commit dadd5b12 authored by Andreas Kusalananda Kähäri's avatar Andreas Kusalananda Kähäri
Browse files

Reformat for readability.

parent 6092c0c5
=head1 LICENSE
Copyright (c) 1999-2011 The European Bioinformatics Institute and
......@@ -41,41 +42,43 @@ use Bio::EnsEMBL::Utils::Exception qw(throw warning);
use Bio::EnsEMBL::Utils::ScriptUtils qw(path_append);
use Bio::EnsEMBL::IdMapping::MappingList;
# scores are considered the same if (2.0 * (s1-s2))/(s1 + s2) < this
use constant SIMILAR_SCORE_RATIO => 0.01;
#
# find the highest unambiguous score for all sources and targets in a scoring
# matrix
#
sub basic_mapping {
my $self = shift;
my $matrix = shift;
my $self = shift;
my $matrix = shift;
my $mapping_name = shift;
# argument checks
unless ($matrix and
$matrix->isa('Bio::EnsEMBL::IdMapping::ScoredMappingMatrix')) {
unless ($matrix
and $matrix->isa('Bio::EnsEMBL::IdMapping::ScoredMappingMatrix') )
{
throw('Need a Bio::EnsEMBL::IdMapping::ScoredMappingMatrix.');
}
throw('Need a name for serialising the mapping.') unless ($mapping_name);
# Create a new MappingList object. Specify AUTO_LOAD to load serialised
# existing mappings if found
my $dump_path = path_append($self->conf->param('basedir'), 'mapping');
my $mappings = Bio::EnsEMBL::IdMapping::MappingList->new(
-DUMP_PATH => $dump_path,
-CACHE_FILE => "${mapping_name}.ser",
-AUTO_LOAD => 1,
);
throw('Need a name for serialising the mapping.')
unless ($mapping_name);
# Create a new MappingList object. Specify AUTO_LOAD to load
# serialised existing mappings if found
my $dump_path =
path_append( $self->conf->param('basedir'), 'mapping' );
my $mappings =
Bio::EnsEMBL::IdMapping::MappingList->new(
-DUMP_PATH => $dump_path,
-CACHE_FILE => "${mapping_name}.ser",
-AUTO_LOAD => 1, );
# checkpoint test: return a previously stored MappingList
if ($mappings->loaded) {
$self->logger->info("Read existing mappings from ${mapping_name}.ser.\n");
if ( $mappings->loaded ) {
$self->logger->info(
"Read existing mappings from ${mapping_name}.ser.\n");
return $mappings;
}
......@@ -83,169 +86,187 @@ sub basic_mapping {
my $targets_done = {};
# sort scoring matrix entries by descending score
my @sorted_entries = sort { $b->score <=> $a->score }
@{ $matrix->get_all_Entries };
my @sorted_entries =
sort { $b->score <=> $a->score } @{ $matrix->get_all_Entries };
# debug
#my $idx = substr($mapping_name, -1);
while (my $entry = shift(@sorted_entries)) {
while ( my $entry = shift(@sorted_entries) ) {
#$self->logger->debug("\nxxx$idx ".$entry->to_string." ");
# we already found a mapping for either source or target
next if ($sources_done->{$entry->source} or
$targets_done->{$entry->target});
next
if ( $sources_done->{ $entry->source }
or $targets_done->{ $entry->target } );
#$self->logger->debug('d');
# there's a better mapping for either source or target
next if ($self->higher_score_exists($entry, $matrix, $sources_done,
$targets_done));
next
if ( $self->higher_score_exists(
$entry, $matrix, $sources_done, $targets_done
) );
#$self->logger->debug('h');
# check for ambiguous mappings; they are dealt with later
my $other_sources = [];
my $other_targets = [];
if ($self->ambiguous_mapping($entry, $matrix, $other_sources, $other_targets)) {
if ( $self->ambiguous_mapping( $entry, $matrix,
$other_sources, $other_targets ) )
{
#$self->logger->debug('a');
$other_sources = $self->filter_sources($other_sources, $sources_done);
$other_targets = $self->filter_targets($other_targets, $targets_done);
next if (scalar(@$other_sources) or scalar(@$other_targets));
$other_sources =
$self->filter_sources( $other_sources, $sources_done );
$other_targets =
$self->filter_targets( $other_targets, $targets_done );
next if ( scalar(@$other_sources) or scalar(@$other_targets) );
}
#$self->logger->debug('A');
# this is the best mapping, add it
$mappings->add_Entry($entry);
$sources_done->{$entry->source} = 1;
$targets_done->{$entry->target} = 1;
}
$sources_done->{ $entry->source } = 1;
$targets_done->{ $entry->target } = 1;
} ## end while ( my $entry = shift...)
# create checkpoint
$mappings->write_to_file;
return $mappings;
}
} ## end sub basic_mapping
sub higher_score_exists {
my ($self, $entry, $matrix, $sources_done, $targets_done) = @_;
my ( $self, $entry, $matrix, $sources_done, $targets_done ) = @_;
my $source = $entry->source;
my $target = $entry->target;
my $score = $entry->score;
foreach my $other_source (@{ $matrix->get_sources_for_target($target) }) {
if ($other_source != $source and !$sources_done->{$other_source} and
$score < $matrix->get_score($other_source, $target)) {
return 1;
my $score = $entry->score;
foreach
my $other_source ( @{ $matrix->get_sources_for_target($target) } )
{
if ( $other_source != $source
and !$sources_done->{$other_source}
and $score < $matrix->get_score( $other_source, $target ) )
{
return 1;
}
}
foreach my $other_target (@{ $matrix->get_targets_for_source($source) }) {
if ($other_target != $target and !$targets_done->{$other_target} and
$score < $matrix->get_score($source, $other_target)) {
return 1;
foreach
my $other_target ( @{ $matrix->get_targets_for_source($source) } )
{
if ( $other_target != $target
and !$targets_done->{$other_target}
and $score < $matrix->get_score( $source, $other_target ) )
{
return 1;
}
}
return 0;
}
} ## end sub higher_score_exists
#
# find ambiguous mappings (see scores_similar() for definition)
#
sub ambiguous_mapping {
my ($self, $entry, $matrix, $other_sources, $other_targets) = @_;
my ( $self, $entry, $matrix, $other_sources, $other_targets ) = @_;
my $source = $entry->source;
my $target = $entry->target;
my $score = $entry->score;
my $score = $entry->score;
my $retval = 0;
foreach my $other_source (@{ $matrix->get_sources_for_target($target) }) {
my $other_score = $matrix->get_score($other_source, $target);
if ($other_source != $source and
($self->scores_similar($score, $other_score) or $score < $other_score)) {
$retval = 1;
push @{ $other_sources }, $other_source;
foreach
my $other_source ( @{ $matrix->get_sources_for_target($target) } )
{
my $other_score = $matrix->get_score( $other_source, $target );
if ( $other_source != $source
and ( $self->scores_similar( $score, $other_score )
or $score < $other_score ) )
{
$retval = 1;
push @{$other_sources}, $other_source;
}
}
foreach my $other_target (@{ $matrix->get_targets_for_source($source) }) {
my $other_score = $matrix->get_score($source, $other_target);
if ($other_target != $target and
($self->scores_similar($score, $other_score) or $score < $other_score)) {
$retval = 1;
push @{ $other_targets }, $other_target;
foreach
my $other_target ( @{ $matrix->get_targets_for_source($source) } )
{
my $other_score = $matrix->get_score( $source, $other_target );
if ( $other_target != $target
and ( $self->scores_similar( $score, $other_score )
or $score < $other_score ) )
{
$retval = 1;
push @{$other_targets}, $other_target;
}
}
return $retval;
}
} ## end sub ambiguous_mapping
#
#
# rule for similarity taken from java code...
#
sub scores_similar {
my ($self, $s1, $s2) = @_;
my ( $self, $s1, $s2 ) = @_;
# always give priority to exact matches over very similar ones
return 0 if ($s1 == 1 and $s2 < 1);
my $diff = $s1 -$s2;
$diff = -$diff if ($diff < 0);
my $pc = 2 * $diff / ($s1 + $s2);
return ($pc < SIMILAR_SCORE_RATIO);
}
return 0 if ( $s1 == 1 and $s2 < 1 );
my $diff = $s1 - $s2;
$diff = -$diff if ( $diff < 0 );
my $pc = 2*$diff/( $s1 + $s2 );
return ( $pc < SIMILAR_SCORE_RATIO );
}
sub filter_sources {
my ($self, $other_sources, $sources_done) = @_;
my ( $self, $other_sources, $sources_done ) = @_;
unless (scalar(@$other_sources) and scalar(keys %$sources_done)) {
unless ( scalar(@$other_sources) and scalar( keys %$sources_done ) ) {
return $other_sources;
}
my @tmp = ();
foreach my $e (@{ $other_sources }) {
push @tmp, $e unless ($sources_done->{$e});
foreach my $e ( @{$other_sources} ) {
push @tmp, $e unless ( $sources_done->{$e} );
}
return \@tmp;
}
sub filter_targets {
my ($self, $other_targets, $targets_done) = @_;
my ( $self, $other_targets, $targets_done ) = @_;
unless (scalar(@{ $other_targets }) and scalar(keys %$targets_done)) {
unless ( scalar( @{$other_targets} )
and scalar( keys %$targets_done ) )
{
return $other_targets;
}
my @tmp = ();
foreach my $e (@{ $other_targets }) {
push @tmp, $e unless ($targets_done->{$e});
foreach my $e ( @{$other_targets} ) {
push @tmp, $e unless ( $targets_done->{$e} );
}
return \@tmp;
}
1;
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment