Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Sign in
Toggle navigation
Open sidebar
ensembl-gh-mirror
ensembl
Commits
dadd5b12
Commit
dadd5b12
authored
Oct 18, 2011
by
Andreas Kusalananda Kähäri
Browse files
Reformat for readability.
parent
6092c0c5
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
117 additions
and
96 deletions
+117
-96
modules/Bio/EnsEMBL/IdMapping/InternalIdMapper/BaseMapper.pm
modules/Bio/EnsEMBL/IdMapping/InternalIdMapper/BaseMapper.pm
+117
-96
No files found.
modules/Bio/EnsEMBL/IdMapping/InternalIdMapper/BaseMapper.pm
View file @
dadd5b12
=head1 LICENSE
Copyright (c) 1999-2011 The European Bioinformatics Institute and
...
...
@@ -41,41 +42,43 @@ use Bio::EnsEMBL::Utils::Exception qw(throw warning);
use
Bio::EnsEMBL::Utils::
ScriptUtils
qw(path_append)
;
use
Bio::EnsEMBL::IdMapping::
MappingList
;
# scores are considered the same if (2.0 * (s1-s2))/(s1 + s2) < this
use
constant
SIMILAR_SCORE_RATIO
=>
0.01
;
#
# find the highest unambiguous score for all sources and targets in a scoring
# matrix
#
sub
basic_mapping
{
my
$self
=
shift
;
my
$matrix
=
shift
;
my
$self
=
shift
;
my
$matrix
=
shift
;
my
$mapping_name
=
shift
;
# argument checks
unless
(
$matrix
and
$matrix
->
isa
('
Bio::EnsEMBL::IdMapping::ScoredMappingMatrix
'))
{
unless
(
$matrix
and
$matrix
->
isa
('
Bio::EnsEMBL::IdMapping::ScoredMappingMatrix
')
)
{
throw
('
Need a Bio::EnsEMBL::IdMapping::ScoredMappingMatrix.
');
}
throw
('
Need a name for serialising the mapping.
')
unless
(
$mapping_name
);
# Create a new MappingList object. Specify AUTO_LOAD to load serialised
# existing mappings if found
my
$dump_path
=
path_append
(
$self
->
conf
->
param
('
basedir
'),
'
mapping
');
my
$mappings
=
Bio::EnsEMBL::IdMapping::
MappingList
->
new
(
-
DUMP_PATH
=>
$dump_path
,
-
CACHE_FILE
=>
"
${mapping_name}
.ser
",
-
AUTO_LOAD
=>
1
,
);
throw
('
Need a name for serialising the mapping.
')
unless
(
$mapping_name
);
# Create a new MappingList object. Specify AUTO_LOAD to load
# serialised existing mappings if found
my
$dump_path
=
path_append
(
$self
->
conf
->
param
('
basedir
'),
'
mapping
'
);
my
$mappings
=
Bio::EnsEMBL::IdMapping::
MappingList
->
new
(
-
DUMP_PATH
=>
$dump_path
,
-
CACHE_FILE
=>
"
${mapping_name}
.ser
",
-
AUTO_LOAD
=>
1
,
);
# checkpoint test: return a previously stored MappingList
if
(
$mappings
->
loaded
)
{
$self
->
logger
->
info
("
Read existing mappings from
${mapping_name}
.ser.
\n
");
if
(
$mappings
->
loaded
)
{
$self
->
logger
->
info
(
"
Read existing mappings from
${mapping_name}
.ser.
\n
");
return
$mappings
;
}
...
...
@@ -83,169 +86,187 @@ sub basic_mapping {
my
$targets_done
=
{};
# sort scoring matrix entries by descending score
my
@sorted_entries
=
sort
{
$b
->
score
<=>
$a
->
score
}
@
{
$matrix
->
get_all_Entries
};
my
@sorted_entries
=
sort
{
$b
->
score
<=>
$a
->
score
}
@
{
$matrix
->
get_all_Entries
};
# debug
#my $idx = substr($mapping_name, -1);
while
(
my
$entry
=
shift
(
@sorted_entries
))
{
while
(
my
$entry
=
shift
(
@sorted_entries
)
)
{
#$self->logger->debug("\nxxx$idx ".$entry->to_string." ");
# we already found a mapping for either source or target
next
if
(
$sources_done
->
{
$entry
->
source
}
or
$targets_done
->
{
$entry
->
target
});
next
if
(
$sources_done
->
{
$entry
->
source
}
or
$targets_done
->
{
$entry
->
target
}
);
#$self->logger->debug('d');
# there's a better mapping for either source or target
next
if
(
$self
->
higher_score_exists
(
$entry
,
$matrix
,
$sources_done
,
$targets_done
));
next
if
(
$self
->
higher_score_exists
(
$entry
,
$matrix
,
$sources_done
,
$targets_done
)
);
#$self->logger->debug('h');
# check for ambiguous mappings; they are dealt with later
my
$other_sources
=
[]
;
my
$other_targets
=
[]
;
if
(
$self
->
ambiguous_mapping
(
$entry
,
$matrix
,
$other_sources
,
$other_targets
))
{
if
(
$self
->
ambiguous_mapping
(
$entry
,
$matrix
,
$other_sources
,
$other_targets
)
)
{
#$self->logger->debug('a');
$other_sources
=
$self
->
filter_sources
(
$other_sources
,
$sources_done
);
$other_targets
=
$self
->
filter_targets
(
$other_targets
,
$targets_done
);
next
if
(
scalar
(
@$other_sources
)
or
scalar
(
@$other_targets
));
$other_sources
=
$self
->
filter_sources
(
$other_sources
,
$sources_done
);
$other_targets
=
$self
->
filter_targets
(
$other_targets
,
$targets_done
);
next
if
(
scalar
(
@$other_sources
)
or
scalar
(
@$other_targets
)
);
}
#$self->logger->debug('A');
# this is the best mapping, add it
$mappings
->
add_Entry
(
$entry
);
$sources_done
->
{
$entry
->
source
}
=
1
;
$targets_done
->
{
$entry
->
target
}
=
1
;
}
$sources_done
->
{
$entry
->
source
}
=
1
;
$targets_done
->
{
$entry
->
target
}
=
1
;
}
## end while ( my $entry = shift...)
# create checkpoint
$mappings
->
write_to_file
;
return
$mappings
;
}
}
## end sub basic_mapping
#
# Tell whether a strictly better-scoring alternative is still available for
# either end of a scoring matrix entry.
#
# Arguments:
#   $entry        - object providing source(), target() and score()
#   $matrix       - object providing get_sources_for_target(),
#                   get_targets_for_source() and get_score()
#   $sources_done - hashref; a true value marks a source as already mapped
#   $targets_done - hashref; a true value marks a target as already mapped
#
# Returns 1 if another source for the entry's target, or another target for
# the entry's source, has not been mapped yet and scores strictly higher than
# the entry itself; returns 0 otherwise.
#
sub higher_score_exists {
  my ( $self, $entry, $matrix, $sources_done, $targets_done ) = @_;

  my $source = $entry->source;
  my $target = $entry->target;
  my $score  = $entry->score;

  # Competing sources that map to the same target.
  foreach my $other_source ( @{ $matrix->get_sources_for_target($target) } ) {
    if (     $other_source != $source
         and !$sources_done->{$other_source}
         and $score < $matrix->get_score( $other_source, $target ) )
    {
      return 1;
    }
  }

  # Competing targets reachable from the same source.
  foreach my $other_target ( @{ $matrix->get_targets_for_source($source) } ) {
    if (     $other_target != $target
         and !$targets_done->{$other_target}
         and $score < $matrix->get_score( $source, $other_target ) )
    {
      return 1;
    }
  }

  return 0;
} ## end sub higher_score_exists
#
# find ambiguous mappings (see scores_similar() for definition)
#
#
# Collect the alternatives that make a scoring matrix entry ambiguous.
#
# Arguments:
#   $entry         - object providing source(), target() and score()
#   $matrix        - object providing get_sources_for_target(),
#                    get_targets_for_source() and get_score()
#   $other_sources - arrayref, appended to in place with every competing
#                    source found for the entry's target
#   $other_targets - arrayref, appended to in place with every competing
#                    target found for the entry's source
#
# A competitor is any other source/target whose score is either similar to
# the entry's score (as decided by $self->scores_similar) or strictly higher.
#
# Returns 1 if at least one competitor was found, 0 otherwise.
#
sub ambiguous_mapping {
  my ( $self, $entry, $matrix, $other_sources, $other_targets ) = @_;

  my $source = $entry->source;
  my $target = $entry->target;
  my $score  = $entry->score;

  my $retval = 0;

  foreach my $other_source ( @{ $matrix->get_sources_for_target($target) } ) {
    my $other_score = $matrix->get_score( $other_source, $target );

    if ( $other_source != $source
         and (    $self->scores_similar( $score, $other_score )
               or $score < $other_score ) )
    {
      $retval = 1;
      push @{$other_sources}, $other_source;
    }
  }

  foreach my $other_target ( @{ $matrix->get_targets_for_source($source) } ) {
    my $other_score = $matrix->get_score( $source, $other_target );

    if ( $other_target != $target
         and (    $self->scores_similar( $score, $other_score )
               or $score < $other_score ) )
    {
      $retval = 1;
      push @{$other_targets}, $other_target;
    }
  }

  return $retval;
} ## end sub ambiguous_mapping
#
#
# rule for similarity taken from java code...
#
#
# Decide whether two scores are close enough to be treated as equal.
#
# Arguments:
#   $s1, $s2 - the two numeric scores to compare
#
# Returns false (0) when $s1 is an exact match (== 1) and $s2 is not, so that
# exact matches always win over near misses.  Otherwise returns true when the
# relative difference 2*|s1-s2|/(s1+s2) is below SIMILAR_SCORE_RATIO.
#
sub scores_similar {
  my ( $self, $s1, $s2 ) = @_;

  # always give priority to exact matches over very similar ones
  return 0 if ( $s1 == 1 and $s2 < 1 );

  my $diff = abs( $s1 - $s2 );
  my $sum  = $s1 + $s2;

  # Guard against "Illegal division by zero" (e.g. both scores are 0):
  # identical scores are similar, anything else with a zero sum is not.
  if ( $sum == 0 ) {
    return ( $diff == 0 ) ? 1 : 0;
  }

  my $pc = 2 * $diff / $sum;

  return ( $pc < SIMILAR_SCORE_RATIO );
}
#
# Drop the sources that have already been mapped from a list of candidates.
#
# Arguments:
#   $other_sources - arrayref of candidate source ids
#   $sources_done  - hashref; a true value marks a source as already mapped
#
# Returns $other_sources itself when there is nothing to filter (the list or
# the done-hash is empty); otherwise returns a new arrayref holding only the
# candidates without a true entry in $sources_done, in their original order.
#
sub filter_sources {
  my ( $self, $other_sources, $sources_done ) = @_;

  # Nothing to do: hand back the caller's own list untouched.
  unless ( scalar( @{$other_sources} ) and scalar( keys %{$sources_done} ) ) {
    return $other_sources;
  }

  my @tmp = grep { !$sources_done->{$_} } @{$other_sources};

  return \@tmp;
}
#
# Drop the targets that have already been mapped from a list of candidates.
#
# Arguments:
#   $other_targets - arrayref of candidate target ids
#   $targets_done  - hashref; a true value marks a target as already mapped
#
# Returns $other_targets itself when there is nothing to filter (the list or
# the done-hash is empty); otherwise returns a new arrayref holding only the
# candidates without a true entry in $targets_done, in their original order.
#
sub filter_targets {
  my ( $self, $other_targets, $targets_done ) = @_;

  # Nothing to do: hand back the caller's own list untouched.
  unless ( scalar( @{$other_targets} ) and scalar( keys %{$targets_done} ) ) {
    return $other_targets;
  }

  my @tmp = grep { !$targets_done->{$_} } @{$other_targets};

  return \@tmp;
}
1
;
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment