Skip to content
Snippets Groups Projects
Commit ce19e680 authored by Patrick Meidl's avatar Patrick Meidl
Browse files

plugin architecture for InternalIdMapper

parent e28ad271
No related branches found
No related tags found
No related merge requests found
...@@ -5,7 +5,7 @@ dry_run = 0 ...@@ -5,7 +5,7 @@ dry_run = 0
loglevel = DEBUG loglevel = DEBUG
; paths ; paths
basedir = /lustre/work1/ensembl/pm2/idmapping/perl/2008-04-22c basedir = /lustre/work1/ensembl/pm2/idmapping/perl/2008-04-28
; prepend this path to your 'log' parameter ; prepend this path to your 'log' parameter
; will default to "$basedir/log" if not set ; will default to "$basedir/log" if not set
...@@ -27,7 +27,7 @@ targetdbname = pm2_pan_troglodytes_core_41_21 ...@@ -27,7 +27,7 @@ targetdbname = pm2_pan_troglodytes_core_41_21
; caching ; caching
;cache_method = build_cache_all ;cache_method = build_cache_all
build_cache_auto_threshold = 100 build_cache_auto_threshold = 100
build_cache_concurrent_jobs = 200 build_cache_concurrent_jobs = 200
; limit ; limit
;region = chromosome:CHIMP1A:1:1:2000000:1 ;region = chromosome:CHIMP1A:1:1:2000000:1
...@@ -50,6 +50,25 @@ transcript_score_threshold = 0 ...@@ -50,6 +50,25 @@ transcript_score_threshold = 0
synteny_rescore_jobs = 20 synteny_rescore_jobs = 20
;lsf_opt_synteny_rescore = ;lsf_opt_synteny_rescore =
; InternalIdMapper
;plugin_internal_id_mappers_gene = \
; Bio::EnsEMBL::IdMapping::InternalIdMapper::EnsemblGeneGeneric::init_basic,\
; Bio::EnsEMBL::IdMapping::InternalIdMapper::EnsemblGeneGeneric::synteny,\
; Bio::EnsEMBL::IdMapping::InternalIdMapper::EnsemblGeneGeneric::best_transcript,\
; Bio::EnsEMBL::IdMapping::InternalIdMapper::EnsemblGeneGeneric::biotype,\
; Bio::EnsEMBL::IdMapping::InternalIdMapper::EnsemblGeneGeneric::internal_id
;plugin_internal_id_mappers_transcript = \
; Bio::EnsEMBL::IdMapping::InternalIdMapper::EnsemblTranscriptGeneric::init_basic,\
; Bio::EnsEMBL::IdMapping::InternalIdMapper::EnsemblTranscriptGeneric::non_exact_translation,\
; Bio::EnsEMBL::IdMapping::InternalIdMapper::EnsemblTranscriptGeneric::mapped_gene,\
; Bio::EnsEMBL::IdMapping::InternalIdMapper::EnsemblTranscriptGeneric::internal_id,\
; Bio::EnsEMBL::IdMapping::InternalIdMapper::EnsemblTranscriptGeneric::single_gene
;plugin_internal_id_mappers_exon = \
; Bio::EnsEMBL::IdMapping::InternalIdMapper::EnsemblExonGeneric::init_basic,\
; Bio::EnsEMBL::IdMapping::InternalIdMapper::EnsemblExonGeneric::mapped_transcript
; StableIdMapper ; StableIdMapper
mapping_types = gene,transcript,translation,exon mapping_types = gene,transcript,translation,exon
;plugin_stable_id_generator = Bio::EnsEMBL::IdMapping::StableIdGenerator::EnsemblGeneric ;plugin_stable_id_generator = Bio::EnsEMBL::IdMapping::StableIdGenerator::EnsemblGeneric
......
...@@ -87,6 +87,9 @@ $conf->parse_options( ...@@ -87,6 +87,9 @@ $conf->parse_options(
'exonerate_jobs|exoneratejobs=i' => 0, 'exonerate_jobs|exoneratejobs=i' => 0,
'exonerate_bytes_per_job|exoneratebytesperjob=f' => 0, 'exonerate_bytes_per_job|exoneratebytesperjob=f' => 0,
'exonerate_extra_params|exonerateextraparams=s' => 0, 'exonerate_extra_params|exonerateextraparams=s' => 0,
'plugin_internal_id_mappers_gene=s@' => 0,
'plugin_internal_id_mappers_transcript=s@' => 0,
'plugin_internal_id_mappers_exon=s@' => 0,
'mapping_types=s@' => 1, 'mapping_types=s@' => 1,
'plugin_stable_id_generator=s' => 0, 'plugin_stable_id_generator=s' => 0,
'upload_events|uploadevents=s' => 0, 'upload_events|uploadevents=s' => 0,
......
...@@ -96,6 +96,9 @@ $conf->parse_options( ...@@ -96,6 +96,9 @@ $conf->parse_options(
'exonerate_jobs|exoneratejobs=i' => 0, 'exonerate_jobs|exoneratejobs=i' => 0,
'exonerate_bytes_per_job|exoneratebytesperjob=f' => 0, 'exonerate_bytes_per_job|exoneratebytesperjob=f' => 0,
'exonerate_extra_params|exonerateextraparams=s' => 0, 'exonerate_extra_params|exonerateextraparams=s' => 0,
'plugin_internal_id_mappers_gene=s@' => 0,
'plugin_internal_id_mappers_transcript=s@' => 0,
'plugin_internal_id_mappers_exon=s@' => 0,
'mapping_types=s@' => 1, 'mapping_types=s@' => 1,
'plugin_stable_id_generator=s' => 0, 'plugin_stable_id_generator=s' => 0,
'upload_events|uploadevents=s' => 0, 'upload_events|uploadevents=s' => 0,
......
This diff is collapsed.
package Bio::EnsEMBL::IdMapping::InternalIdMapper::BaseMapper;
=head1 NAME
=head1 SYNOPSIS
=head1 DESCRIPTION
=head1 METHODS
=head1 LICENCE
This code is distributed under an Apache style licence. Please see
http://www.ensembl.org/info/about/code_licence.html for details.
=head1 AUTHOR
Patrick Meidl <meidl@ebi.ac.uk>, Ensembl core API team
=head1 CONTACT
Please post comments/questions to the Ensembl development list
<ensembl-dev@ebi.ac.uk>
=cut
use strict;
use warnings;
no warnings 'uninitialized';
use Bio::EnsEMBL::IdMapping::BaseObject;
our @ISA = qw(Bio::EnsEMBL::IdMapping::BaseObject);
use Bio::EnsEMBL::Utils::Exception qw(throw warning);
use Bio::EnsEMBL::Utils::ScriptUtils qw(path_append);
use Bio::EnsEMBL::IdMapping::MappingList;
# scores are considered the same if (2.0 * (s1-s2))/(s1 + s2) < this
use constant SIMILAR_SCORE_RATIO => 0.01;
#
# find the highest unambiguous score for all sources and targets in a scoring
# matrix
#
# Arguments: a Bio::EnsEMBL::IdMapping::ScoredMappingMatrix and a name which
# is used to build the cache file name ("<name>.ser") of the MappingList.
# Returns the MappingList, either as found by AUTO_LOAD or freshly built and
# written to file.
# Throws if the matrix or the name is missing.
#
sub basic_mapping {
  my ($self, $matrix, $mapping_name) = @_;

  # argument checks
  if (!$matrix or
      !$matrix->isa('Bio::EnsEMBL::IdMapping::ScoredMappingMatrix')) {
    throw('Need a Bio::EnsEMBL::IdMapping::ScoredMappingMatrix.');
  }

  if (!$mapping_name) {
    throw('Need a name for serialising the mapping.');
  }

  # Create a new MappingList object. Specify AUTO_LOAD to load serialised
  # existing mappings if found
  my $dump_path = path_append($self->conf->param('basedir'), 'mapping');

  my $mappings = Bio::EnsEMBL::IdMapping::MappingList->new(
    -DUMP_PATH   => $dump_path,
    -CACHE_FILE  => "${mapping_name}.ser",
    -AUTO_LOAD   => 1,
  );

  # checkpoint test: return a previously stored MappingList
  if ($mappings->loaded) {
    $self->logger->info("Read existing mappings from ${mapping_name}.ser.\n");
    return $mappings;
  }

  # sources and targets which already got a mapping in this run
  my $sources_done = {};
  my $targets_done = {};

  # walk through the scoring matrix entries, best score first
  my @by_score = sort { $b->score <=> $a->score }
    @{ $matrix->get_all_Entries };

  foreach my $entry (@by_score) {

    # we already found a mapping for either source or target
    next if ($sources_done->{$entry->source} or
             $targets_done->{$entry->target});

    # there's a better mapping for either source or target
    next if ($self->higher_score_exists($entry, $matrix, $sources_done,
      $targets_done));

    # check for ambiguous mappings; they are dealt with later
    my $rival_sources = [];
    my $rival_targets = [];

    if ($self->ambiguous_mapping($entry, $matrix, $rival_sources,
        $rival_targets)) {

      # rivals which are mapped already don't make this entry ambiguous
      $rival_sources = $self->filter_sources($rival_sources, $sources_done);
      $rival_targets = $self->filter_targets($rival_targets, $targets_done);

      next if (scalar(@$rival_sources) or scalar(@$rival_targets));
    }

    # this is the best mapping, add it
    $mappings->add_Entry($entry);

    $sources_done->{$entry->source} = 1;
    $targets_done->{$entry->target} = 1;
  }

  # create checkpoint
  $mappings->write_to_file;

  return $mappings;
}
#
# check whether an entry is outscored by another source for its target or by
# another target for its source; sources and targets flagged in $sources_done
# and $targets_done are ignored
#
# Returns 1 if a higher score exists in the matrix, 0 otherwise.
#
sub higher_score_exists {
  my ($self, $entry, $matrix, $sources_done, $targets_done) = @_;

  my $source = $entry->source;
  my $target = $entry->target;
  my $score = $entry->score;

  # other sources competing for the same target
  foreach my $rival (@{ $matrix->get_sources_for_target($target) }) {
    next if ($rival == $source or $sources_done->{$rival});
    return 1 if ($score < $matrix->get_score($rival, $target));
  }

  # other targets competing for the same source
  foreach my $rival (@{ $matrix->get_targets_for_source($source) }) {
    next if ($rival == $target or $targets_done->{$rival});
    return 1 if ($score < $matrix->get_score($source, $rival));
  }

  return 0;
}
#
# find ambiguous mappings (see scores_similar() for definition)
#
# Every other source for the entry's target, and every other target for the
# entry's source, whose score is similar to or higher than the entry's score
# is appended to the $other_sources / $other_targets array references passed
# in. Returns 1 if anything was appended, 0 otherwise.
#
sub ambiguous_mapping {
  my ($self, $entry, $matrix, $other_sources, $other_targets) = @_;

  my $source = $entry->source;
  my $target = $entry->target;
  my $score = $entry->score;

  my $ambiguous = 0;

  # competing sources for this target
  foreach my $rival (@{ $matrix->get_sources_for_target($target) }) {
    my $rival_score = $matrix->get_score($rival, $target);

    next if ($rival == $source);
    next unless ($self->scores_similar($score, $rival_score) or
                 $score < $rival_score);

    push @{ $other_sources }, $rival;
    $ambiguous = 1;
  }

  # competing targets for this source
  foreach my $rival (@{ $matrix->get_targets_for_source($source) }) {
    my $rival_score = $matrix->get_score($source, $rival);

    next if ($rival == $target);
    next unless ($self->scores_similar($score, $rival_score) or
                 $score < $rival_score);

    push @{ $other_targets }, $rival;
    $ambiguous = 1;
  }

  return $ambiguous;
}
#
# rule for similarity taken from java code...
#
# Two scores are considered the same if 2 * |s1 - s2| / (s1 + s2) is below
# SIMILAR_SCORE_RATIO. Returns a true value if they are, a false value
# otherwise.
#
sub scores_similar {
  my ($self, $s1, $s2) = @_;

  # always give priority to exact matches over very similar ones
  return 0 if ($s1 == 1 and $s2 < 1);

  my $diff = abs($s1 - $s2);
  my $sum = $s1 + $s2;

  # the relative difference is undefined when the scores sum up to zero (e.g.
  # both are 0) and the division below would die; in this case the scores
  # are similar only if they are identical
  return ($diff == 0) unless ($sum);

  return (2 * $diff / $sum < SIMILAR_SCORE_RATIO);
}
#
# remove sources flagged in $sources_done from a list of sources
#
# Returns the array reference passed in if either the list or the hash is
# empty, otherwise a new array reference holding the remaining sources.
#
sub filter_sources {
  my ($self, $other_sources, $sources_done) = @_;

  # nothing to filter
  return $other_sources unless (scalar(@$other_sources) and
                                scalar(keys %$sources_done));

  return [ grep { !$sources_done->{$_} } @{ $other_sources } ];
}
#
# remove targets flagged in $targets_done from a list of targets
#
# Returns the array reference passed in if either the list or the hash is
# empty, otherwise a new array reference holding the remaining targets.
#
sub filter_targets {
  my ($self, $other_targets, $targets_done) = @_;

  # nothing to filter
  return $other_targets unless (scalar(@{ $other_targets }) and
                                scalar(keys %$targets_done));

  return [ grep { !$targets_done->{$_} } @{ $other_targets } ];
}
1;
package Bio::EnsEMBL::IdMapping::InternalIdMapper::EnsemblExonGeneric;
=head1 NAME
=head1 SYNOPSIS
=head1 DESCRIPTION
=head1 METHODS
=head1 LICENCE
This code is distributed under an Apache style licence. Please see
http://www.ensembl.org/info/about/code_licence.html for details.
=head1 AUTHOR
Patrick Meidl <meidl@ebi.ac.uk>, Ensembl core API team
=head1 CONTACT
Please post comments/questions to the Ensembl development list
<ensembl-dev@ebi.ac.uk>
=cut
use strict;
use warnings;
no warnings 'uninitialized';
use Bio::EnsEMBL::IdMapping::InternalIdMapper::BaseMapper;
our @ISA = qw(Bio::EnsEMBL::IdMapping::InternalIdMapper::BaseMapper);
use Bio::EnsEMBL::Utils::Exception qw(throw warning);
#
# basic mapping
#
# Runs basic_mapping() on the exon scores using the name "exon_mappings$num",
# then requests a shrinked matrix named "exon_matrix" plus the incremented
# number from the score builder. The mappings passed in are not used.
# Returns the list (new scores, mappings).
#
sub init_basic {
  my ($self, $num, $esb, $mappings, $exon_scores) = @_;

  $self->logger->info("Basic exon mapping...\n", 0, 'stamped');

  my $mappings_name = "exon_mappings$num";
  $mappings = $self->basic_mapping($exon_scores, $mappings_name);

  # the resulting matrix carries the number of the next mapping step
  $num++;
  my $matrix_name = "exon_matrix$num";
  my $new_scores = $esb->create_shrinked_matrix($exon_scores, $mappings,
    $matrix_name);

  return ($new_scores, $mappings);
}
#
# reduce score for mappings of exons which do not belong to mapped
# transcripts
#
# Unless the exon scores report themselves as loaded, they are rescored with
# non_mapped_transcript_rescore() and written to file. Afterwards
# basic_mapping() is run (name "exon_mappings$num") and a shrinked matrix
# named "exon_matrix" plus the incremented number is requested.
# Returns the list (new scores, mappings).
#
# NOTE(review): the mappings passed in are handed to
# non_mapped_transcript_rescore(); whether the caller passes transcript or
# exon mappings here can't be seen from this module - confirm.
#
sub mapped_transcript {
  my ($self, $num, $esb, $mappings, $exon_scores) = @_;

  $self->logger->info("Exons in mapped transcript...\n", 0, 'stamped');

  if (!$exon_scores->loaded) {
    $esb->non_mapped_transcript_rescore($exon_scores, $mappings);
    $exon_scores->write_to_file;
  }

  my $mappings_name = "exon_mappings$num";
  $mappings = $self->basic_mapping($exon_scores, $mappings_name);

  # the resulting matrix carries the number of the next mapping step
  $num++;
  my $matrix_name = "exon_matrix$num";
  my $new_scores = $esb->create_shrinked_matrix($exon_scores, $mappings,
    $matrix_name);

  return ($new_scores, $mappings);
}
1;
package Bio::EnsEMBL::IdMapping::InternalIdMapper::EnsemblGeneGeneric;
=head1 NAME
Bio::EnsEMBL::IdMapping::InternalIdMapper::EnsemblGeneGeneric - default Ensembl
InternalIdMapper implementation for genes
=head1 SYNOPSIS
=head1 DESCRIPTION
=head1 METHODS
=head1 LICENCE
This code is distributed under an Apache style licence. Please see
http://www.ensembl.org/info/about/code_licence.html for details.
=head1 AUTHOR
Patrick Meidl <meidl@ebi.ac.uk>, Ensembl core API team
=head1 CONTACT
Please post comments/questions to the Ensembl development list
<ensembl-dev@ebi.ac.uk>
=cut
use strict;
use warnings;
no warnings 'uninitialized';
use Bio::EnsEMBL::IdMapping::InternalIdMapper::BaseMapper;
our @ISA = qw(Bio::EnsEMBL::IdMapping::InternalIdMapper::BaseMapper);
use Bio::EnsEMBL::Utils::Exception qw(throw warning);
use Bio::EnsEMBL::Utils::ScriptUtils qw(path_append);
#
# basic mapping
#
# Runs basic_mapping() on the gene scores using the name "gene_mappings$num",
# then requests a shrinked matrix named "gene_matrix" plus the incremented
# number from the score builder. The mappings passed in are not used.
# Returns the list (new scores, mappings).
#
sub init_basic {
  my ($self, $num, $gsb, $mappings, $gene_scores) = @_;

  $self->logger->info("Basic gene mapping...\n", 0, 'stamped');

  my $mappings_name = "gene_mappings$num";
  $mappings = $self->basic_mapping($gene_scores, $mappings_name);

  # the resulting matrix carries the number of the next mapping step
  $num++;
  my $matrix_name = "gene_matrix$num";
  my $new_scores = $gsb->create_shrinked_matrix($gene_scores, $mappings,
    $matrix_name);

  return ($new_scores, $mappings);
}
#
# build the synteny from unambiguous mappings
#
# Unless the gene scores report themselves as loaded, a SyntenyFramework is
# built from the mappings passed in, the gene scores are rescored with
# rescore_gene_matrix_lsf() and written to file. Afterwards basic_mapping()
# is run (name "gene_mappings$num") and a shrinked matrix named "gene_matrix"
# plus the incremented number is requested.
# Returns the list (new scores, new mappings).
#
sub synteny {
  my $self = shift;
  my $num = shift;
  my $gsb = shift;
  my $mappings = shift;
  my $gene_scores = shift;

  unless ($gene_scores->loaded) {

    $self->logger->info("Synteny Framework building...\n", 0, 'stamped');

    # this package doesn't load the SyntenyFramework at the top of the file,
    # so load it here instead of relying on the caller having done so
    require Bio::EnsEMBL::IdMapping::SyntenyFramework;

    my $dump_path = path_append($self->conf->param('basedir'), 'mapping');

    my $sf = Bio::EnsEMBL::IdMapping::SyntenyFramework->new(
      -DUMP_PATH    => $dump_path,
      -CACHE_FILE   => 'synteny_framework.ser',
      -LOGGER       => $self->logger,
      -CONF         => $self->conf,
      -CACHE        => $self->cache,
    );

    $sf->build_synteny($mappings);

    # use it to rescore the genes
    $self->logger->info("\nSynteny assisted mapping...\n", 0, 'stamped');
    $gene_scores = $sf->rescore_gene_matrix_lsf($gene_scores);

    # checkpoint
    $gene_scores->write_to_file;
  }

  my $new_mappings = $self->basic_mapping($gene_scores, "gene_mappings$num");

  # the resulting matrix carries the number of the next mapping step
  $num++;
  my $new_scores = $gsb->create_shrinked_matrix($gene_scores, $new_mappings,
    "gene_matrix$num");

  return ($new_scores, $new_mappings);
}
#
# rescore with simple scoring function and try again
#
# Unless the gene scores report themselves as loaded, they are rescored with
# simple_gene_rescore() using the transcript scores and written to file.
# Afterwards basic_mapping() is run (name "gene_mappings$num") and a shrinked
# matrix named "gene_matrix" plus the incremented number is requested. The
# mappings passed in are not used.
# Returns the list (new scores, new mappings).
#
sub best_transcript {
  my ($self, $num, $gsb, $mappings, $gene_scores, $transcript_scores) = @_;

  $self->logger->info("Retry with simple best transcript score...\n", 0, 'stamped');

  if (!$gene_scores->loaded) {
    $gsb->simple_gene_rescore($gene_scores, $transcript_scores);
    $gene_scores->write_to_file;
  }

  my $mappings_name = "gene_mappings$num";
  my $new_mappings = $self->basic_mapping($gene_scores, $mappings_name);

  # the resulting matrix carries the number of the next mapping step
  $num++;
  my $matrix_name = "gene_matrix$num";
  my $new_scores = $gsb->create_shrinked_matrix($gene_scores, $new_mappings,
    $matrix_name);

  return ($new_scores, $new_mappings);
}
#
# rescore by penalising scores between genes with different biotypes
#
# Unless the gene scores report themselves as loaded, they are rescored with
# biotype_gene_rescore() and written to file. Afterwards basic_mapping() is
# run (name "gene_mappings$num") and a shrinked matrix named "gene_matrix"
# plus the incremented number is requested. The mappings passed in are not
# used.
# Returns the list (new scores, new mappings).
#
sub biotype {
  my ($self, $num, $gsb, $mappings, $gene_scores) = @_;

  $self->logger->info("Retry with biotype disambiguation...\n", 0, 'stamped');

  if (!$gene_scores->loaded) {
    $gsb->biotype_gene_rescore($gene_scores);
    $gene_scores->write_to_file;
  }

  my $mappings_name = "gene_mappings$num";
  my $new_mappings = $self->basic_mapping($gene_scores, $mappings_name);

  # the resulting matrix carries the number of the next mapping step
  $num++;
  my $matrix_name = "gene_matrix$num";
  my $new_scores = $gsb->create_shrinked_matrix($gene_scores, $new_mappings,
    $matrix_name);

  return ($new_scores, $new_mappings);
}
#
# selectively rescore by penalising scores between genes with different
# internalIDs
#
# Unless the gene scores report themselves as loaded, they are rescored with
# internal_id_rescore() and written to file. Afterwards basic_mapping() is
# run (name "gene_mappings$num") and a shrinked matrix named "gene_matrix"
# plus the incremented number is requested. The mappings passed in are not
# used.
# Returns the list (new scores, new mappings).
#
sub internal_id {
  my ($self, $num, $gsb, $mappings, $gene_scores) = @_;

  $self->logger->info("Retry with internalID disambiguation...\n", 0, 'stamped');

  if (!$gene_scores->loaded) {
    $gsb->internal_id_rescore($gene_scores);
    $gene_scores->write_to_file;
  }

  my $mappings_name = "gene_mappings$num";
  my $new_mappings = $self->basic_mapping($gene_scores, $mappings_name);

  # the resulting matrix carries the number of the next mapping step
  $num++;
  my $matrix_name = "gene_matrix$num";
  my $new_scores = $gsb->create_shrinked_matrix($gene_scores, $new_mappings,
    $matrix_name);

  return ($new_scores, $new_mappings);
}
1;
package Bio::EnsEMBL::IdMapping::InternalIdMapper::EnsemblTranscriptGeneric;
=head1 NAME
=head1 SYNOPSIS
=head1 DESCRIPTION
=head1 METHODS
=head1 LICENCE
This code is distributed under an Apache style licence. Please see
http://www.ensembl.org/info/about/code_licence.html for details.
=head1 AUTHOR
Patrick Meidl <meidl@ebi.ac.uk>, Ensembl core API team
=head1 CONTACT
Please post comments/questions to the Ensembl development list
<ensembl-dev@ebi.ac.uk>
=cut
use strict;
use warnings;
no warnings 'uninitialized';
use Bio::EnsEMBL::IdMapping::InternalIdMapper::BaseMapper;
our @ISA = qw(Bio::EnsEMBL::IdMapping::InternalIdMapper::BaseMapper);
use Bio::EnsEMBL::Utils::Exception qw(throw warning);
use Bio::EnsEMBL::Utils::ScriptUtils qw(path_append);
#
# basic mapping
#
# Runs basic_mapping() on the transcript scores using the name
# "transcript_mappings$num", then requests a shrinked matrix named
# "transcript_matrix" plus the incremented number from the score builder.
# The mappings passed in are not used.
# Returns the list (new scores, mappings).
#
sub init_basic {
  my ($self, $num, $tsb, $mappings, $transcript_scores) = @_;

  $self->logger->info("Basic transcript mapping...\n", 0, 'stamped');

  my $mappings_name = "transcript_mappings$num";
  $mappings = $self->basic_mapping($transcript_scores, $mappings_name);

  # the resulting matrix carries the number of the next mapping step
  $num++;
  my $matrix_name = "transcript_matrix$num";
  my $new_scores = $tsb->create_shrinked_matrix($transcript_scores, $mappings,
    $matrix_name);

  return ($new_scores, $mappings);
}
#
# handle cases with exact match but different translation
#
# Unless the transcript scores report themselves as loaded, they are rescored
# with different_translation_rescore() and written to file. Afterwards
# basic_mapping() is run (name "transcript_mappings$num") and a shrinked
# matrix named "transcript_matrix" plus the incremented number is requested.
# The mappings passed in are not used.
# Returns the list (new scores, mappings).
#
sub non_exact_translation {
  my ($self, $num, $tsb, $mappings, $transcript_scores) = @_;

  $self->logger->info("Exact Transcript non-exact Translation...\n", 0, 'stamped');

  if (!$transcript_scores->loaded) {
    $tsb->different_translation_rescore($transcript_scores);
    $transcript_scores->write_to_file;
  }

  my $mappings_name = "transcript_mappings$num";
  $mappings = $self->basic_mapping($transcript_scores, $mappings_name);

  # the resulting matrix carries the number of the next mapping step
  $num++;
  my $matrix_name = "transcript_matrix$num";
  my $new_scores = $tsb->create_shrinked_matrix($transcript_scores, $mappings,
    $matrix_name);

  return ($new_scores, $mappings);
}
#
# reduce score for mappings of transcripts which do not belong to mapped
# genes
#
# Unless the transcript scores report themselves as loaded, they are rescored
# with non_mapped_gene_rescore() using the gene mappings and written to file.
# Afterwards basic_mapping() is run (name "transcript_mappings$num") and a
# shrinked matrix named "transcript_matrix" plus the incremented number is
# requested. The transcript mappings passed in are not used.
# Returns the list (new scores, mappings).
#
sub mapped_gene {
  my ($self, $num, $tsb, $mappings, $transcript_scores, $gene_mappings) = @_;

  $self->logger->info("Transcripts in mapped genes...\n", 0, 'stamped');

  if (!$transcript_scores->loaded) {
    $tsb->non_mapped_gene_rescore($transcript_scores, $gene_mappings);
    $transcript_scores->write_to_file;
  }

  my $mappings_name = "transcript_mappings$num";
  $mappings = $self->basic_mapping($transcript_scores, $mappings_name);

  # the resulting matrix carries the number of the next mapping step
  $num++;
  my $matrix_name = "transcript_matrix$num";
  my $new_scores = $tsb->create_shrinked_matrix($transcript_scores, $mappings,
    $matrix_name);

  return ($new_scores, $mappings);
}
#
# selectively rescore by penalising scores between transcripts with
# different internalIDs
#
# Unless the transcript scores report themselves as loaded, they are rescored
# with internal_id_rescore() and written to file. Afterwards basic_mapping()
# is run (name "transcript_mappings$num") and a shrinked matrix named
# "transcript_matrix" plus the incremented number is requested. The mappings
# passed in are not used.
# Returns the list (new scores, mappings).
#
sub internal_id {
  my ($self, $num, $tsb, $mappings, $transcript_scores) = @_;

  $self->logger->info("Retry with internalID disambiguation...\n", 0, 'stamped');

  if (!$transcript_scores->loaded) {
    $tsb->internal_id_rescore($transcript_scores);
    $transcript_scores->write_to_file;
  }

  my $mappings_name = "transcript_mappings$num";
  $mappings = $self->basic_mapping($transcript_scores, $mappings_name);

  # the resulting matrix carries the number of the next mapping step
  $num++;
  my $matrix_name = "transcript_matrix$num";
  my $new_scores = $tsb->create_shrinked_matrix($transcript_scores, $mappings,
    $matrix_name);

  return ($new_scores, $mappings);
}
#
# handle ambiguities between transcripts in single genes
#
# The transcript scores are not rescored in this step; they are only written
# to file unless they report themselves as loaded. The mapping is done by
# same_gene_transcript_mapping() (name "transcript_mappings$num"), then a
# shrinked matrix named "transcript_matrix" plus the incremented number is
# requested. The mappings passed in are not used.
# Returns the list (new scores, mappings).
#
sub single_gene {
  my ($self, $num, $tsb, $mappings, $transcript_scores) = @_;

  $self->logger->info("Transcripts in single genes...\n", 0, 'stamped');

  if (!$transcript_scores->loaded) {
    $transcript_scores->write_to_file;
  }

  my $mappings_name = "transcript_mappings$num";
  $mappings = $self->same_gene_transcript_mapping($transcript_scores,
    $mappings_name);

  # the resulting matrix carries the number of the next mapping step
  $num++;
  my $matrix_name = "transcript_matrix$num";
  my $new_scores = $tsb->create_shrinked_matrix($transcript_scores, $mappings,
    $matrix_name);

  return ($new_scores, $mappings);
}
#
# modified basic mapper that maps transcripts that are ambiguous within one gene
#
# Arguments: a Bio::EnsEMBL::IdMapping::ScoredMappingMatrix and a name which
# is used to build the cache file name ("<name>.ser") of the MappingList.
# Returns the MappingList, either as found by AUTO_LOAD or freshly built and
# written to file.
# Throws if the matrix or the name is missing.
#
sub same_gene_transcript_mapping {
  my ($self, $matrix, $mapping_name) = @_;

  # argument checks
  if (!$matrix or
      !$matrix->isa('Bio::EnsEMBL::IdMapping::ScoredMappingMatrix')) {
    throw('Need a Bio::EnsEMBL::IdMapping::ScoredMappingMatrix.');
  }

  if (!$mapping_name) {
    throw('Need a name for serialising the mapping.');
  }

  # Create a new MappingList object. Specify AUTO_LOAD to load serialised
  # existing mappings if found
  my $dump_path = path_append($self->conf->param('basedir'), 'mapping');

  my $mappings = Bio::EnsEMBL::IdMapping::MappingList->new(
    -DUMP_PATH   => $dump_path,
    -CACHE_FILE  => "${mapping_name}.ser",
    -AUTO_LOAD   => 1,
  );

  # checkpoint test: return a previously stored MappingList
  if ($mappings->loaded) {
    $self->logger->info("Read existing mappings from ${mapping_name}.ser.\n");
    return $mappings;
  }

  # sources and targets which were dealt with already in this run
  my $sources_done = {};
  my $targets_done = {};

  # walk through the scoring matrix entries, best score first; ties are
  # broken by source, then by target
  my @by_score = sort { $b->score <=> $a->score ||
    $a->source <=> $b->source || $a->target <=> $b->target }
    @{ $matrix->get_all_Entries };

  foreach my $entry (@by_score) {

    # we already found a mapping for either source or target
    next if ($sources_done->{$entry->source} or
             $targets_done->{$entry->target});

    my $rival_sources = [];
    my $rival_targets = [];

    # unambiguous entries are the best mapping and are always added
    my $add_entry = 1;

    if ($self->ambiguous_mapping($entry, $matrix, $rival_sources,
        $rival_targets)) {

      $rival_sources = $self->filter_sources($rival_sources, $sources_done);
      $rival_targets = $self->filter_targets($rival_targets, $targets_done);

      # collect the genes of all transcripts involved in the ambiguity
      my %source_genes = ();
      foreach my $tr_id ($entry->source, @{ $rival_sources }) {
        $source_genes{$self->cache->get_by_key('genes_by_transcript_id',
          'source', $tr_id)} = 1;
      }

      my %target_genes = ();
      foreach my $tr_id ($entry->target, @{ $rival_targets }) {
        $target_genes{$self->cache->get_by_key('genes_by_transcript_id',
          'target', $tr_id)} = 1;
      }

      # only add mapping if only one source and target gene involved
      $add_entry = (scalar(keys %source_genes) == 1 and
                    scalar(keys %target_genes) == 1);
    }

    $mappings->add_Entry($entry) if ($add_entry);

    # source and target are flagged even if the entry was not added
    $sources_done->{$entry->source} = 1;
    $targets_done->{$entry->target} = 1;
  }

  # create checkpoint
  $mappings->write_to_file;

  return $mappings;
}
1;
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment