From ce19e68064d36484e4932fbebd10810c2a6502bd Mon Sep 17 00:00:00 2001
From: Patrick Meidl <pm2@sanger.ac.uk>
Date: Mon, 28 Apr 2008 20:59:53 +0000
Subject: [PATCH] plugin architecture for InternalIdMapper

---
 misc-scripts/id_mapping/default.conf          |  23 +-
 misc-scripts/id_mapping/id_mapping.pl         |   3 +
 misc-scripts/id_mapping/run.pl                |   3 +
 .../Bio/EnsEMBL/IdMapping/InternalIdMapper.pm | 621 ++++--------------
 .../IdMapping/InternalIdMapper/BaseMapper.pm  | 250 +++++++
 .../InternalIdMapper/EnsemblExonGeneric.pm    |  91 +++
 .../InternalIdMapper/EnsemblGeneGeneric.pm    | 187 ++++++
 .../EnsemblTranscriptGeneric.pm               | 276 ++++++++
 8 files changed, 971 insertions(+), 483 deletions(-)
 create mode 100644 modules/Bio/EnsEMBL/IdMapping/InternalIdMapper/BaseMapper.pm
 create mode 100644 modules/Bio/EnsEMBL/IdMapping/InternalIdMapper/EnsemblExonGeneric.pm
 create mode 100644 modules/Bio/EnsEMBL/IdMapping/InternalIdMapper/EnsemblGeneGeneric.pm
 create mode 100644 modules/Bio/EnsEMBL/IdMapping/InternalIdMapper/EnsemblTranscriptGeneric.pm

diff --git a/misc-scripts/id_mapping/default.conf b/misc-scripts/id_mapping/default.conf
index 2d90cd045c..8d5b95c597 100644
--- a/misc-scripts/id_mapping/default.conf
+++ b/misc-scripts/id_mapping/default.conf
@@ -5,7 +5,7 @@ dry_run = 0
 loglevel = DEBUG
 
 ; paths
-basedir = /lustre/work1/ensembl/pm2/idmapping/perl/2008-04-22c
+basedir = /lustre/work1/ensembl/pm2/idmapping/perl/2008-04-28
 
 ; prepend this path to your 'log' parameter
 ; will default to "$basedir/log" if not set
@@ -27,7 +27,7 @@ targetdbname                = pm2_pan_troglodytes_core_41_21
 ; caching
 ;cache_method                = build_cache_all
 build_cache_auto_threshold  = 100
-build_cache_concurrent_jobs  = 200
+build_cache_concurrent_jobs = 200
 
 ; limit
 ;region                     = chromosome:CHIMP1A:1:1:2000000:1
@@ -50,6 +50,25 @@ transcript_score_threshold  = 0
 synteny_rescore_jobs        = 20
 ;lsf_opt_synteny_rescore     = 
 
+; InternalIdMapper
+;plugin_internal_id_mappers_gene = \
+;    Bio::EnsEMBL::IdMapping::InternalIdMapper::EnsemblGeneGeneric::init_basic,\
+;    Bio::EnsEMBL::IdMapping::InternalIdMapper::EnsemblGeneGeneric::synteny,\
+;    Bio::EnsEMBL::IdMapping::InternalIdMapper::EnsemblGeneGeneric::best_transcript,\
+;    Bio::EnsEMBL::IdMapping::InternalIdMapper::EnsemblGeneGeneric::biotype,\
+;    Bio::EnsEMBL::IdMapping::InternalIdMapper::EnsemblGeneGeneric::internal_id
+
+;plugin_internal_id_mappers_transcript = \
+;    Bio::EnsEMBL::IdMapping::InternalIdMapper::EnsemblTranscriptGeneric::init_basic,\
+;    Bio::EnsEMBL::IdMapping::InternalIdMapper::EnsemblTranscriptGeneric::non_exact_translation,\
+;    Bio::EnsEMBL::IdMapping::InternalIdMapper::EnsemblTranscriptGeneric::mapped_gene,\
+;    Bio::EnsEMBL::IdMapping::InternalIdMapper::EnsemblTranscriptGeneric::internal_id,\
+;    Bio::EnsEMBL::IdMapping::InternalIdMapper::EnsemblTranscriptGeneric::single_gene
+
+;plugin_internal_id_mappers_exon = \
+;    Bio::EnsEMBL::IdMapping::InternalIdMapper::EnsemblExonGeneric::init_basic,\
+;    Bio::EnsEMBL::IdMapping::InternalIdMapper::EnsemblExonGeneric::mapped_transcript
+
 ; StableIdMapper
 mapping_types               = gene,transcript,translation,exon
 ;plugin_stable_id_generator  = Bio::EnsEMBL::IdMapping::StableIdGenerator::EnsemblGeneric
diff --git a/misc-scripts/id_mapping/id_mapping.pl b/misc-scripts/id_mapping/id_mapping.pl
index 6030851bf0..f1b4903d2b 100755
--- a/misc-scripts/id_mapping/id_mapping.pl
+++ b/misc-scripts/id_mapping/id_mapping.pl
@@ -87,6 +87,9 @@ $conf->parse_options(
   'exonerate_jobs|exoneratejobs=i' => 0,
   'exonerate_bytes_per_job|exoneratebytesperjob=f' => 0,
   'exonerate_extra_params|exonerateextraparams=s' => 0,
+  'plugin_internal_id_mappers_gene=s@' => 0,
+  'plugin_internal_id_mappers_transcript=s@' => 0,
+  'plugin_internal_id_mappers_exon=s@' => 0,
   'mapping_types=s@' => 1,
   'plugin_stable_id_generator=s' => 0,
   'upload_events|uploadevents=s' => 0,
diff --git a/misc-scripts/id_mapping/run.pl b/misc-scripts/id_mapping/run.pl
index ad367d3ad0..f764352b89 100755
--- a/misc-scripts/id_mapping/run.pl
+++ b/misc-scripts/id_mapping/run.pl
@@ -96,6 +96,9 @@ $conf->parse_options(
   'exonerate_jobs|exoneratejobs=i' => 0,
   'exonerate_bytes_per_job|exoneratebytesperjob=f' => 0,
   'exonerate_extra_params|exonerateextraparams=s' => 0,
+  'plugin_internal_id_mappers_gene=s@' => 0,
+  'plugin_internal_id_mappers_transcript=s@' => 0,
+  'plugin_internal_id_mappers_exon=s@' => 0,
   'mapping_types=s@' => 1,
   'plugin_stable_id_generator=s' => 0,
   'upload_events|uploadevents=s' => 0,
diff --git a/modules/Bio/EnsEMBL/IdMapping/InternalIdMapper.pm b/modules/Bio/EnsEMBL/IdMapping/InternalIdMapper.pm
index bd2fe1b497..449b584369 100644
--- a/modules/Bio/EnsEMBL/IdMapping/InternalIdMapper.pm
+++ b/modules/Bio/EnsEMBL/IdMapping/InternalIdMapper.pm
@@ -37,7 +37,7 @@ use Bio::EnsEMBL::IdMapping::BaseObject;
 our @ISA = qw(Bio::EnsEMBL::IdMapping::BaseObject);
 
 use Bio::EnsEMBL::Utils::Exception qw(throw warning);
-use Bio::EnsEMBL::Utils::ScriptUtils qw(path_append);
+use Bio::EnsEMBL::Utils::ScriptUtils qw(inject path_append);
 use Bio::EnsEMBL::IdMapping::Entry;
 use Bio::EnsEMBL::IdMapping::MappingList;
 use Bio::EnsEMBL::IdMapping::SyntenyFramework;
@@ -93,110 +93,44 @@ sub map_genes {
     # create gene mappings
     $self->logger->info("No gene mappings found. Will calculate them now.\n");
 
-    #
-    # basic mapping
-    #
-    $self->logger->info("Basic gene mapping...\n", 0, 'stamped');
-
-    my $mappings0 = $self->basic_mapping($gene_scores, 'gene_mappings0');
-
-    my $gene_scores1 = $gsb->create_shrinked_matrix($gene_scores, $mappings0,
-      'gene_matrix1');
-
-
-    #
-    # build the synteny from unambiguous mappings
-    #
-    unless ($gene_scores1->loaded) {
-      $self->logger->info("Synteny Framework building...\n", 0, 'stamped');
-      my $sf = Bio::EnsEMBL::IdMapping::SyntenyFramework->new(
-        -DUMP_PATH    => $dump_path,
-        -CACHE_FILE   => 'synteny_framework.ser',
-        -LOGGER       => $self->logger,
-        -CONF         => $self->conf,
-        -CACHE        => $self->cache,
-      );
-      $sf->build_synteny($mappings0);
-
-      # use it to rescore the genes
-      $self->logger->info("\nSynteny assisted mapping...\n", 0, 'stamped');
-      $gene_scores1 = $sf->rescore_gene_matrix_lsf($gene_scores1);
-
-      # checkpoint
-      $gene_scores1->write_to_file;
-    }
-
-    my $mappings1 = $self->basic_mapping($gene_scores1, 'gene_mappings1');
-    
-    my $gene_scores2 = $gsb->create_shrinked_matrix($gene_scores1, $mappings1,
-      'gene_matrix2');
-    
+    # determine which plugin methods to run
+    my @default_plugins = (qw(
+      Bio::EnsEMBL::IdMapping::InternalIdMapper::EnsemblGeneGeneric::init_basic
+      Bio::EnsEMBL::IdMapping::InternalIdMapper::EnsemblGeneGeneric::synteny
+      Bio::EnsEMBL::IdMapping::InternalIdMapper::EnsemblGeneGeneric::best_transcript
+      Bio::EnsEMBL::IdMapping::InternalIdMapper::EnsemblGeneGeneric::biotype
+      Bio::EnsEMBL::IdMapping::InternalIdMapper::EnsemblGeneGeneric::internal_id
+    ));
+
+    my @plugins = $self->conf->param('plugin_internal_id_mappers_gene');
+    @plugins = @default_plugins unless (defined($plugins[0]));
+
+    my $new_mappings = Bio::EnsEMBL::IdMapping::MappingList->new(
+      -DUMP_PATH   => $dump_path,
+      -CACHE_FILE  => 'gene_mappings0.ser',
+    );
+    my @mappings = ();
+    my $i = 0;
 
     #
-    # rescore with simple scoring function and try again
+    # run the scoring chain
     #
-    $self->logger->info("Retry with simple best transcript score...\n", 0, 'stamped');
-    
-    unless ($gene_scores2->loaded) {
-      $gsb->simple_gene_rescore($gene_scores2, $transcript_scores);
-      $gene_scores2->write_to_file;
-    }
-    
-    my $mappings2 = $self->basic_mapping($gene_scores2, 'gene_mappings2');
-    
-    my $gene_scores3 = $gsb->create_shrinked_matrix($gene_scores2, $mappings2,
-      'gene_matrix3');
+    foreach my $plugin (@plugins) {
+      ($gene_scores, $new_mappings) = $self->delegate_to_plugin($plugin, $i++,
+        $gsb, $new_mappings, $gene_scores, $transcript_scores);
 
-
-    #
-    # rescore by penalising scores between genes with different biotypes  
-    #
-    $self->logger->info("Retry with biotype disambiguation...\n", 0, 'stamped');
-    
-    unless ($gene_scores3->loaded) {
-      $gsb->biotype_gene_rescore($gene_scores3);
-      $gene_scores3->write_to_file;
+      push(@mappings, $new_mappings);
     }
 
-    my $mappings3 = $self->basic_mapping($gene_scores3, 'gene_mappings3');
-    
-    my $gene_scores4 = $gsb->create_shrinked_matrix($gene_scores3, $mappings3,
-      'gene_matrix4');
-
-
-    #
-    # selectively rescore by penalising scores between genes with different
-    # internalIDs  
-    #
-    $self->logger->info("Retry with internalID disambiguation...\n", 0, 'stamped');
-    
-    unless ($gene_scores4->loaded) {
-      $gsb->internal_id_rescore($gene_scores4);
-      $gene_scores4->write_to_file;
-    }
-
-    my $mappings4 = $self->basic_mapping($gene_scores4, 'gene_mappings4');
-    
-    my $remaining_gene_scores = $gsb->create_shrinked_matrix(
-      $gene_scores4, $mappings4, 'remaining_gene_matrix');
-
-
-    #
     # report remaining ambiguities
-    #
-    $self->logger->info($remaining_gene_scores->get_source_count.
+    $self->logger->info($gene_scores->get_source_count.
       " source genes are ambiguous with ".
-      $remaining_gene_scores->get_target_count." target genes.\n\n");
-
-    $self->log_ambiguous($remaining_gene_scores, 'gene');
+      $gene_scores->get_target_count." target genes.\n\n");
 
+    $self->log_ambiguous($gene_scores, 'gene');
     
-    #
     # merge mappings and write to file
-    #
-    $mappings->add_all($mappings0, $mappings1, $mappings2, $mappings3,
-                       $mappings4);
-
+    $mappings->add_all(@mappings);
     $mappings->write_to_file;
 
     if ($self->logger->loglevel eq 'debug') {
@@ -258,103 +192,44 @@ sub map_transcripts {
     # create transcript mappings
     $self->logger->info("No transcript mappings found. Will calculate them now.\n");
 
-    #
-    # basic mapping
-    #
-    $self->logger->info("Basic transcript mapping...\n", 0, 'stamped');
-
-    my $mappings0 = $self->basic_mapping($transcript_scores,
-      'transcript_mappings0');
-
-    my $transcript_scores1 = $tsb->create_shrinked_matrix(
-      $transcript_scores, $mappings0, 'transcript_matrix1');
-
-
-    #
-    # handle cases with exact match but different translation
-    #
-    $self->logger->info("Exact Transcript non-exact Translation...\n", 0, 'stamped');
-    
-    unless ($transcript_scores1->loaded) {
-      $tsb->different_translation_rescore($transcript_scores1);
-      $transcript_scores1->write_to_file;
-    }
-    
-    my $mappings1 = $self->basic_mapping($transcript_scores1,
-      'transcript_mappings1');
-    
-    my $transcript_scores2 = $tsb->create_shrinked_matrix(
-      $transcript_scores1, $mappings1, 'transcript_matrix2');
-
+    # determine which plugin methods to run
+    my @default_plugins = (qw(
+      Bio::EnsEMBL::IdMapping::InternalIdMapper::EnsemblTranscriptGeneric::init_basic
+      Bio::EnsEMBL::IdMapping::InternalIdMapper::EnsemblTranscriptGeneric::non_exact_translation
+      Bio::EnsEMBL::IdMapping::InternalIdMapper::EnsemblTranscriptGeneric::mapped_gene
+      Bio::EnsEMBL::IdMapping::InternalIdMapper::EnsemblTranscriptGeneric::internal_id
+      Bio::EnsEMBL::IdMapping::InternalIdMapper::EnsemblTranscriptGeneric::single_gene
+    ));
+
+    my @plugins = $self->conf->param('plugin_internal_id_mappers_transcript');
+    @plugins = @default_plugins unless (defined($plugins[0]));
+
+    my $new_mappings = Bio::EnsEMBL::IdMapping::MappingList->new(
+      -DUMP_PATH   => $dump_path,
+      -CACHE_FILE  => 'transcript_mappings0.ser',
+    );
+    my @mappings = ();
+    my $i = 0;
 
     #
-    # reduce score for mappings of transcripts which do not belong to mapped
-    # genes
+    # run the scoring chain
     #
-    $self->logger->info("Transcripts in mapped genes...\n", 0, 'stamped');
-    
-    unless ($transcript_scores2->loaded) {
-    $tsb->non_mapped_gene_rescore($transcript_scores2, $gene_mappings);
-      $transcript_scores2->write_to_file;
-    }
-    
-    my $mappings2 = $self->basic_mapping($transcript_scores2,
-      'transcript_mappings2');
-    
-    my $transcript_scores3 = $tsb->create_shrinked_matrix(
-      $transcript_scores2, $mappings2, 'transcript_matrix3');
-
+    foreach my $plugin (@plugins) {
+      ($transcript_scores, $new_mappings) = $self->delegate_to_plugin($plugin,
+        $i++, $tsb, $new_mappings, $transcript_scores, $gene_mappings);
 
-    #
-    # selectively rescore by penalising scores between transcripts with
-    # different internalIDs  
-    #
-    $self->logger->info("Retry with internalID disambiguation...\n", 0, 'stamped');
-    
-    unless ($transcript_scores3->loaded) {
-      $tsb->internal_id_rescore($transcript_scores3);
-      $transcript_scores3->write_to_file;
+      push(@mappings, $new_mappings);
     }
 
-    my $mappings3 = $self->basic_mapping($transcript_scores3,
-      'transcript_mappings3');
-    
-    my $transcript_scores4 = $tsb->create_shrinked_matrix(
-      $transcript_scores3, $mappings3, 'transcript_matrix4');
-
-
-    #
-    # handle ambiguities between transcripts in single genes
-    #
-    $self->logger->info("Transcripts in single genes...\n", 0, 'stamped');
-    
-    unless ($transcript_scores4->loaded) {
-      $transcript_scores4->write_to_file;
-    }
-    
-    my $mappings4 = $self->same_gene_transcript_mapping($transcript_scores4,
-      'transcript_mappings4');
-
-    my $remaining_transcript_scores = $tsb->create_shrinked_matrix(
-      $transcript_scores4, $mappings4, 'transcript_matrix5');
-
-
-    #
     # report remaining ambiguities
-    #
-    $self->logger->info($remaining_transcript_scores->get_source_count.
+    $self->logger->info($transcript_scores->get_source_count.
       " source transcripts are ambiguous with ".
-      $remaining_transcript_scores->get_target_count." target transcripts.\n\n");
+      $transcript_scores->get_target_count." target transcripts.\n\n");
 
-    $self->log_ambiguous($remaining_transcript_scores, 'transcript');
+    $self->log_ambiguous($transcript_scores, 'transcript');
 
-    
-    #
     # merge mappings and write to file
-    #
-    $mappings->add_all($mappings0, $mappings1, $mappings2, $mappings3,
-                       $mappings4);
-
+    $mappings->add_all(@mappings);
     $mappings->write_to_file;
 
     if ($self->logger->loglevel eq 'debug') {
@@ -417,49 +292,41 @@ sub map_exons {
     # create exon mappings
     $self->logger->info("No exon mappings found. Will calculate them now.\n");
 
-    #
-    # basic mapping
-    #
-    $self->logger->info("Basic exon mapping...\n", 0, 'stamped');
+    # determine which plugin methods to run
+    my @default_plugins = (qw(
+      Bio::EnsEMBL::IdMapping::InternalIdMapper::EnsemblExonGeneric::init_basic
+      Bio::EnsEMBL::IdMapping::InternalIdMapper::EnsemblExonGeneric::mapped_transcript
+    ));
 
-    my $mappings0 = $self->basic_mapping($exon_scores, 'exon_mappings0');
+    my @plugins = $self->conf->param('plugin_internal_id_mappers_exon');
+    @plugins = @default_plugins unless (defined($plugins[0]));
 
-    my $exon_scores1 = $esb->create_shrinked_matrix( $exon_scores, $mappings0,
-      'exon_matrix1');
-    
+    my $new_mappings = Bio::EnsEMBL::IdMapping::MappingList->new(
+      -DUMP_PATH   => $dump_path,
+      -CACHE_FILE  => 'exon_mappings0.ser',
+    );
+    my @mappings = ();
+    my $i = 0;
 
     #
-    # reduce score for mappings of exons which do not belong to mapped
-    # transcripts
+    # run the scoring chain
     #
-    $self->logger->info("Exons in mapped transcripts...\n", 0, 'stamped');
-    
-    unless ($exon_scores1->loaded) {
-      $esb->non_mapped_transcript_rescore($exon_scores1, $transcript_mappings);
-      $exon_scores1->write_to_file;
-    }
-    
-    my $mappings1 = $self->basic_mapping($exon_scores1, 'exon_mappings1');
-    
-    my $remaining_exon_scores = $esb->create_shrinked_matrix(
-      $exon_scores1, $mappings1, 'exon_matrix2');
+    foreach my $plugin (@plugins) {
+      ($exon_scores, $new_mappings) = $self->delegate_to_plugin($plugin, $i++,
+        $esb, $new_mappings, $exon_scores);
 
+      push(@mappings, $new_mappings);
+    }
 
-    #
     # report remaining ambiguities
-    #
-    $self->logger->info($remaining_exon_scores->get_source_count.
+    $self->logger->info($exon_scores->get_source_count.
       " source exons are ambiguous with ".
-      $remaining_exon_scores->get_target_count." target exons.\n\n");
+      $exon_scores->get_target_count." target exons.\n\n");
 
-    $self->log_ambiguous($remaining_exon_scores, 'exon');
+    $self->log_ambiguous($exon_scores, 'exon');
 
-    
-    #
     # merge mappings and write to file
-    #
-    $mappings->add_all($mappings0, $mappings1);
-
+    $mappings->add_all(@mappings);
     $mappings->write_to_file;
 
     if ($self->logger->loglevel eq 'debug') {
@@ -475,6 +342,10 @@ sub map_exons {
 }
 
 
+#
+# this is not implemented as a plugin, since a) it's too simple and b) it's
+# tied to transcripts so there are no translation scores or score builder.
+#
 sub map_translations {
   my $self = shift;
   my $transcript_mappings = shift;
@@ -557,304 +428,92 @@ sub map_translations {
 }
 
 
-#
-# find the highest unambiguous score for all sources and targets in a scoring
-# matrix
-#
-sub basic_mapping {
+sub delegate_to_plugin {
   my $self = shift;
-  my $matrix = shift;
-  my $mapping_name = shift;
+  my $plugin = shift;
+  my $num = shift;
+  my $score_builder = shift;
+  my $mappings = shift;
+  my $scores = shift;
 
   # argument checks
-  unless ($matrix and
-          $matrix->isa('Bio::EnsEMBL::IdMapping::ScoredMappingMatrix')) {
-    throw('Need a Bio::EnsEMBL::IdMapping::ScoredMappingMatrix.');
+  unless ($score_builder and
+          $score_builder->isa('Bio::EnsEMBL::IdMapping::ScoreBuilder')) {
+    throw('Need a Bio::EnsEMBL::IdMapping::ScoreBuilder.');
   }
 
-  throw('Need a name for serialising the mapping.') unless ($mapping_name);
-
-  # Create a new MappingList object. Specify AUTO_LOAD to load serialised
-  # existing mappings if found
-  my $dump_path = path_append($self->conf->param('basedir'), 'mapping');
-  
-  my $mappings = Bio::EnsEMBL::IdMapping::MappingList->new(
-    -DUMP_PATH   => $dump_path,
-    -CACHE_FILE  => "${mapping_name}.ser",
-    -AUTO_LOAD   => 1,
-  );
-  
-  # checkpoint test: return a previously stored MappingList
-  if ($mappings->loaded) {
-    $self->logger->info("Read existing mappings from ${mapping_name}.ser.\n");
-    return $mappings;
+  unless ($mappings and
+          $mappings->isa('Bio::EnsEMBL::IdMapping::MappingList')) {
+    throw('Need a Bio::EnsEMBL::IdMapping::MappingList.');
   }
-
-  my $sources_done = {};
-  my $targets_done = {};
-
-  # sort scoring matrix entries by descending score
-  my @sorted_entries = sort { $b->score <=> $a->score }
-    @{ $matrix->get_all_Entries };
-
-  # debug
-  my $idx = substr($mapping_name, -1);
-
-  while (my $entry = shift(@sorted_entries)) {
-    
-    #$self->logger->debug("\nxxx$idx ".$entry->to_string." ");
-    
-    # we already found a mapping for either source or target
-    next if ($sources_done->{$entry->source} or
-             $targets_done->{$entry->target});
-    
-    #$self->logger->debug('d');
-    
-    # there's a better mapping for either source or target
-    next if ($self->higher_score_exists($entry, $matrix, $sources_done,
-      $targets_done));
-      
-    #$self->logger->debug('h');
-
-    # check for ambiguous mappings; they are dealt with later
-    my $other_sources = [];
-    my $other_targets = [];
-
-    if ($self->ambiguous_mapping($entry, $matrix, $other_sources, $other_targets)) {
-      #$self->logger->debug('a');
-      
-      $other_sources = $self->filter_sources($other_sources, $sources_done);
-      $other_targets = $self->filter_targets($other_targets, $targets_done);
-
-      next if (scalar(@$other_sources) or scalar(@$other_targets));
-    }
-    
-    #$self->logger->debug('A');
-
-    # this is the best mapping, add it
-    $mappings->add_Entry($entry);
-
-    $sources_done->{$entry->source} = 1;
-    $targets_done->{$entry->target} = 1;
+  
+  unless ($scores and
+          $scores->isa('Bio::EnsEMBL::IdMapping::ScoredMappingMatrix')) {
+    throw('Need a Bio::EnsEMBL::IdMapping::ScoredMappingMatrix.');
   }
 
-  # create checkpoint
-  $mappings->write_to_file;
+  # split plugin name into module and method
+  $plugin =~ /(.*)::(\w+)$/;
+  my $module = $1;
+  my $method = $2;
 
-  return $mappings;
-}
-
-
-sub higher_score_exists {
-  my ($self, $entry, $matrix, $sources_done, $targets_done) = @_;
-
-  my $source = $entry->source;
-  my $target = $entry->target;
-  my $score = $entry->score;
-
-  foreach my $other_source (@{ $matrix->get_sources_for_target($target) }) {
-    if ($other_source != $source and !$sources_done->{$other_source} and
-        $score < $matrix->get_score($other_source, $target)) {
-          return 1;
-    }
+  unless ($module and $method) {
+    throw("Unable to determine module and method name from $plugin.\n");
   }
 
-  foreach my $other_target (@{ $matrix->get_targets_for_source($source) }) {
-    if ($other_target != $target and !$targets_done->{$other_target} and
-        $score < $matrix->get_score($source, $other_target)) {
-          return 1;
-    }
-  }
-
-  return 0;
-}
-
-
-#
-# find ambiguous mappings (see scores_similar() for definition)
-#
-sub ambiguous_mapping {
-  my ($self, $entry, $matrix, $other_sources, $other_targets) = @_;
-
-  my $source = $entry->source;
-  my $target = $entry->target;
-  my $score = $entry->score;
-
-  my $retval = 0;
-
-  foreach my $other_source (@{ $matrix->get_sources_for_target($target) }) {
-    my $other_score = $matrix->get_score($other_source, $target);
+  # instantiate the plugin unless we already have an instance
+  my $plugin_instance;
+  if ($self->has_plugin($module)) {
     
-    if ($other_source != $source and
-      ($self->scores_similar($score, $other_score) or $score < $other_score)) {
-        $retval = 1;
-        push @{ $other_sources }, $other_source;
-    }
-  }
-
-  foreach my $other_target (@{ $matrix->get_targets_for_source($source) }) {
-    my $other_score = $matrix->get_score($source, $other_target);
-
-    if ($other_target != $target and
-      ($self->scores_similar($score, $other_score) or $score < $other_score)) {
-        $retval = 1;
-        push @{ $other_targets }, $other_target;
-    }
-  }
-
-  return $retval;
-}
-
-
-# 
-# rule for similarity taken from java code...
-#
-sub scores_similar {
-  my ($self, $s1, $s2) = @_;
-
-  # always give priority to exact matches over very similar ones
-  return 0 if ($s1 == 1 and $s2 < 1);
-
-  my $diff = $s1 -$s2;
-  $diff = -$diff if ($diff < 0);
+    # re-use an existing plugin instance
+    $plugin_instance = $self->get_plugin($module);
   
-  my $pc = 2 * $diff / ($s1 + $s2);
-  
-  return ($pc < SIMILAR_SCORE_RATIO);
-}
-
-
-sub filter_sources {
-  my ($self, $other_sources, $sources_done) = @_;
-
-  unless (scalar(@$other_sources) and scalar(keys %$sources_done)) {
-    return $other_sources;
-  }
-
-  my @tmp = ();
+  } else {
+    
+    # inject and instantiate the plugin module
+    inject($module);
+    $plugin_instance = $module->new(
+        -LOGGER       => $self->logger,
+        -CONF         => $self->conf,
+        -CACHE        => $self->cache
+    );
+    $self->add_plugin($plugin_instance);
 
-  foreach my $e (@{ $other_sources }) {
-    push @tmp, $e unless ($sources_done->{$e}); 
   }
 
-  return \@tmp;
+  # run the method on the plugin
+  #
+  # pass in a sequence number (number of method run, used for generating
+  # checkpoint files), the scores used for determining the mapping, and all
+  # other arguments passed to this method (these will vary for different object
+  # types)
+  #
+  # return the scores and mappings to feed into the next plugin in the chain
+  return $plugin_instance->$method($num, $score_builder, $mappings, $scores, @_);
 }
 
 
-sub filter_targets {
-  my ($self, $other_targets, $targets_done) = @_;
-
-  unless (scalar(@{ $other_targets }) and scalar(keys %$targets_done)) {
-    return $other_targets;
-  }
-
-  my @tmp = ();
-
-  foreach my $e (@{ $other_targets }) {
-    push @tmp, $e unless ($targets_done->{$e}); 
-  }
+sub has_plugin {
+  my $self = shift;
+  my $module = shift;
 
-  return \@tmp;
+  defined($self->{'_plugins'}->{$module}) ? (return 1) : (return 0);
 }
 
 
-#
-# modified basic mapper that maps transcripts that are ambiguous within one gene
-#
-sub same_gene_transcript_mapping {
+sub get_plugin {
   my $self = shift;
-  my $matrix = shift;
-  my $mapping_name = shift;
-
-  # argument checks
-  unless ($matrix and
-          $matrix->isa('Bio::EnsEMBL::IdMapping::ScoredMappingMatrix')) {
-    throw('Need a Bio::EnsEMBL::IdMapping::ScoredMappingMatrix.');
-  }
-
-  throw('Need a name for serialising the mapping.') unless ($mapping_name);
-
-  # Create a new MappingList object. Specify AUTO_LOAD to load serialised
-  # existing mappings if found
-  my $dump_path = path_append($self->conf->param('basedir'), 'mapping');
-  
-  my $mappings = Bio::EnsEMBL::IdMapping::MappingList->new(
-    -DUMP_PATH   => $dump_path,
-    -CACHE_FILE  => "${mapping_name}.ser",
-    -AUTO_LOAD   => 1,
-  );
-  
-  # checkpoint test: return a previously stored MappingList
-  if ($mappings->loaded) {
-    $self->logger->info("Read existing mappings from ${mapping_name}.ser.\n");
-    return $mappings;
-  }
-
-  my $sources_done = {};
-  my $targets_done = {};
-
-  # sort scoring matrix entries by descending score
-  my @sorted_entries = sort { $b->score <=> $a->score ||
-    $a->source <=> $b->source || $a->target <=> $b->target }
-      @{ $matrix->get_all_Entries };
-
-  while (my $entry = shift(@sorted_entries)) {
-    
-    # $self->logger->debug("\nxxx4 ".$entry->to_string." ");
-
-    # we already found a mapping for either source or target yet
-    next if ($sources_done->{$entry->source} or
-             $targets_done->{$entry->target});
-
-    #$self->logger->debug('d');
-
-    my $other_sources = [];
-    my $other_targets = [];
-    my %source_genes = ();
-    my %target_genes = ();
-
-    if ($self->ambiguous_mapping($entry, $matrix, $other_sources, $other_targets)) {
-      #$self->logger->debug('a');
-
-      $other_sources = $self->filter_sources($other_sources, $sources_done);
-      $other_targets = $self->filter_targets($other_targets, $targets_done);
+  my $module = shift;
 
-      $source_genes{$self->cache->get_by_key('genes_by_transcript_id',
-        'source', $entry->source)} = 1;
-      $target_genes{$self->cache->get_by_key('genes_by_transcript_id',
-        'target', $entry->target)} = 1;
-
-      foreach my $other_source (@{ $other_sources }) {
-        $source_genes{$self->cache->get_by_key('genes_by_transcript_id',
-          'source', $other_source)} = 1;
-      }
-        
-      foreach my $other_target (@{ $other_targets }) {
-        $target_genes{$self->cache->get_by_key('genes_by_transcript_id',
-          'target', $other_target)} = 1;
-      }
-      
-      # only add mapping if only one source and target gene involved
-      if (scalar(keys %source_genes) == 1 and scalar(keys %target_genes) == 1) {
-        #$self->logger->debug('O');
-        $mappings->add_Entry($entry);
-      }
-
-    } else {
-      #$self->logger->debug('A');
-
-      # this is the best mapping, add it
-      $mappings->add_Entry($entry);
-    }
+  return $self->{'_plugins'}->{$module};
+}
 
-    $sources_done->{$entry->source} = 1;
-    $targets_done->{$entry->target} = 1;
-  }
 
-  # create checkpoint
-  $mappings->write_to_file;
+sub add_plugin {
+  my $self = shift;
+  my $plugin_instance = shift;
 
-  return $mappings;
+  $self->{'_plugins'}->{ref($plugin_instance)} = $plugin_instance;
 }
 
 
diff --git a/modules/Bio/EnsEMBL/IdMapping/InternalIdMapper/BaseMapper.pm b/modules/Bio/EnsEMBL/IdMapping/InternalIdMapper/BaseMapper.pm
new file mode 100644
index 0000000000..77a6c8208f
--- /dev/null
+++ b/modules/Bio/EnsEMBL/IdMapping/InternalIdMapper/BaseMapper.pm
@@ -0,0 +1,250 @@
+package Bio::EnsEMBL::IdMapping::InternalIdMapper::BaseMapper;
+
+=head1 NAME
+
+
+=head1 SYNOPSIS
+
+
+=head1 DESCRIPTION
+
+
+=head1 METHODS
+
+
+=head1 LICENCE
+
+This code is distributed under an Apache style licence. Please see
+http://www.ensembl.org/info/about/code_licence.html for details.
+
+=head1 AUTHOR
+
+Patrick Meidl <meidl@ebi.ac.uk>, Ensembl core API team
+
+=head1 CONTACT
+
+Please post comments/questions to the Ensembl development list
+<ensembl-dev@ebi.ac.uk>
+
+=cut
+
+
+use strict;
+use warnings;
+no warnings 'uninitialized';
+
+use Bio::EnsEMBL::IdMapping::BaseObject;
+our @ISA = qw(Bio::EnsEMBL::IdMapping::BaseObject);
+
+use Bio::EnsEMBL::Utils::Exception qw(throw warning);
+use Bio::EnsEMBL::Utils::ScriptUtils qw(path_append);
+use Bio::EnsEMBL::IdMapping::MappingList;
+
+
+# scores are considered the same if (2.0 * (s1-s2))/(s1 + s2) < this
+use constant SIMILAR_SCORE_RATIO => 0.01;
+
+    
+# Find the highest unambiguous score for all sources and targets in a scoring
+# matrix: entries are visited by descending score and kept unless a better or
+# an ambiguous (similar-scoring, still unmapped) alternative exists. Returns a
+# MappingList, serialised as "${mapping_name}.ser" (reloaded if it exists).
+sub basic_mapping {
+  my $self = shift;
+  my $matrix = shift;
+  my $mapping_name = shift;
+
+  # argument checks
+  unless ($matrix and
+          $matrix->isa('Bio::EnsEMBL::IdMapping::ScoredMappingMatrix')) {
+    throw('Need a Bio::EnsEMBL::IdMapping::ScoredMappingMatrix.');
+  }
+
+  throw('Need a name for serialising the mapping.') unless ($mapping_name);
+
+  # Create a new MappingList object. Specify AUTO_LOAD to load serialised
+  # existing mappings if found
+  my $dump_path = path_append($self->conf->param('basedir'), 'mapping');
+  
+  my $mappings = Bio::EnsEMBL::IdMapping::MappingList->new(
+    -DUMP_PATH   => $dump_path,
+    -CACHE_FILE  => "${mapping_name}.ser",
+    -AUTO_LOAD   => 1,
+  );
+  
+  # checkpoint test: return a previously stored MappingList
+  if ($mappings->loaded) {
+    $self->logger->info("Read existing mappings from ${mapping_name}.ser.\n");
+    return $mappings;
+  }
+
+  my $sources_done = {};
+  my $targets_done = {};
+
+  # sort scoring matrix entries by descending score
+  my @sorted_entries = sort { $b->score <=> $a->score }
+    @{ $matrix->get_all_Entries };
+
+  # debug
+  #my $idx = substr($mapping_name, -1);
+
+  while (my $entry = shift(@sorted_entries)) {
+    
+    #$self->logger->debug("\nxxx$idx ".$entry->to_string." ");
+    
+    # we already found a mapping for either source or target
+    next if ($sources_done->{$entry->source} or
+             $targets_done->{$entry->target});
+    
+    #$self->logger->debug('d');
+    
+    # there's a better mapping for either source or target
+    next if ($self->higher_score_exists($entry, $matrix, $sources_done,
+      $targets_done));
+      
+    #$self->logger->debug('h');
+
+    # check for ambiguous mappings; they are dealt with later
+    my $other_sources = [];
+    my $other_targets = [];
+
+    if ($self->ambiguous_mapping($entry, $matrix, $other_sources, $other_targets)) {
+      #$self->logger->debug('a');
+      
+      $other_sources = $self->filter_sources($other_sources, $sources_done);
+      $other_targets = $self->filter_targets($other_targets, $targets_done);
+
+      next if (scalar(@$other_sources) or scalar(@$other_targets));
+    }
+    
+    #$self->logger->debug('A');
+
+    # this is the best mapping, add it
+    $mappings->add_Entry($entry);
+
+    $sources_done->{$entry->source} = 1;
+    $targets_done->{$entry->target} = 1;
+  }
+
+  # create checkpoint
+  $mappings->write_to_file;
+
+  return $mappings;
+}
+
+
+#
+# Return 1 if a not-yet-mapped rival source (for the entry's target) or rival
+# target (for the entry's source) has a strictly higher score, 0 otherwise.
+#
+sub higher_score_exists {
+  my ($self, $entry, $matrix, $sources_done, $targets_done) = @_;
+
+  my ($src, $tgt, $own_score) = ($entry->source, $entry->target, $entry->score);
+
+  # rival sources competing for the same target
+  for my $rival (@{ $matrix->get_sources_for_target($tgt) }) {
+    next if ($rival == $src or $sources_done->{$rival});
+    return 1 if ($own_score < $matrix->get_score($rival, $tgt));
+  }
+
+  # rival targets competing for the same source
+  for my $rival (@{ $matrix->get_targets_for_source($src) }) {
+    next if ($rival == $tgt or $targets_done->{$rival});
+    return 1 if ($own_score < $matrix->get_score($src, $rival));
+  }
+
+  return 0;
+}
+
+
+#
+# Collect rivals whose score is similar to (see scores_similar()) or higher
+# than the entry's: rival sources for its target are pushed onto
+# $other_sources, rival targets for its source onto $other_targets. Both are
+# caller-supplied arrayrefs. Returns 1 if any rival was found, 0 otherwise.
+#
+sub ambiguous_mapping {
+  my ($self, $entry, $matrix, $other_sources, $other_targets) = @_;
+
+  my ($src, $tgt, $own_score) = ($entry->source, $entry->target, $entry->score);
+  my $found = 0;
+
+  for my $rival (@{ $matrix->get_sources_for_target($tgt) }) {
+    my $rival_score = $matrix->get_score($rival, $tgt);
+    next if ($rival == $src);
+    next unless ($self->scores_similar($own_score, $rival_score)
+                 or $own_score < $rival_score);
+
+    $found = 1;
+    push @{ $other_sources }, $rival;
+  }
+
+  for my $rival (@{ $matrix->get_targets_for_source($src) }) {
+    my $rival_score = $matrix->get_score($src, $rival);
+    next if ($rival == $tgt);
+    next unless ($self->scores_similar($own_score, $rival_score)
+                 or $own_score < $rival_score);
+
+    $found = 1;
+    push @{ $other_targets }, $rival;
+  }
+
+  return $found;
+}
+
+
+#
+# Return true if two scores differ by less than SIMILAR_SCORE_RATIO relative
+# to their mean (rule taken from the java code), false otherwise.
+#
+sub scores_similar {
+  my ($self, $s1, $s2) = @_;
+
+  # always give priority to exact matches over very similar ones
+  return 0 if ($s1 == 1 and $s2 < 1);
+
+  # a zero sum would divide by zero below; equal scores are trivially similar
+  my $sum = $s1 + $s2;
+  return ($s1 == $s2 ? 1 : 0) unless ($sum);
+
+  return (2 * abs($s1 - $s2) / $sum < SIMILAR_SCORE_RATIO);
+}
+
+
+#
+# Return the members of $other_sources that have no true entry in
+# $sources_done. The input arrayref itself is returned when it is empty or
+# when nothing has been marked done yet; otherwise a new arrayref is built.
+sub filter_sources {
+  my ($self, $other_sources, $sources_done) = @_;
+
+  # nothing to filter
+  return $other_sources unless (scalar(@$other_sources));
+  return $other_sources unless (scalar(keys %$sources_done));
+
+  my @remaining = grep { !$sources_done->{$_} } @{ $other_sources };
+
+  return \@remaining;
+}
+
+
+#
+# Return the members of $other_targets that have no true entry in
+# $targets_done. The input arrayref itself is returned when it is empty or
+# when nothing has been marked done yet; otherwise a new arrayref is built.
+sub filter_targets {
+  my ($self, $other_targets, $targets_done) = @_;
+
+  # nothing to filter
+  return $other_targets unless (scalar(@{ $other_targets }));
+  return $other_targets unless (scalar(keys %$targets_done));
+
+  my @remaining = grep { !$targets_done->{$_} } @{ $other_targets };
+
+  return \@remaining;
+}
+
+
+1;
+
diff --git a/modules/Bio/EnsEMBL/IdMapping/InternalIdMapper/EnsemblExonGeneric.pm b/modules/Bio/EnsEMBL/IdMapping/InternalIdMapper/EnsemblExonGeneric.pm
new file mode 100644
index 0000000000..7caae11a37
--- /dev/null
+++ b/modules/Bio/EnsEMBL/IdMapping/InternalIdMapper/EnsemblExonGeneric.pm
@@ -0,0 +1,91 @@
+package Bio::EnsEMBL::IdMapping::InternalIdMapper::EnsemblExonGeneric;
+
+=head1 NAME
+
+
+=head1 SYNOPSIS
+
+
+=head1 DESCRIPTION
+
+
+=head1 METHODS
+
+
+=head1 LICENCE
+
+This code is distributed under an Apache style licence. Please see
+http://www.ensembl.org/info/about/code_licence.html for details.
+
+=head1 AUTHOR
+
+Patrick Meidl <meidl@ebi.ac.uk>, Ensembl core API team
+
+=head1 CONTACT
+
+Please post comments/questions to the Ensembl development list
+<ensembl-dev@ebi.ac.uk>
+
+=cut
+
+
+use strict;
+use warnings;
+no warnings 'uninitialized';
+
+use Bio::EnsEMBL::IdMapping::InternalIdMapper::BaseMapper;
+our @ISA = qw(Bio::EnsEMBL::IdMapping::InternalIdMapper::BaseMapper);
+
+use Bio::EnsEMBL::Utils::Exception qw(throw warning);
+
+  
+#
+# Basic exon mapping: run basic_mapping() on $exon_scores and let $esb derive
+# the follow-up matrix via create_shrinked_matrix(). The incoming $mappings
+# argument is not read. Returns the list (new matrix, new mappings).
+#
+sub init_basic {
+  my ($self, $num, $esb, $mappings, $exon_scores) = @_;
+
+  $self->logger->info("Basic exon mapping...\n", 0, 'stamped');
+
+  $mappings = $self->basic_mapping($exon_scores, "exon_mappings$num");
+  $num++;
+
+  my $matrix_name = "exon_matrix$num";
+  my $new_scores =
+    $esb->create_shrinked_matrix($exon_scores, $mappings, $matrix_name);
+
+  return ($new_scores, $mappings);
+}
+
+
+#
+# Exons in mapped transcripts: unless $exon_scores was loaded from a
+# checkpoint, have $esb rescore it against $mappings (reducing the score of
+# exons which do not belong to mapped transcripts) and checkpoint it; then
+# run the basic mapping. Returns the list (new matrix, new mappings).
+#
+sub mapped_transcript {
+  my ($self, $num, $esb, $mappings, $exon_scores) = @_;
+
+  $self->logger->info("Exons in mapped transcript...\n", 0, 'stamped');
+
+  if (!$exon_scores->loaded) {
+    $esb->non_mapped_transcript_rescore($exon_scores, $mappings);
+    $exon_scores->write_to_file;
+  }
+
+  $mappings = $self->basic_mapping($exon_scores, "exon_mappings$num");
+
+  $num++;
+  my $matrix_name = "exon_matrix$num";
+  my $new_scores =
+    $esb->create_shrinked_matrix($exon_scores, $mappings, $matrix_name);
+
+  return ($new_scores, $mappings);
+}
+  
+
+1;
+
diff --git a/modules/Bio/EnsEMBL/IdMapping/InternalIdMapper/EnsemblGeneGeneric.pm b/modules/Bio/EnsEMBL/IdMapping/InternalIdMapper/EnsemblGeneGeneric.pm
new file mode 100644
index 0000000000..c424896f12
--- /dev/null
+++ b/modules/Bio/EnsEMBL/IdMapping/InternalIdMapper/EnsemblGeneGeneric.pm
@@ -0,0 +1,187 @@
+package Bio::EnsEMBL::IdMapping::InternalIdMapper::EnsemblGeneGeneric;
+
+=head1 NAME
+
+Bio::EnsEMBL::IdMapping::InternalIdMapper::EnsemblGeneGeneric - default Ensembl
+InternalIdMapper implementation for genes
+
+=head1 SYNOPSIS
+
+
+=head1 DESCRIPTION
+
+
+=head1 METHODS
+
+
+=head1 LICENCE
+
+This code is distributed under an Apache style licence. Please see
+http://www.ensembl.org/info/about/code_licence.html for details.
+
+=head1 AUTHOR
+
+Patrick Meidl <meidl@ebi.ac.uk>, Ensembl core API team
+
+=head1 CONTACT
+
+Please post comments/questions to the Ensembl development list
+<ensembl-dev@ebi.ac.uk>
+
+=cut
+
+
+use strict;
+use warnings;
+no warnings 'uninitialized';
+
+use Bio::EnsEMBL::IdMapping::InternalIdMapper::BaseMapper;
+our @ISA = qw(Bio::EnsEMBL::IdMapping::InternalIdMapper::BaseMapper);
+
+use Bio::EnsEMBL::Utils::Exception qw(throw warning);
+use Bio::EnsEMBL::Utils::ScriptUtils qw(path_append);
+
+
+#
+# Basic gene mapping: run basic_mapping() on $gene_scores and let $gsb derive
+# the follow-up matrix via create_shrinked_matrix(). The incoming $mappings
+# argument is not read. Returns the list (new matrix, new mappings).
+#
+sub init_basic {
+  my ($self, $num, $gsb, $mappings, $gene_scores) = @_;
+
+  $self->logger->info("Basic gene mapping...\n", 0, 'stamped');
+
+  $mappings = $self->basic_mapping($gene_scores, "gene_mappings$num");
+
+  # the matrix name carries the incremented step number
+  $num++;
+  my $matrix_name = "gene_matrix$num";
+  my $new_scores =
+    $gsb->create_shrinked_matrix($gene_scores, $mappings, $matrix_name);
+
+  return ($new_scores, $mappings);
+}
+
+
+#
+# Synteny-assisted gene mapping. Unless $gene_scores came from a checkpoint,
+# build a SyntenyFramework from $mappings, rescore the gene matrix with it and
+# checkpoint it; then run the basic mapping. Returns (new matrix, new mappings).
+#
+sub synteny {
+  my ($self, $num, $gsb, $mappings, $gene_scores) = @_;
+
+  unless ($gene_scores->loaded) {
+    $self->logger->info("Synteny Framework building...\n", 0, 'stamped');
+    # load the class here rather than relying on some other module doing so
+    require Bio::EnsEMBL::IdMapping::SyntenyFramework;
+    my $dump_path = path_append($self->conf->param('basedir'), 'mapping');
+    my $sf = Bio::EnsEMBL::IdMapping::SyntenyFramework->new(
+      -DUMP_PATH    => $dump_path,
+      -CACHE_FILE   => 'synteny_framework.ser',
+      -LOGGER       => $self->logger,
+      -CONF         => $self->conf,
+      -CACHE        => $self->cache,
+    );
+    $sf->build_synteny($mappings);
+
+    # use it to rescore the genes
+    $self->logger->info("\nSynteny assisted mapping...\n", 0, 'stamped');
+    $gene_scores = $sf->rescore_gene_matrix_lsf($gene_scores);
+
+    # checkpoint
+    $gene_scores->write_to_file;
+  }
+
+  my $new_mappings = $self->basic_mapping($gene_scores, "gene_mappings$num");
+  $num++;
+  my $new_scores = $gsb->create_shrinked_matrix($gene_scores, $new_mappings,
+    "gene_matrix$num");
+
+  return ($new_scores, $new_mappings);
+}
+
+
+#
+# Retry with the simple best transcript score: unless $gene_scores was loaded
+# from a checkpoint, have $gsb rescore it from $transcript_scores and
+# checkpoint it; then run the basic mapping. The incoming $mappings argument
+# is not read. Returns the list (new matrix, new mappings).
+#
+sub best_transcript {
+  my ($self, $num, $gsb, $mappings, $gene_scores, $transcript_scores) = @_;
+
+  $self->logger->info("Retry with simple best transcript score...\n", 0, 'stamped');
+
+  if (!$gene_scores->loaded) {
+    $gsb->simple_gene_rescore($gene_scores, $transcript_scores);
+    $gene_scores->write_to_file;
+  }
+
+  my $new_mappings = $self->basic_mapping($gene_scores, "gene_mappings$num");
+
+  $num++;
+  my $matrix_name = "gene_matrix$num";
+  my $new_scores =
+    $gsb->create_shrinked_matrix($gene_scores, $new_mappings, $matrix_name);
+
+  return ($new_scores, $new_mappings);
+}
+
+
+#
+# Retry with biotype disambiguation: unless $gene_scores was loaded from a
+# checkpoint, have $gsb penalise scores between genes with different biotypes
+# and checkpoint the matrix; then run the basic mapping. The incoming
+# $mappings argument is not read. Returns the list (new matrix, new mappings).
+#
+sub biotype {
+  my ($self, $num, $gsb, $mappings, $gene_scores) = @_;
+
+  $self->logger->info("Retry with biotype disambiguation...\n", 0, 'stamped');
+
+  if (!$gene_scores->loaded) {
+    $gsb->biotype_gene_rescore($gene_scores);
+    $gene_scores->write_to_file;
+  }
+
+  my $new_mappings = $self->basic_mapping($gene_scores, "gene_mappings$num");
+  $num++;
+  my $matrix_name = "gene_matrix$num";
+  my $new_scores =
+    $gsb->create_shrinked_matrix($gene_scores, $new_mappings, $matrix_name);
+
+  return ($new_scores, $new_mappings);
+}
+
+
+#
+# Retry with internalID disambiguation: unless $gene_scores was loaded from a
+# checkpoint, have $gsb selectively penalise scores between genes with
+# different internalIDs and checkpoint the matrix; then run the basic mapping.
+# The incoming $mappings argument is not read. Returns (new matrix, mappings).
+#
+sub internal_id {
+  my ($self, $num, $gsb, $mappings, $gene_scores) = @_;
+
+  $self->logger->info("Retry with internalID disambiguation...\n", 0, 'stamped');
+
+  if (!$gene_scores->loaded) {
+    $gsb->internal_id_rescore($gene_scores);
+    $gene_scores->write_to_file;
+  }
+
+  my $new_mappings = $self->basic_mapping($gene_scores, "gene_mappings$num");
+
+  $num++;
+  my $matrix_name = "gene_matrix$num";
+  my $new_scores =
+    $gsb->create_shrinked_matrix($gene_scores, $new_mappings, $matrix_name);
+
+  return ($new_scores, $new_mappings);
+}
+
+
+1;
+
diff --git a/modules/Bio/EnsEMBL/IdMapping/InternalIdMapper/EnsemblTranscriptGeneric.pm b/modules/Bio/EnsEMBL/IdMapping/InternalIdMapper/EnsemblTranscriptGeneric.pm
new file mode 100644
index 0000000000..8c7ad9ab33
--- /dev/null
+++ b/modules/Bio/EnsEMBL/IdMapping/InternalIdMapper/EnsemblTranscriptGeneric.pm
@@ -0,0 +1,276 @@
+package Bio::EnsEMBL::IdMapping::InternalIdMapper::EnsemblTranscriptGeneric;
+
+=head1 NAME
+
+
+=head1 SYNOPSIS
+
+
+=head1 DESCRIPTION
+
+
+=head1 METHODS
+
+
+=head1 LICENCE
+
+This code is distributed under an Apache style licence. Please see
+http://www.ensembl.org/info/about/code_licence.html for details.
+
+=head1 AUTHOR
+
+Patrick Meidl <meidl@ebi.ac.uk>, Ensembl core API team
+
+=head1 CONTACT
+
+Please post comments/questions to the Ensembl development list
+<ensembl-dev@ebi.ac.uk>
+
+=cut
+
+
+use strict;
+use warnings;
+no warnings 'uninitialized';
+
+use Bio::EnsEMBL::IdMapping::InternalIdMapper::BaseMapper;
+our @ISA = qw(Bio::EnsEMBL::IdMapping::InternalIdMapper::BaseMapper);
+
+use Bio::EnsEMBL::Utils::Exception qw(throw warning);
+use Bio::EnsEMBL::Utils::ScriptUtils qw(path_append);
+
+  
+#
+# Basic transcript mapping: run basic_mapping() on $transcript_scores and let
+# $tsb derive the follow-up matrix via create_shrinked_matrix(). The incoming
+# $mappings argument is not read. Returns the list (new matrix, new mappings).
+#
+sub init_basic {
+  my ($self, $num, $tsb, $mappings, $transcript_scores) = @_;
+
+  $self->logger->info("Basic transcript mapping...\n", 0, 'stamped');
+
+  my $mapping_name = "transcript_mappings$num";
+  $mappings = $self->basic_mapping($transcript_scores, $mapping_name);
+
+  $num++;
+  my $matrix_name = "transcript_matrix$num";
+  my $new_scores =
+    $tsb->create_shrinked_matrix($transcript_scores, $mappings, $matrix_name);
+
+  return ($new_scores, $mappings);
+}
+
+
+#
+# Exact transcript, non-exact translation: unless $transcript_scores was
+# loaded from a checkpoint, have $tsb rescore it for differing translations
+# and checkpoint it; then run the basic mapping. The incoming $mappings
+# argument is not read. Returns the list (new matrix, new mappings).
+#
+sub non_exact_translation {
+  my ($self, $num, $tsb, $mappings, $transcript_scores) = @_;
+
+  $self->logger->info("Exact Transcript non-exact Translation...\n", 0, 'stamped');
+
+  if (!$transcript_scores->loaded) {
+    $tsb->different_translation_rescore($transcript_scores);
+    $transcript_scores->write_to_file;
+  }
+
+  my $mapping_name = "transcript_mappings$num";
+  $mappings = $self->basic_mapping($transcript_scores, $mapping_name);
+  $num++;
+  my $matrix_name = "transcript_matrix$num";
+  my $new_scores =
+    $tsb->create_shrinked_matrix($transcript_scores, $mappings, $matrix_name);
+
+  return ($new_scores, $mappings);
+}
+
+
+#
+# Transcripts in mapped genes: unless $transcript_scores was loaded from a
+# checkpoint, have $tsb rescore it against $gene_mappings (reducing the score
+# of transcripts which do not belong to mapped genes) and checkpoint it; then
+# run the basic mapping. Returns the list (new matrix, new mappings).
+#
+sub mapped_gene {
+  my ($self, $num, $tsb, $mappings, $transcript_scores, $gene_mappings) = @_;
+
+  $self->logger->info("Transcripts in mapped genes...\n", 0, 'stamped');
+
+  if (!$transcript_scores->loaded) {
+    $tsb->non_mapped_gene_rescore($transcript_scores, $gene_mappings);
+    $transcript_scores->write_to_file;
+  }
+
+  my $mapping_name = "transcript_mappings$num";
+  $mappings = $self->basic_mapping($transcript_scores, $mapping_name);
+
+  # the matrix name carries the incremented step number
+  $num++;
+  my $matrix_name = "transcript_matrix$num";
+  my $new_scores =
+    $tsb->create_shrinked_matrix($transcript_scores, $mappings, $matrix_name);
+
+  return ($new_scores, $mappings);
+}
+
+
+#
+# Retry with internalID disambiguation: unless $transcript_scores was loaded
+# from a checkpoint, have $tsb selectively penalise scores between transcripts
+# with different internalIDs and checkpoint it; then run the basic mapping.
+# The incoming $mappings argument is not read. Returns (new matrix, mappings).
+#
+sub internal_id {
+  my ($self, $num, $tsb, $mappings, $transcript_scores) = @_;
+
+  $self->logger->info("Retry with internalID disambiguation...\n", 0, 'stamped');
+
+  if (!$transcript_scores->loaded) {
+    $tsb->internal_id_rescore($transcript_scores);
+    $transcript_scores->write_to_file;
+  }
+
+  my $mapping_name = "transcript_mappings$num";
+  $mappings = $self->basic_mapping($transcript_scores, $mapping_name);
+
+  $num++;
+  my $matrix_name = "transcript_matrix$num";
+  my $new_scores =
+    $tsb->create_shrinked_matrix($transcript_scores, $mappings, $matrix_name);
+
+  return ($new_scores, $mappings);
+}
+
+
+#
+# Transcripts in single genes: handle ambiguities between transcripts of one
+# gene with same_gene_transcript_mapping(). $transcript_scores is not rescored
+# here, only checkpointed unless it was loaded from a checkpoint.
+# Returns the list (new matrix, new mappings).
+#
+sub single_gene {
+  my ($self, $num, $tsb, $mappings, $transcript_scores) = @_;
+
+  $self->logger->info("Transcripts in single genes...\n", 0, 'stamped');
+
+  # no rescoring in this step, just the checkpoint
+  $transcript_scores->write_to_file unless ($transcript_scores->loaded);
+
+  my $mapping_name = "transcript_mappings$num";
+  $mappings = $self->same_gene_transcript_mapping($transcript_scores, $mapping_name);
+
+  $num++;
+  my $matrix_name = "transcript_matrix$num";
+  my $new_scores =
+    $tsb->create_shrinked_matrix($transcript_scores, $mappings, $matrix_name);
+
+  return ($new_scores, $mappings);
+}
+
+
+# Modified basic mapper: an ambiguous transcript mapping is still accepted
+# when all similar-scoring rivals belong to one source and one target gene.
+# Unlike basic_mapping(), higher_score_exists() is not consulted here.
+sub same_gene_transcript_mapping {
+  my $self = shift;
+  my $matrix = shift;
+  my $mapping_name = shift;
+
+  # argument checks
+  unless ($matrix and
+          $matrix->isa('Bio::EnsEMBL::IdMapping::ScoredMappingMatrix')) {
+    throw('Need a Bio::EnsEMBL::IdMapping::ScoredMappingMatrix.');
+  }
+
+  throw('Need a name for serialising the mapping.') unless ($mapping_name);
+
+  # Create a new MappingList object. Specify AUTO_LOAD to load serialised
+  # existing mappings if found
+  my $dump_path = path_append($self->conf->param('basedir'), 'mapping');
+  
+  my $mappings = Bio::EnsEMBL::IdMapping::MappingList->new(
+    -DUMP_PATH   => $dump_path,
+    -CACHE_FILE  => "${mapping_name}.ser",
+    -AUTO_LOAD   => 1,
+  );
+  
+  # checkpoint test: return a previously stored MappingList
+  if ($mappings->loaded) {
+    $self->logger->info("Read existing mappings from ${mapping_name}.ser.\n");
+    return $mappings;
+  }
+
+  my $sources_done = {};
+  my $targets_done = {};
+
+  # sort entries by descending score; ties are broken by source, then target
+  my @sorted_entries = sort { $b->score <=> $a->score ||
+    $a->source <=> $b->source || $a->target <=> $b->target }
+      @{ $matrix->get_all_Entries };
+
+  while (my $entry = shift(@sorted_entries)) {
+    
+    # $self->logger->debug("\nxxx4 ".$entry->to_string." ");
+
+    # we already found a mapping for either source or target
+    next if ($sources_done->{$entry->source} or
+             $targets_done->{$entry->target});
+
+    #$self->logger->debug('d');
+
+    my $other_sources = [];
+    my $other_targets = [];
+    my %source_genes = ();
+    my %target_genes = ();
+
+    if ($self->ambiguous_mapping($entry, $matrix, $other_sources, $other_targets)) {
+      #$self->logger->debug('a');
+
+      $other_sources = $self->filter_sources($other_sources, $sources_done);
+      $other_targets = $self->filter_targets($other_targets, $targets_done);
+
+      $source_genes{$self->cache->get_by_key('genes_by_transcript_id',
+        'source', $entry->source)} = 1;
+      $target_genes{$self->cache->get_by_key('genes_by_transcript_id',
+        'target', $entry->target)} = 1;
+
+      foreach my $other_source (@{ $other_sources }) {
+        $source_genes{$self->cache->get_by_key('genes_by_transcript_id',
+          'source', $other_source)} = 1;
+      }
+        
+      foreach my $other_target (@{ $other_targets }) {
+        $target_genes{$self->cache->get_by_key('genes_by_transcript_id',
+          'target', $other_target)} = 1;
+      }
+      
+      # only add mapping if only one source and target gene involved
+      if (scalar(keys %source_genes) == 1 and scalar(keys %target_genes) == 1) {
+        #$self->logger->debug('O');
+        $mappings->add_Entry($entry);
+      }
+
+    } else {
+      #$self->logger->debug('A');
+
+      # this is the best mapping, add it
+      $mappings->add_Entry($entry);
+    }
+    # note: marked as done even when the entry was not added above
+    $sources_done->{$entry->source} = 1;
+    $targets_done->{$entry->target} = 1;
+  }
+
+  # create checkpoint
+  $mappings->write_to_file;
+
+  return $mappings;
+}
+
+
+1;
+
-- 
GitLab