From 46edfe0b26044ce45e99787f0c8546d4989997f6 Mon Sep 17 00:00:00 2001
From: Graham McVicker <mcvicker@sanger.ac.uk>
Date: Fri, 9 Jan 2004 17:16:42 +0000
Subject: [PATCH] removed old perl ID mapping (v. 2) scripts

---
 misc-scripts/idmap2/MapGenes.pm        | 208 -----------
 misc-scripts/idmap2/MapTranscripts.pm  | 247 ------------
 misc-scripts/idmap2/README             |  81 ----
 misc-scripts/idmap2/SQL.pm             | 193 ----------
 misc-scripts/idmap2/apply_mappings.pl  | 149 --------
 misc-scripts/idmap2/double_exon_fix.pl | 157 --------
 misc-scripts/idmap2/mapping.pl         | 497 -------------------------
 7 files changed, 1532 deletions(-)
 delete mode 100644 misc-scripts/idmap2/MapGenes.pm
 delete mode 100644 misc-scripts/idmap2/MapTranscripts.pm
 delete mode 100644 misc-scripts/idmap2/README
 delete mode 100644 misc-scripts/idmap2/SQL.pm
 delete mode 100644 misc-scripts/idmap2/apply_mappings.pl
 delete mode 100644 misc-scripts/idmap2/double_exon_fix.pl
 delete mode 100644 misc-scripts/idmap2/mapping.pl

diff --git a/misc-scripts/idmap2/MapGenes.pm b/misc-scripts/idmap2/MapGenes.pm
deleted file mode 100644
index 10cf0b08b4..0000000000
--- a/misc-scripts/idmap2/MapGenes.pm
+++ /dev/null
@@ -1,208 +0,0 @@
-# here we expect a list of exons from old and new database
-# each exon contains exon_stable to mark it as mapped 
-
-# each exon contains the internal gene_id of the gene it is 
-# part of. This module tries to map the gene_stable ids over to
-# the new exons.
-
-
-package MapGenes;
-use strict;
-
-
-# starting with one exon, build_exon_group tries to build an 
-# 'envelope' around it. That is a set of genes and exons where neither
-# of them misses parts. You call build_exon_group as often as needed
-# until it doesnt produce new exons any more
-
-my ( %newGeneHash, %oldGeneHash, 
-     %oldExonHash, %newExonHash,
-     @oldPruneExon, @newPruneExon );
-my $mappedGene;
-
-# go through old Exons
-# skip if gene is mapped
-# build envelope if not
-sub map_genes {
-  my ( $oldExonInfo, $newExonInfo )  = @_;
-
-  my %oldGenesMapped; 
-  print STDERR ( "Start gene mapping ",scalar( localtime() ),"\n");
-  init_lookup_tables( $oldExonInfo, $newExonInfo );
-
-  for my $oExon ( @oldPruneExon ) {
-    if( ! exists $oExon->{'gene_mapped'} ) {
-      my @map_exons = build_envelope( $oExon, \@oldPruneExon, \@newPruneExon );
-      map_envelope( \@map_exons );
-    }
-  }
-  print STDERR ( "End gene mapping ",scalar( localtime() ),"\n");
-  print STDERR ( "Mapped Genes ",$mappedGene,"\n" );
-}
-
-
-
-sub map_envelope {
-  my $exon_stables = shift;
-  
-  my %gene_map_hash;
-  my $common_exons = 0;
-
-  for my $exon_stable_id ( @$exon_stables ) {
-    if( exists $oldExonHash{ $exon_stable_id } &&
-	exists $newExonHash{ $exon_stable_id } ) {
-      my ( $oEx, $nEx, $hashkey );
-      $oEx = $oldExonHash{ $exon_stable_id };
-      $nEx = $newExonHash{ $exon_stable_id };
-      $common_exons++;
-
-      $hashkey = $oEx->{'gene_id'}."-".$nEx->{'gene_id'};
-      if( exists $gene_map_hash{$hashkey} ) {
-	$gene_map_hash{$hashkey}->[2] += 
-	  $nEx->{'exon_end'}-$nEx->{'exon_start'}+1;
-      } else {
-	$gene_map_hash{$hashkey} = [ $oEx->{'gene_id'}, $nEx->{'gene_id'},
-				     $nEx->{'exon_end'}-$nEx->{'exon_start'}+1,
-				     $oEx->{'gene_stable'}];
-							       
-      }
-    }
-  }
-
-  my @sortedMappings = sort { $b->[2] <=> $a->[2] } values %gene_map_hash;
-  my ( %oldGenesMapped, %newGenesMapped );
-
-  for my $mapRecord ( @sortedMappings ) {
-    if( ! exists $oldGenesMapped{$mapRecord->[0]} &&
-	! exists $newGenesMapped{$mapRecord->[1]} ) {
-      $oldGenesMapped{$mapRecord->[0]} = $mapRecord->[1];
-      $newGenesMapped{$mapRecord->[1]} = $mapRecord->[0];
-      print ( "Gene mapped:\t",$mapRecord->[0], "\t",$mapRecord->[1],
-	      "\t",$mapRecord->[3],".\n" );
-      $mappedGene++;
-    }
-  }
-
-  for my $exon_stable_id ( @$exon_stables ) {
-    if( exists $oldExonHash{ $exon_stable_id }) {
-      $oldExonHash{ $exon_stable_id }{'gene_mapped'} = 1;
-    }
-    if( exists $newExonHash{ $exon_stable_id }) {
-      $newExonHash{ $exon_stable_id }{'gene_mapped'} = 1;
-    }
-  }
-  
-}
-
-
-sub init_lookup_tables {
-  my ( $oldExonInfo, $newExonInfo ) = @_;
-  
-  # init the lookup tables
-  for my $exon ( @$newExonInfo ) {
-    if( exists $exon->{'exon_stable'} ) {
-      push( @{$newGeneHash{$exon->{'gene_id'}}}, $exon );
-      $newExonHash{ $exon->{'exon_stable'} } = $exon;
-      push( @newPruneExon, $exon );
-    }
-  }
-
-  for my $exon ( @$oldExonInfo ) {
-    # should exist for all of them ...
-    if( exists $newExonHash{ $exon->{'exon_stable'}} ) {
-      push( @{$oldGeneHash{$exon->{'gene_id'}}}, $exon );
-      $oldExonHash{ $exon->{'exon_stable'} } = $exon;
-      push( @oldPruneExon, $exon );
-      my $newExon = $newExonHash{ $exon->{'exon_stable'}};
-      # print "vvvv Mapping vvvv\n";
-      # ::print_exon( $exon );
-      # ::print_exon( $newExon );
-      # print "^^^^ Mapping ^^^^\n";
-    } 
-  }
-  
-
-#  print "Lookup initialized: ",scalar( localtime() ),"\n";
-#  print ( "old exons:",scalar( @oldPruneExon ),"\n" );
-#  print ( "new exons:",scalar( @newPruneExon ),"\n" );
-}
-
-
-# given one old exon, build envelope around it
-# return set of stable ids which cover the 
-# given exon.
-
-# That means: All old and new genes of these exons have all
-# their stable_id exons in this set as well.
-sub build_envelope {
-  my ( $exon, $oExonInfo, $nExonInfo ) = @_;
-
-  my $oGeneGroup = {};
-  my $nGeneGroup = {};
-  my $exonSet = {};
-  my $newExons = [];
-  push( @$newExons, $exon->{'exon_stable'} );
-
-  $newExons = build_exon_group( $oGeneGroup, $exonSet, $newExons,
-				\%oldGeneHash, \%oldExonHash );
-
-  while( scalar( @$newExons )) {
-    $newExons = build_exon_group( $nGeneGroup, $exonSet, $newExons,
-				  \%newGeneHash, \%newExonHash );
-    if( scalar( @$newExons )) {
-      $newExons = build_exon_group( $oGeneGroup, $exonSet, $newExons,
-				    \%oldGeneHash, \%oldExonHash );
-    }
-  }
-  
-  # print ( "Exon envelope: ", join( " ", keys %$exonSet),"\n" );
-  # print ( "Old Genes:", join( " ", keys %$oGeneGroup ) ,"\n" );
-  # print ( "New Genes:", join( " ", keys %$nGeneGroup ) ,"\n\n" );
-  return keys %$exonSet;
-}
-
-
-# expand newExonSet that it covers all exons of its genes
-# wiuth respect to one exon/gene set.
-sub build_exon_group {
-  my ( $geneGroup, $exonSet, $newExonSet, 
-       $geneLookup, $exonLookup ) = @_;
-  # print STDERR ( "Build group ", join( " ", @$newExonSet ),"\n" );
-  my (%newGenes, @newExons, $exon);
-
-  # get Genes from new Exons
-  # get Exons from this genes
-  
-  for my $exon_stable ( @$newExonSet ) {
-    $exon = $exonLookup->{$exon_stable};
-    my $geneId;
-
-    $geneId = $exon->{'gene_id'};
-    if( ! defined $geneId ) {
-      print STDERR "undefined ";
-      &::print_exon( $exon );
-    }
-    if( !exists $geneGroup->{$geneId} ) {
-      $newGenes{$geneId} = 1;
-      $geneGroup->{$geneId} = 1;
-    }
-  }
-      
-
-  for my $geneId ( keys %newGenes ) {
-    my $exons = $geneLookup->{$geneId};
-    for my $exon ( @$exons ) {
-      if( ! exists( $exonSet->{$exon->{'exon_stable'}} ) ) {
-	$exonSet->{$exon->{'exon_stable'}} = 1;
-	push( @newExons, $exon->{'exon_stable'} );
-#	print STDERR "Pushed Exon to Envelope\n";
-      }
-    }
-  }
-  # print STDERR ( "Exon envelope: ", join( " ", @newExons),"\n\n" );
-
-  return \@newExons;
-}
-
-
-1;
diff --git a/misc-scripts/idmap2/MapTranscripts.pm b/misc-scripts/idmap2/MapTranscripts.pm
deleted file mode 100644
index 5d33d70709..0000000000
--- a/misc-scripts/idmap2/MapTranscripts.pm
+++ /dev/null
@@ -1,247 +0,0 @@
-# here we expect a list of exons from old and new database
-# each exon contains exon_stable to mark it as mapped 
-
-# each exon contains the internal transcript_id of the transcript it is 
-# part of. This module tries to map the transcript_stable ids over to
-# the new exons.
-
-
-package MapTranscripts;
-use strict;
-
-
-# starting with one exon, build_exon_group tries to build an 
-# 'envelope' around it. That is a set of transcripts and exons where neither
-# of them misses parts. You call build_exon_group as often as needed
-# until it doesnt produce new exons any more
-
-my ( %newTranscriptHash, %oldTranscriptHash, 
-     %oldExonHash, %newExonHash );
-
-my $mappedTranscripts;
-
-
-# go through old Exons
-# skip if Transcript is mapped
-# build envelope if not
-sub map_transcripts {
-  my ( $oldExonInfo, $newExonInfo )  = @_;
-
-  my %oldGenesMapped; 
-  print STDERR ( "Start transcript mapping ",scalar( localtime() ),"\n");
-  init_lookup_tables( $oldExonInfo, $newExonInfo );
-
-  for my $oExon ( @$oldExonInfo ) {
-    if( ! exists $oExon->{'transcript_mapped'} ) {
-      my @map_exons = build_envelope( $oExon, $oldExonInfo, $newExonInfo );
-      map_envelope( \@map_exons );
-    }
-  }
-  print STDERR ( "End transcript mapping ",scalar( localtime() ),"\n");
-  print STDERR ( "Mapped Transcripts ",$mappedTranscripts,"\n" );
-}
-
-
-
-sub map_envelope {
-  my $exon_stables = shift;
-  
-  my %transcript_map_hash;
-  my $common_exons = 0;
-  my ( @oldExList, @newExList );
-
-  for my $exon_stable_id ( @$exon_stables ) {
-    if( exists $oldExonHash{ $exon_stable_id } &&
-	exists $newExonHash{ $exon_stable_id } ) {
-      my ( $oEx, $nEx, $hashkey );
-      @oldExList = @{$oldExonHash{ $exon_stable_id }};
-      @newExList = @{$newExonHash{ $exon_stable_id }};
-      for my $oEx ( @oldExList ) {
-	for my $nEx ( @newExList ) {
-
-	  $hashkey = $oEx->{'transcript_id'}."-".
-	    $nEx->{'transcript_id'};
-	  if( exists $transcript_map_hash{$hashkey} ) {
-	    $transcript_map_hash{$hashkey}->[2] += 
-	      $nEx->{'exon_end'}-$nEx->{'exon_start'}+1;
-	  } else {
-	    $transcript_map_hash{$hashkey} = 
-	      [ $oEx->{'transcript_id'}, $nEx->{'transcript_id'},
-		$nEx->{'exon_end'}-$nEx->{'exon_start'}+1,
-		$oEx->{'transcript_stable'}];
-	    
-	  }
-	}
-      }
-    }
-  }
-
-  my @sortedMappings = sort { $b->[2] <=> $a->[2] } values %transcript_map_hash;
-  my ( %oldTranscriptsMapped, %newTranscriptsMapped );
-
-  for my $mapRecord ( @sortedMappings ) {
-    if( ! exists $oldTranscriptsMapped{$mapRecord->[0]} &&
-	! exists $newTranscriptsMapped{$mapRecord->[1]} ) {
-      $oldTranscriptsMapped{$mapRecord->[0]} = $mapRecord->[1];
-      $newTranscriptsMapped{$mapRecord->[1]} = $mapRecord->[0];
-      print ( "Transcript mapped:\t",$mapRecord->[0], "\t",$mapRecord->[1],
-	      "\t",$mapRecord->[3],"\n" );
-      $mappedTranscripts++;
-    }
-  }
-  
-  for my $exon_stable_id ( @$exon_stables ) {
-    if( exists $oldExonHash{ $exon_stable_id }) {
-      for my $exon ( @{$oldExonHash{ $exon_stable_id }} ) {
-	$exon->{'transcript_mapped'} = 1;
-      }
-    }
-    if( exists $newExonHash{ $exon_stable_id }) {
-      for my $exon ( @{$newExonHash{ $exon_stable_id }} ) {
-	$exon->{'transcript_mapped'} = 1;
-      }
-    }
-  }
-
-}
-
-
-sub init_lookup_tables {
-  my ( $oldExonInfo, $newExonInfo ) = @_;
-
-  # problem: not all exons which are mapped ar emarked as such
-  # that happened if they are in more than one transcript
-  my %exHash= ();
-
-
-  for my $exon ( @$newExonInfo ) {
-    if( exists $exon->{'mapped'} ) {
-      $exHash{$exon->{'exon_id'}." ".$exon->{'sticky_rank'}." mapped"} =
-	$exon->{'mapped'};
-      $exHash{$exon->{'exon_id'}." ".$exon->{'sticky_rank'}." stable"} =
-	$exon->{'exon_stable'};
-    }
-  }
-
-  for my $exon ( @$newExonInfo ) {
-    if( ! exists $exon->{'mapped'} && 
-	( exists $exHash{$exon->{'exon_id'}." ".$exon->{'sticky_rank'}." mapped"} )) {
-      $exon->{'mapped'}= $exHash{$exon->{'exon_id'}." ".$exon->{'sticky_rank'}." mapped"};
-      $exon->{'exon_stable'}= $exHash{$exon->{'exon_id'}." ".$exon->{'sticky_rank'}." stable"};
-    }
-    push( @{$newTranscriptHash{$exon->{'transcript_id'}}}, $exon );
-    push( @{$newExonHash{ $exon->{'exon_stable'} }}, $exon );
-  }
-
-  %exHash = ();
-
-  for my $exon ( @$oldExonInfo ) {
-    if( exists $exon->{'mapped'} ) {
-      $exHash{$exon->{'exon_id'}." ".$exon->{'sticky_rank'}} =
-	$exon->{'mapped'};
-    }
-  }
-
-  for my $exon ( @$oldExonInfo ) {
-    if( ! exists $exon->{'mapped'} && 
-	( exists $exHash{$exon->{'exon_id'}." ".$exon->{'sticky_rank'}} )) {
-      $exon->{'mapped'}= $exHash{$exon->{'exon_id'}." ".$exon->{'sticky_rank'}};
-    }
-    if( exists $exon->{'mapped'} ) {
-      push( @{$oldTranscriptHash{$exon->{'transcript_id'}}}, $exon );
-      push( @{$oldExonHash{ $exon->{'exon_stable'} }}, $exon );
-    }
-  }
-  # remove all exons from $oldExonInfo which dont map
-  my $iter = 0;
-  while( $iter < scalar @$oldExonInfo ) {
-    my $exon = $oldExonInfo->[$iter];
-    if( ! exists $exon->{'mapped'} ) {
-      splice( @$oldExonInfo, $iter ,1 );
-    } else {
-      $iter++;
-    }
-  }
-}
-
-
-# given one old exon, build envelope around it
-# return set of stable ids which cover the 
-# given exon.
-
-# That means: All old and new genes of these exons have all
-# their stable_id exons in this set as well.
-sub build_envelope {
-  my ( $exon, $oExonInfo, $nExonInfo ) = @_;
-
-  my $oTranscriptGroup = {};
-  my $nTranscriptGroup = {};
-  my $exonSet = {};
-  my $newExons = [];
-
-  push( @$newExons, $exon->{'exon_stable'} );
-
-  $newExons = build_exon_group( $oTranscriptGroup, $exonSet, $newExons,
-				\%oldTranscriptHash, \%oldExonHash );
-
-  while( scalar( @$newExons )) {
-    $newExons = build_exon_group( $nTranscriptGroup, $exonSet, $newExons,
-				  \%newTranscriptHash, \%newExonHash );
-    if( scalar( @$newExons )) {
-      $newExons = build_exon_group( $oTranscriptGroup, $exonSet, $newExons,
-				    \%oldTranscriptHash, \%oldExonHash );
-    }
-  }
-  
-#  print ( "Exon envelope: ", join( " ", keys %$exonSet),"\n" );
-#  print ( "Old Transcripts:", join( " ", keys %$oTranscriptGroup ) ,"\n" );
-#  print ( "New Transcripts:", join( " ", keys %$nTranscriptGroup ) ,"\n\n" );
-  return keys %$exonSet;
-}
-
-
-# expand newExonSet that it covers all exons of its transcripts
-# with respect to one exon/transcript set.
-sub build_exon_group {
-  my ( $transcriptGroup, $exonSet, $newExonSet, 
-       $transcriptLookup, $exonLookup ) = @_;
-  # print STDERR ( "Build group ", join( " ", @$newExonSet ),"\n" );
-  my (%newTranscripts, @newExons, $exon);
-
-  # get Transcrips from new Exons
-  # get Exons from this Transcripts
-  
-  for my $exon_stable ( @$newExonSet ) {
-    for my $exon ( @{$exonLookup->{$exon_stable}} ) {
-      my $transcriptId;
-
-      $transcriptId = $exon->{'transcript_id'};
-      if( ! defined $transcriptId ) {
-	print STDERR "undefined ";
-	&::print_exon( $exon );
-      }
-      if( !exists $transcriptGroup->{$transcriptId} ) {
-	$newTranscripts{$transcriptId} = 1;
-	$transcriptGroup->{$transcriptId} = 1;
-      }
-    }
-  }
-      
-
-  for my $transcriptId ( keys %newTranscripts ) {
-    my $exons = $transcriptLookup->{$transcriptId};
-    for my $exon ( @$exons ) {
-      if( ! exists( $exonSet->{$exon->{'exon_stable'}} ) ) {
-	$exonSet->{$exon->{'exon_stable'}} = 1;
-	push( @newExons, $exon->{'exon_stable'} );
-#	print STDERR "Pushed Exon to Envelope\n";
-      }
-    }
-  }
-  # print STDERR ( "Exon envelope: ", join( " ", @newExons),"\n\n" );
-
-  return \@newExons;
-}
-
-
-1;
diff --git a/misc-scripts/idmap2/README b/misc-scripts/idmap2/README
deleted file mode 100644
index 6dd7b1eddf..0000000000
--- a/misc-scripts/idmap2/README
+++ /dev/null
@@ -1,81 +0,0 @@
-This document shell describe how id mapping in EnsEMBL is done. 
-
-
-All idmapping will be based on mapping exons from an old release of our
-database to the next. It is assumed that the new and the old release
-are available in the schema of the new release and the code of the new
-release is used throughout. ( There will not be a DBOLD directory
-involved with the idmapping code. )
-
-1. Exon mapping
-
-The basic proceeding is, to try to map all old exons over to the new
-golden path.
-
-
-1.1 Easy exon mapping
-
-The first exons ids mapped to the new database are the ones which keep
-their position on the same contig. (No contig version update).
-
-Criteria: any overlap and sensitive handling of splits and merges.
-(easier: 50% min overlap gets it )
-
-1.2 New contig version
-
-Some Exons might sit on contigs which are still on the golden path in
-the new release, but in a different version. Does any of the new Exons
-on this contigs look like the old Exon (exact string match or crossmatch)
-
-Criteria: Crossmatch and 95% sequence identity.
-Problem: more than one exon on the same clone is almost identical.
-In this case use best match on clone..
-
-
-1.25 Redundant clone information
-
-Map exons on redundant clones to the new clone covering the same
-area.
-
-1.3 Sister Exons
-
-Do we have unmapped Exons in old Genes which have sister Exons which
-are mapped? Search for Exons in similar position on the new golden
-path. 
-
-Remark on sequence comparison: Use a window around exon sequence to map
-intronic sequence as well. Window 50bp around the exons. 
-
-
-1.6 So far unmapped.
-
-Can we find a >95% sequence match somewhere in the genome?
-If not consider this exon not to be mappable.
-
-
-
-2. Transcript, Gene mapping
-
-This mapping happens on the basis of the mapped exons. We screen
-through the old gene, transcript objects and look if the majority of
-their exons were mapped to a corresponding new object. Then the id is
-transferred to that object. 
-
-In case of a merger, split or general mix of old objects, we transfer
-ids for the biggest number of exons translated from one old to one new
-first, then for the second biggest and so on.
-
-
-
-
-
-
-
-
-
- 
-
-
-
-
-
diff --git a/misc-scripts/idmap2/SQL.pm b/misc-scripts/idmap2/SQL.pm
deleted file mode 100644
index 6488ee936f..0000000000
--- a/misc-scripts/idmap2/SQL.pm
+++ /dev/null
@@ -1,193 +0,0 @@
-# This module encapsulates the SQL needed for the idmapping
-# Author: Arne Stabenau 3.10.2001
-
-# ( why not use EnsEMBL here? Currently to slow?? This piece of software is 
-#  going to be replaced very soon by a java version )
-
-use DBI;
-
-package SQL;
-
-
-
-# takes a database handle and produces all old exon information needed
-
-sub old_orig_exon_information {
-  my $dbh  = shift;
-  my %res;
-
-  my $sth = $dbh->prepare( "
-    SELECT exon_stable, transcript_stable, 
-           gene_stable, exon_id, gene_id, transcript_id, 
-           clone_id, clone_version, sticky_rank, 
-           contig_offset, contig_length,
-           exon_start, exon_end, exon_strand,
-           chr_name, chr_start, chr_end, 
-           raw_start, raw_end, raw_ori
-     FROM  exon_temp 
-    ORDER by clone_id
- " );
-#    LIMIT 5000" );
-  $sth->execute();
-
-  my $arrRef = _store_in_arrayref( $sth );
-  for my $exon ( @$arrRef ) {
-    calc_chr_coord( $exon );
-  }
-
-  return $arrRef;;
-}
-
-sub old_target_exon_information {
-  my $dbh = shift;
-  
-  my $sth = $dbh->prepare( "
-    SELECT exon_id, transcript_id, gene_id,
-           clone_id, clone_version, sticky_rank,
-           contig_offset, contig_length,
-           exon_start, exon_end, exon_strand,
-           chr_name, chr_start, chr_end, 
-           raw_start, raw_end, raw_ori
-     FROM exon_temp
-    ORDER by clone_id
-  " );
-#   LIMIT 5000");
-  $sth->execute();
-  my $arrRef = _store_in_arrayref( $sth );
-  for my $exon ( @$arrRef ) {
-    calc_chr_coord( $exon );
-  }
-  return $arrRef;
-}
-
-
-
-sub orig_exon_information {
-  my $dbh = shift;
-  my %res;
-#  $dbh->do( "drop table exon_temp" );
-
-#    CREATE TABLE exon_temp
-  my $sth = $dbh->prepare( "
-    SELECT esi.stable_id as exon_stable, tsi.stable_id as transcript_stable, 
-           gsi.stable_id as gene_stable, e.exon_id as exon_id, t.gene_id as gene_id,
-           t.transcript_id as transcript_id,
-           cl.embl_id as clone_id, cl.embl_version as clone_version, 
-           c.offset as contig_offset, c.length as contig_length,
-           e.seq_start as exon_start, e.seq_end as exon_end, e.strand as exon_strand,
-           e.sticky_rank as sticky_rank,
-           sgp.chr_name as chr_name, sgp.chr_start as chr_start, sgp.chr_end as chr_end, 
-           sgp.raw_start as raw_start, sgp.raw_end as raw_end, sgp.raw_ori as raw_ori
-      FROM exon e, exon_transcript et, exon_stable_id esi, contig c, clone cl,
-           static_golden_path sgp, transcript t, gene_stable_id gsi, 
-           transcript_stable_id tsi
-     WHERE e.exon_id = et.exon_id
-       AND esi.exon_id = e.exon_id
-       AND et.transcript_id = t.transcript_id
-       AND tsi.transcript_id = et.transcript_id
-       AND t.gene_id = gsi.gene_id
-       AND c.internal_id = e.contig_id
-       AND cl.internal_id = c.clone
-       AND sgp.raw_id = c.internal_id
-       AND e.sticky_rank = 1
-     ORDER by clone_id, clone_version,contig_offset, exon_strand
-" );
-  $sth->execute();
-#     ORDER by gene_stable, transcript_stable, exon_stable, e.sticky_rank
-
-  return _store_in_arrayref( $sth );
-}
-
-sub target_exon_information {
-  my $dbh = shift;
-#  $dbh->do( "drop table exon_temp" );
-#    CREATE TABLE exon_temp
-  my $sth = $dbh->prepare( "
-    SELECT e.exon_id, t.transcript_id, t.gene_id,
-           cl.embl_id as clone_id, cl.embl_version as clone_version, 
-           c.offset as contig_offset, c.length as contig_length,
-           e.seq_start as exon_start, e.seq_end as exon_end, e.strand as exon_strand,
-           e.sticky_rank as sticky_rank,
-           sgp.chr_name as chr_name, sgp.chr_start as chr_start, sgp.chr_end as chr_end, 
-           sgp.raw_start as raw_start, sgp.raw_end as raw_end, sgp.raw_ori as raw_ori
-      FROM exon e, exon_transcript et, contig c, clone cl,
-           static_golden_path sgp, transcript t
-     WHERE e.exon_id = et.exon_id
-       AND et.transcript_id = t.transcript_id
-       AND c.internal_id = e.contig_id
-       AND cl.internal_id = c.clone
-       AND sgp.raw_id = c.internal_id
-       AND e.sticky_rank = 1
-     ORDER by clone_id, clone_version,contig_offset, exon_strand
-" );
-  
-  $sth->execute();
-
-  return _store_in_arrayref( $sth );
-}
-
-
-sub _store_in_arrayref {
-  my $sth = shift;
-  my $res = [];
-
-  my $count = 0;
-
-  while( my $hr = $sth->fetchrow_hashref() ) {
-    my %tempHash = %$hr;
-    push( @$res, \%tempHash );
-  }
-  return $res;
-}
-
-sub exon_sequence {
-  my $dbh = shift;
-  my $exon_id = shift;
-  my $exonSeq;
-
-  my $sth = $dbh->prepare( "
-    SELECT e.exon_id, substring( d.sequence, e.seq_start, e.seq_end-e.seq_start+1 ), 
-           e.seq_start as start, e.seq_end as end,
-           e.strand as strand, e.sticky_rank as rank
-      FROM exon e, contig c, dna d
-     WHERE e.contig_id = c.internal_id
-       AND c.dna = d.id
-       AND e.exon_id = ?
-     ORDER by rank
-  " );
-  $sth->execute( $exon_id );
-  
-  while( my $arrRef = $sth->fetchrow_arrayref ) {
-    my $part  = $arrRef->[1];
-    if( $arrRef->[4] != 1 ) {
-      $part = reverse( $part );
-      $part =~ tr/ACGTacgt/TGCATGCA/;
-    }
-    $exonSeq .= $part;
-  }
-
-  return $exonSeq;
-}
-
-
-sub calc_chr_coord {
-  my $ex = shift;
-  
-  my ( $chr_start, $chr_end, $chr_strand );
-  
-  $chr_strand = $ex->{'exon_strand'} * $ex->{'raw_ori'};
-  if( $ex->{'raw_ori'}  == 1 ) {
-    $chr_start = $ex->{'exon_start'} + $ex->{'chr_start'} - $ex->{'raw_start'};
-    $chr_end = $ex->{'exon_end'} + $ex->{'chr_start'} - $ex->{'raw_start'};
-  } else {
-    $chr_start = $ex->{'chr_start'} + $ex->{'raw_end'} - $ex->{'exon_end'};
-    $chr_end = $ex->{'chr_start'} + $ex->{'raw_end'} - $ex->{'exon_start'};
-  }
-
-  $ex->{'chr_start'} = $chr_start;
-  $ex->{'chr_end' } = $chr_end;
-  $ex->{'chr_strand' } = $chr_strand;
-}
-
-
-1;
diff --git a/misc-scripts/idmap2/apply_mappings.pl b/misc-scripts/idmap2/apply_mappings.pl
deleted file mode 100644
index 39d798cfc9..0000000000
--- a/misc-scripts/idmap2/apply_mappings.pl
+++ /dev/null
@@ -1,149 +0,0 @@
-# read the mapping out put file
-# have the first free id in the variables
-
-# go through new exons.
-# mapped, make a exon_stable_id row
-# not mapped, generate a new stable_id
-
-# go through genes with same technique
-
-# go through transcript
-# if mapped, retrieve the translation_stable_id
-
-use strict;
-use DBI;
-
-my $today = "2002-02-20";
-
-my $sdbh = DBI->connect( "DBI:mysql:host=ecs1d;database=homo_sapiens_core_130", "ensro", "");
-my $tdbh = DBI->connect( "DBI:mysql:host=ecs1e;database=ens_NCBI_28", "ensro", "" );
-
-my $starttime = scalar( localtime() );
-
-my $sth;
-my $resultfile = "map_130_28.txt";
-my %stable_id;
-
-for my $table ( "gene_stable_id", "exon_stable_id", 
-		"transcript_stable_id", "translation_stable_id" ) {
-  $sth = $sdbh->prepare( "select max( stable_id ) from $table" );
-  $sth->execute();
-  ( $stable_id{$table} ) = $sth->fetchrow_array();
-}
-
-# print join( " ", %stable_id ),"\n";
-# exit;
-
-
-exon_stables();
-gene_stables();
-trans_stables();
-
-
-sub exon_stables {
-  my %exonTranslate;
-
-  open( RES, $resultfile );
-  while( <RES> ) {
-    /Exon mapped:/ || next;
-    my @cols;
-    chomp;
-    @cols = split( "\t",$_ ); 
-    $exonTranslate{$cols[2]} = $cols[3];
-  }
-
-  close( RES );
-  
-  $sth = $tdbh->prepare( "select distinct(exon_id) from exon" );
-  $sth->execute;
-  open( EXON, ">exon_stable_id.txt" ) or die;
-
-  while( my ( $exonId ) = $sth->fetchrow_array() ) {
-    if( ! exists $exonTranslate{$exonId} ) {
-      $stable_id{'exon_stable_id'}++;
-      print EXON ( join( "\t",($exonId, $stable_id{'exon_stable_id'},"1\t$today\t$today")),"\n" );
-    } else {
-      print EXON ( join( "\t",($exonId, $exonTranslate{$exonId},"1\t$today\t$today")),"\n" );
-    }
-  }
-  close( EXON );
-}
-
-sub gene_stables {
-  my %geneTranslate;
-
-  open( RES, $resultfile );
-  while( <RES> ) {
-    /Gene mapped:/ || next;
-    my @cols;
-    chomp;
-    @cols = split( "\t",$_ ); 
-    $geneTranslate{$cols[2]} = $cols[3];
-  }
-
-  close( RES );
-  
-  $sth = $tdbh->prepare( "select gene_id from gene" );
-  $sth->execute;
-  
-  open( GEN, ">gene_stable_id.txt" ) or die;
-
-  while( my ( $geneId ) = $sth->fetchrow_array() ) {
-    if( ! exists $geneTranslate{$geneId} ) {
-      $stable_id{'gene_stable_id'}++;
-      print GEN ( join( "\t",($geneId, $stable_id{'gene_stable_id'},"1\t$today\t$today")),"\n");
-    } else {
-      print GEN ( join( "\t",($geneId, $geneTranslate{$geneId},"1\t$today\t$today")),"\n" );
-    }
-  }
-  close ( GEN );
-}
-
-sub trans_stables {
-  my %transcriptHash;
-  my %transcriptTranslation;
-
-  open( RES, $resultfile );
-  while( <RES> ) {
-    /Transcript mapped:/ || next;
-    my @cols;
-    chomp;
-    @cols = split( "\t",$_ ); 
-    $transcriptHash{$cols[2]} = $cols[3];
-  }
-
-  close( RES );
-  
-  $sth = $sdbh->prepare( "select tsi.stable_id, tli.stable_id 
-                            from transcript_stable_id tsi,
-                                 transcript t, translation_stable_id tli
-                           where t.transcript_id = tsi.transcript_id 
-                             and t.translation_id = tli.translation_id" );
-  $sth->execute;
-  
-  while( my $arrref = $sth->fetchrow_arrayref() ) {
-    $transcriptTranslation{$arrref->[0]} = $arrref->[1];
-  }
-
-  $sth = $tdbh->prepare( "select transcript_id, translation_id from transcript" );
-  $sth->execute();
-  
-  open( TSC, ">transcript_stable_id.txt" ) or die;
-  open( TSL, ">translation_stable_id.txt" ) or die;
-
-  while( my ( $tsc, $tsl ) = $sth->fetchrow_array() ) {
-    if( ! exists $transcriptHash{$tsc} ) {
-      $stable_id{'transcript_stable_id'}++;
-      $stable_id{'translation_stable_id'}++;
-
-      print TSC ( join( "\t",($tsc, $stable_id{'transcript_stable_id'},"1")),"\n");
-      print TSL ( join( "\t",($tsl, $stable_id{'translation_stable_id'},"1")),"\n");
-    } else {
-      my $stable_id = $transcriptHash{$tsc};
-      print TSC ( join( "\t",($tsc, $stable_id,"1")),"\n");
-      print TSL ( join( "\t",($tsl, $transcriptTranslation{$stable_id},"1")),"\n");
-    }
-  }
-  close ( TCS );
-  close ( TSL );
-}
diff --git a/misc-scripts/idmap2/double_exon_fix.pl b/misc-scripts/idmap2/double_exon_fix.pl
deleted file mode 100644
index 732a523cf2..0000000000
--- a/misc-scripts/idmap2/double_exon_fix.pl
+++ /dev/null
@@ -1,157 +0,0 @@
-# this script removes exon duplicates from the ensembl database
-# An Exon duplicate is an exon whose start end strand contig phase information
-# is the same to another one.
-
-# step one: gather all exon information
-# step two build mappings from internal ids to internal ids
-# step three change exon_id in exon_transcript 
-# remove superfluous exon records 
-# remove exon_stable_id records and record which stable_ids were
-#  included from which others.
-
-use DBI;
-use strict;
-
-my ( %exonHash, %stickies );
-my ( $rowsRead, $nrExons );
-my %exonTranslate;
-
-my $dbh = DBI->connect( "DBI:mysql:host=ecs1d;database=ens_UCSC_0801", "ensadmin", "ensembl");
-
-my $sth = $dbh->prepare( "select exon_id, contig_id, seq_start, seq_end, strand, phase, end_phase, sticky_rank from exon order by exon_id, sticky_rank desc" );
-$sth->execute();
-
-
-
-
-
-while( my $hr = $sth->fetchrow_hashref ) {
-  $rowsRead++;
-  my $hashkey = join( "\t", ( $hr->{'contig_id'}, $hr->{'seq_start'}, $hr->{'seq_end'},
-			      $hr->{'strand'}, $hr->{'phase'}, $hr->{'end_phase'} ));
-  push( @{$exonHash{$hashkey}},[ $hr->{'exon_id'}, $hr->{'sticky_rank'} ] ); 
-  if( $hr->{'sticky_rank'} > 1 ) {
-    $stickies{$hr->{'exon_id'}} = 1;
-  }
-}
-
-$nrExons = scalar( keys %exonHash );
-
-print ( "Exons in table: ", $rowsRead,"\n" );
-print ( "Nonredundant: ",$nrExons,"\n" );
-
-
-# build equivalence classes
-my @equClass;
-my %allExons;
-
-my $i = 0;
-for my $hashkey ( keys %exonHash ) {
-  $equClass[$i] = { 'position' => $hashkey,
-		    'exon_id' => undef };
-
-  for my $exon ( @{$exonHash{$hashkey}} ) {
-    $allExons{$exon->[0]}->[$exon->[1]-1] = $i; 
-  }
-  $i++;
-}
-
-# make new equclasses by combining old ones
-my %combinedEqu;
-
-for my $exonid ( keys %allExons ) {
-  if( scalar( @{$allExons{$exonid}} ) > 1 ) {
-    my $newEqKey = join( "-",@{$allExons{$exonid}} );
-    my $newEq;
-    # print STDERR "New equclass made $newEqKey\n"; 
-
-    if( ! exists $combinedEqu{$newEqKey} ) {
-      $equClass[$i] = { 'position' => $newEqKey,
-			'exon_id' => undef };
-      $combinedEqu{$newEqKey} = $i;
-      $newEq = $i;
-      $i++;
-    } else {
-      $newEq = $combinedEqu{$newEqKey};
-    }
-
-    $allExons{$exonid} = [$newEq];
-  }
-}
-
-
-# now give away exon_ids for equClasses
-for my $exonid ( keys %allExons ) {
-  my $equNo = $allExons{$exonid}->[0];
-    if( defined $equClass[$equNo]->{'exon_id'} ) {
-      $exonTranslate{$exonid} = $equClass[$equNo]->{'exon_id'}
-    } else {
-      $equClass[$equNo]->{'exon_id'} = $exonid;
-    }
-}
-
-print "Translation entries: ",scalar( keys %exonTranslate ),"\n";
-
-for my $exon ( keys %exonTranslate ) {
-  print "Map:\t",$exon,"\t",$exonTranslate{$exon},"\n";
-}
-
-# dumping_data
-
-$dbh->disconnect;
-
-
-exit;
-
-sub dumping_data {
-  # dump and filter exon, exon_transcript, exon_stable_id
-  # should do supporting feature I guess ...
-  # this is ONE OFF, 
-  $sth = $dbh->prepare( "select * from exon" );
-  $sth->execute();
-  
-  open ( EXON, ">exon.txt" ) or die;
-  
-  while( my $arr = $sth->fetchrow_arrayref() ) {
-    if( ! defined $exonTranslate{$arr->[0]} ) {
-      print EXON join( "\t", @$arr ),"\n";
-    }
-  }
-  close( EXON );
-  
-  
-  open ( EXONT, ">exon_transcript.txt" ) or die;
-  $sth = $dbh->prepare( "select * from exon_transcript" );
-  $sth->execute();
-  while( my $arr = $sth->fetchrow_arrayref() ) {
-    if( defined $exonTranslate{$arr->[0]} ) {
-      $arr->[0] = $exonTranslate{$arr->[0]};
-    }
-    print EXONT join( "\t", @$arr ),"\n";
-  }
-  close( EXONT );
-  
-  
-#  open ( EXONS, ">exon_stable_id.txt" ) or die;
-#  $sth = $dbh->prepare( "select * from exon_stable_id" );
-#  $sth->execute();
-#  while( my $arr = $sth->fetchrow_arrayref() ) {
-#    if( ! defined $exonTranslate{$arr->[0]} ) {
-#      print EXONS join( "\t", @$arr ),"\n";
-#    }
-#  }
-#  close( EXONS );
-
-  open ( SUPF, ">supporting_feature.txt" ) or die;
-  $sth = $dbh->prepare( "select * from supporting_feature" );
-  $sth->execute();
-  while( my $arr = $sth->fetchrow_arrayref() ) {
-    if( defined $exonTranslate{$arr->[0]} ) {
-      $arr->[1] = $exonTranslate{$arr->[1]};
-    }
-    print SUPF join( "\t", @$arr ),"\n";
-  }
-  close( SUPF );
-}
-
-
diff --git a/misc-scripts/idmap2/mapping.pl b/misc-scripts/idmap2/mapping.pl
deleted file mode 100644
index 6aa54d870f..0000000000
--- a/misc-scripts/idmap2/mapping.pl
+++ /dev/null
@@ -1,497 +0,0 @@
-# this script produces idmapping for genes, transcripts, translations and exons
-# It needs Database orig with stable_ids set
-# it needs Database target where the stable ids are to be set
-# it produces log output
-
-# it uses direct sql for speed. All queries are in SQL.pm
-# it uses LSF for some jobs I guess ...
-
-use DBI;
-use SQL;
-use Bio::PrimarySeq;
-
-use strict;
-use Bio::EnsEMBL::Pipeline::Runnable::CrossMatch;
-use MapGenes;
-use MapTranscripts;
-
-my $direct_maps = 0;
-my $crossmatches = 0;
-my $cloneGroups = 0;
-my $seqMatches = 0;
-my $cmmaps = 0;
-my $lasttime;
-
-my $sdbh = DBI->connect( "DBI:mysql:host=ecs1d;database=homo_sapiens_core_130", "ensro", "");
-my $tdbh = DBI->connect( "DBI:mysql:host=ecs1e;database=ens_NCBI_28", "ensro", "");
-
-my $starttime = scalar( localtime() );
-
-print STDERR "Start: ",scalar(localtime()),"\n";
-my $alloExonInfo = &SQL::orig_exon_information( $sdbh );
-my $allnExonInfo = &SQL::target_exon_information( $tdbh );
-
-# remove exon doublettes caused by multiple transcripts
-my ( %exonHash, %geneHash );
-my ( $oExonInfo, $nExonInfo, @exonInfo );
-my ( $oGenes, $nGenes );
-
-for  my $exon ( @$alloExonInfo ) {
-  if( ! exists $exonHash{ $exon->{'exon_id'}."-".$exon->{'sticky_rank'} } ) {
-    push( @{$oExonInfo}, $exon );
-    $exonHash{$exon->{'exon_id'}."-".$exon->{'sticky_rank'} } = 1;
-  }
-  if( ! exists $geneHash{$exon->{'gene_id'}} ) {
-    $geneHash{$exon->{'gene_id'}} = 1;
-  }
-}
-
-# filter out stickies
-# @{$oExonInfo} = ( grep { ! defined $exonHash{$_->{'exon_id'}."-2"} } @exonInfo );
-
-$oGenes = scalar( keys %geneHash);
-
-# @exonInfo= ();
-%geneHash = ();
-%exonHash = ();
-
-for  my $exon ( @$allnExonInfo ) {
-  if( ! exists $exonHash{ $exon->{'exon_id'}."-".$exon->{'sticky_rank'} } ) {
-    push( @{$nExonInfo},$exon );
-    $exonHash{$exon->{'exon_id'}."-".$exon->{'sticky_rank'} } = 1;
-  }
-  if( ! exists $geneHash{$exon->{'gene_id'}} ) {
-    $geneHash{$exon->{'gene_id'}} = 1;
-  }
-}
-$nGenes = scalar( keys %geneHash);
-# @{$nExonInfo} = ( grep { ! defined $exonHash{$_->{'exon_id'}."-2"} } @exonInfo );
-
-print STDERR "Finish: ",scalar(localtime()),"\n";
-
-print STDERR "OldExons: ",scalar( @$oExonInfo ),"\n";
-print STDERR "NewExons: ",scalar( @$nExonInfo ),"\n";
-print STDERR "OldGenes: $oGenes\n";
-print STDERR "NewGenes: $nGenes\n";
-
-
-direct_mapping( $oExonInfo, $nExonInfo );
-contig_version_update( $oExonInfo, $nExonInfo );
-
-MapGenes::map_genes( $oExonInfo, $nExonInfo );
-MapTranscripts::map_transcripts( $alloExonInfo, $allnExonInfo ); 
-
-$sdbh->disconnect();
-
-$tdbh->disconnect();
-
-print STDERR "Start: $starttime\n";
-print STDERR "End : ",scalar( localtime()),"\n";
-
-print STDERR "Old Exon Count: ", scalar( @$nExonInfo ), "\n";
-print STDERR "New Exon Count: ", scalar( @$nExonInfo ), "\n";
-print STDERR "Direct Mappings: ", $direct_maps,"\n";
-print STDERR "Crossmatches: $crossmatches\n";
-print STDERR "CloneGroups: $cloneGroups\n";
-print STDERR "SeqMatches: $seqMatches\n";
-print STDERR "Mappings on Crossmatch: $cmmaps\n";
-
-exit;
-
-
-open( D1, ">dump1.txt" );
-open( D2, ">dump2.txt" );
-
-
-my ( @dumpOld, @dumpNew );
-
-print STDERR "Dumping ...\n";
-
-@dumpOld = sort { ( $a->{'clone_id'} cmp $b->{'clone_id'} ) ||
-		( $a->{'exon_start'} <=> $b->{'exon_start'}) } @$oExonInfo;
-@dumpNew = sort { ( $a->{'clone_id'} cmp $b->{'clone_id'} ) ||
-		( $a->{'exon_start'} <=> $b->{'exon_start'}) } @$nExonInfo;
-
-
-*STDOUT = *D1;
-for my $ex ( @dumpOld ) {
-  print_exon( $ex );
-}
-
-close D1;
-
-*STDOUT  = *D2;
-for my $ex ( @dumpNew ) {
-  print_exon( $ex );
-}
-
-close D2;
-
-
-exit;
-
-
-# extract information (lot of data)
-# direct mappings
-# contig version update
-
-
-
-# similar as direct mapping
-# checks if there is an old exon, and a new exon on same clone but
-# updated version. Exclude clones with same version.
-
-sub contig_version_update {
-  my ( $old, $new ) = @_;
-  $lasttime = time();
-
-  my ( $pruneOld, $pruneNew, $sortOld, $sortNew  );
-  my ( $old_iter, $new_iter, $skip_clone );
-
-  @{$pruneOld} = grep { ! exists $_->{'mapped'} } @{$old};
-  @{$pruneNew} = grep { ! exists $_->{'mapped'} } @{$new};
-
-  # prune short exons (<10) would be appropriat
-  print STDERR "Old Exons left to map: ",scalar( @$pruneOld ),"\n";
-  print STDERR "New Exons left to map: ",scalar( @$pruneNew ),"\n";
- 
-  @{$sortOld} = sort { $a->{'clone_id'} cmp $b->{'clone_id'} } @$pruneOld;
-  @{$sortNew} = sort { $a->{'clone_id'} cmp $b->{'clone_id'} } @$pruneNew;
-
-
-  $new_iter = 0;
-  $old_iter = 0;
-
-  while( 1 ) {
-
-    my $old_exon = $sortOld->[$old_iter];
-    my $new_exon = $sortNew->[$new_iter];
-
-    my $cmp = ( $old_exon->{'clone_id'} cmp $new_exon->{'clone_id'} );
-    if( $cmp == 0 ) {
-      my $clone_id = $old_exon->{'clone_id'};
-
-      # skip clones with no updates
-      if( $old_exon->{'clone_version'} == $new_exon->{'clone_version'} ) {
-	$skip_clone = 1;
-      } else {
-	$skip_clone = 0;
-      }
-
-      my ( $newCloneExons, $oldCloneExons );
-      while( defined $old_exon && 
-	     $old_exon->{'clone_id'} eq $clone_id ) {
-	push( @{$oldCloneExons}, $old_exon );
-	$old_iter++;
-	$old_exon = $sortOld->[$old_iter];
-      }
-	  
-      while( defined $new_exon && 
-	     $new_exon->{'clone_id'} eq $clone_id ) {
-	push( @{$newCloneExons}, $new_exon );
-	$new_iter++;
-	$new_exon = $sortNew->[$new_iter];
-      }
-      if( !$skip_clone ) {
-	clone_map( $oldCloneExons, $newCloneExons );
-      }
-
-    } elsif ( $cmp < 0 ) {
-      $old_iter++;
-    } else {
-      $new_iter++;
-    }
-    if( $new_iter >= scalar( @$sortNew ) || 
-	$old_iter >= scalar( @$sortOld )) {
-      last;
-    }
-    if( time() - $lasttime > 3600 ) {
-      print STDERR ( "Finished $old_iter exons ",scalar( localtime() ),"\n" );
-      $lasttime = time();
-    }
-  }
-
-}
-
-
-
-# map two sets of exons in the same clone from old to new
-# by using crossmatch module ...
-sub clone_map {
-  my ( $oldCloneExons, $newCloneExons ) = @_;
-  my ( $oldExHash, $newExHash );
-  local *HERR, *HOUT;
- 
-  open ( NULL, ">/dev/null" );
-  
-  $cloneGroups++;
-
-  for my $exon ( @$oldCloneExons ) {
-    $oldExHash->{$exon->{'exon_id'}} = SQL::exon_sequence( $sdbh, $exon->{'exon_id'} );
-    # $exon->{'seq'} = $oldExHash->{$exon->{'exon_id'}};
-  }
-
-  for my $exon ( @$newCloneExons ) {
-    $newExHash->{$exon->{'exon_id'}} = SQL::exon_sequence( $tdbh, $exon->{'exon_id'} );
-    # $exon->{'seq'} = $newExHash->{$exon->{'exon_id'}};
-  }
-
-  clone_seq_match( $oldCloneExons, $newCloneExons, 
-		   $oldExHash, $newExHash );
-
-  # no crossmatch mappings
-  return;
-
-  my ( @oldPrune, @newPrune, @scorelist );
-  @oldPrune = grep { ! exists $_->{'mapped'} } @$oldCloneExons;
-  @newPrune = grep { ! exists $_->{'mapped'} } @$newCloneExons;
-
-  for my $oEx ( @oldPrune ) {
-    for my $nEx ( @newPrune ) {
-      my $crossMatcher = new Bio::EnsEMBL::Pipeline::Runnable::CrossMatch
-	( -seq1 => new Bio::PrimarySeq( -seq => $oldExHash->{$oEx->{'exon_id'}}, 
-					-id => $oEx->{'exon_id'} ),
-	  -seq2 => new Bio::PrimarySeq( -seq => $newExHash->{$nEx->{'exon_id'}}, 
-					-id => $nEx->{'exon_id'} ),
-	  -score => 200
-	);
-
-      *HERR = *STDERR;
-      *HOUT = *STDOUT;
-
-      *STDOUT = *NULL;
-      *STDERR = *NULL;
-
-      $crossMatcher->run();
-
-      *STDOUT = *HOUT;
-      *STDERR = *HERR;
-
-      $crossmatches++;
-      my $score = 0;
-      my $matches = 0;
-      for my $fp ( $crossMatcher->output() ) {
-	$score += $fp->score();
-	$matches = 1;
-      }
-      if( $matches ) {
-	push( @scorelist, { score => $score, 
-			    oldEx => $oEx,
-			    newEx => $nEx } );
-      }
-									 
-    }
-  }
-  
-  # now map exons which have crossmatches 
-  my @sortedScores; 
-  @sortedScores = sort { $b->{'score'} <=> $a->{'score'} } @scorelist;
-  
-  for my $scoreRecord ( @sortedScores ) {
-    if( ! exists $scoreRecord->{'oldEx'}{'mapped'} &&
-	! exists $scoreRecord->{'newEx'}{'mapped'} ) {
-      $scoreRecord->{'oldEx'}{'mapped'} = 'crossmatch Clone Seq';
-      $scoreRecord->{'newEx'}{'mapped'} = 'crossmatch Clone Seq';;
-      $scoreRecord->{'newEx'}{'exon_stable'} = 
-	$scoreRecord->{'oldEx'}{'exon_stable'};
-      print ( "Exon mapped:\t", $scoreRecord->{'oldEx'}{'exon_id'},"\t",
-	      $scoreRecord->{'newEx'}{'exon_id'},"\t", 
-	      $scoreRecord->{'oldEx'}{'exon_stable'},"\n" ); 
-      
-      $cmmaps++;
-#      print STDERR "---- CROSSMATCH MAP ----",$scoreRecord->{'score'},"\n";
-    }
-  }
-}
-
-
-# direct sequence mapping inside a clone
-sub clone_seq_match {
-  my ( $oldCloneExons, $newCloneExons,
-       $oldSeqHash, $newSeqHash ) = @_;
-  
-
-  my ( @sold, @snew );
-  @sold = sort { $a->{'chr_start'} <=> $b->{'chr_start'} } @$oldCloneExons;
-  @snew = sort { $a->{'chr_start'} <=> $b->{'chr_start'} } @$newCloneExons;
-
-  my %hashOnSeq;
-
-  for my $exon ( @$oldCloneExons ) {
-    my $seq = $oldSeqHash->{$exon->{'exon_id'}};
-    if( ! exists $hashOnSeq{ $seq } ) {
-      $hashOnSeq{$seq} = [[ $exon ],[]];
-    } else {
-      push( @{$hashOnSeq{ $seq }->[0]}, $exon );
-    }
-  }
-
-  for my $exon ( @$newCloneExons ) {
-    my $seq = $newSeqHash->{$exon->{'exon_id'}};
-    if( ! exists $hashOnSeq{ $seq } ) {
-      next;
-    } else {
-      push( @{$hashOnSeq{ $seq }->[1]}, $exon );
-    }
-  }
-
-  for my $seq ( keys %hashOnSeq ) {
-    my $seqEqExons = $hashOnSeq{$seq};
-    my $seqO = $seqEqExons->[0];
-    my $seqN = $seqEqExons->[1];
-    
-    if(( ! @$seqO ) ||  ( ! @$seqN )) {
-      next;
-    }
-
-    if( scalar( @$seqO ) == scalar( @$seqN )) {
-      if( scalar( @$seqO ) > 1 ) {
-	print STDERR ( "Matching exon sequences.\n" );
-      }
-      for( my $i=0; $i<=$#$seqO; $i++ ) {
-	my $oEx = $seqO->[$i];
-	my $nEx = $seqN->[$i];
-	if( ! exists $oEx->{'mapped'} &&
-	    ! exists $nEx->{'mapped'} ) {
-	  $oEx->{'mapped'} = 'same Clone Seq';
-	  $nEx->{'mapped'} = 'same Clone Seq';
-	  $nEx->{'exon_stable'} = $oEx->{'exon_stable'};
-	  print ( "Exon mapped:\t", $oEx->{'exon_id'},"\t",
-		  $nEx->{'exon_id'},"\t", $oEx->{'exon_stable'},"\n" );
-	  $seqMatches++;
-	}
-      }
-    } else {
-      print STDERR ( "Unequal number of matching exon sequences.\n" );
-    }
-  }
-}
-
-
-
-# takes two exon lists and marks old exons with exact new exons matching
-# marks new exons as matched as well
-# does the direct approach. Exons are equal, when same position on unchanged contig
-
-sub direct_mapping {
-  my ( $old, $new ) = @_;
-  
-  my ( $sold, $snew );
-
-  print STDERR "Direct Mapping started: ",scalar( localtime() ),"\n";
-
-  @{$sold} = sort { exon_direct_compare( $a, $b ) } @$old;
-  @{$snew} = sort { exon_direct_compare( $a, $b ) } @$new;
-
-  print STDERR "Sorted: ",scalar(localtime()),"\n";
-  
-  my ( $new_iter, $old_iter );
-  my ( $old_exon, $new_exon );
-
-  $new_iter = 0;
-  $old_iter = 0;
-
-  while( 1 ) {
-
-    $old_exon = $sold->[$old_iter];
-    $new_exon = $snew->[$new_iter];
-    my  $exon_compare_result;
-    if(( $old_exon->{'clone_id'} cmp $new_exon->{'clone_id'}) == 0 &&
-	$old_exon->{'clone_version'} == $new_exon->{'clone_version'} &&
-	$old_exon->{'exon_strand'} == $new_exon->{'exon_strand'} &&
-	$old_exon->{'contig_offset'} == $new_exon->{'contig_offset'} ) {
-      $exon_compare_result = 0;
-    } else {
-      $exon_compare_result = exon_direct_compare( $old_exon, $new_exon );
-    }
-    
-    if(  $exon_compare_result == 0 ) {
-      # direct map possible
-      
-      # find overlap between the two exons. Need >50% of new exon
-      # covered
-
-      my ( $overlap ) = 
-      sort { $a <=> $b } ( $old_exon->{'exon_end'}-$old_exon->{'exon_start'}+1,
-			   $new_exon->{'exon_end'}-$new_exon->{'exon_start'}+1,
-			   $old_exon->{'exon_end'}-$new_exon->{'exon_start'}+1,
-			   $new_exon->{'exon_end'}-$old_exon->{'exon_start'}+1 );
-#      print "---- MAPPING ----\n";
-
-#      print_exon( $old_exon );
-#      print_exon( $new_exon );
-#      print "OVERLAP: $overlap\n";
-
-      if(( $overlap / 
-	  ( $new_exon->{'exon_end'} - $new_exon->{'exon_start'} + 1 )) > 0.5 ) {
-	
-#	print "MAPPED\n";
-	$old_exon->{'mapped'} = 'direct';
-	$new_exon->{'mapped'} = 'direct';
-
-	$new_exon->{'exon_stable'} = $old_exon->{'exon_stable'};
-	print ( "Exon mapped:\t", $old_exon->{'exon_id'},"\t",
-		$new_exon->{'exon_id'},"\t", $old_exon->{'exon_stable'},"\n" );
-	$old_iter++;
-	$new_iter++;
-
-	if( $old_iter >= scalar( @$old ) ||
-	    $new_iter >= scalar( @$new ) ) {
-	  last;
-	}
-	$direct_maps++;
-	next;
-      }
-    }
-#    print "NOT MAPPED\n";
-
-    $exon_compare_result = exon_direct_compare( $old_exon, $new_exon );
-
-    if( $exon_compare_result == -1 ) {
-      $old_iter++;
-      if( $old_iter > scalar( @$sold ) ) {
-	last;
-      }
-      next;
-
-    } elsif( $exon_compare_result == 1 ) {
-      $new_iter++;
-      if( $new_iter > scalar( @$snew ) ) {
-	last;
-      }
-      next;
-
-    } else {
-      $old_iter++;
-      $new_iter++;
-      if( $old_iter > scalar( @$old ) ||
-	  $new_iter > scalar( @$new ) ) {
-	last;
-      }
-	
-    }
-  }
-
-  print STDERR "Direct Mapping finished: ",scalar( localtime() ),"\n";
-  return;
-}
-
-sub exon_direct_compare {
-  my ( $a, $b ) = @_;
-  return $a->{'clone_id'} cmp $b->{'clone_id'} ||
-    $a->{'clone_version'} <=> $b->{'clone_version'} ||
-      $a->{'contig_offset'} <=> $b->{'contig_offset'} ||
-	$a->{'exon_start'} <=> $b->{'exon_start'} ||
-	  $a->{'exon_strand'} <=> $b->{'exon_strand'} ;
-}
-
-
-
-
-sub print_exon {
-  my $exon = shift;
-  for my $key ( sort keys %{$exon} ) {
-    print $key,"\t",$exon->{$key},"\n";
-  }
-  print "\n";
-}
-- 
GitLab