diff --git a/misc-scripts/chimp/human2chimp2.pl b/misc-scripts/chimp/human2chimp2.pl
index d370721401b38d7c64ba13285cd9343a0ad90042..6706d8c9ae2bc98821b9646c18b7b2bc5379947e 100644
--- a/misc-scripts/chimp/human2chimp2.pl
+++ b/misc-scripts/chimp/human2chimp2.pl
@@ -131,7 +131,6 @@ sub transfer_transcript {
 
   my @chimp_exons;
 
-
  EXON:
   foreach my $exon (@$exons) {
 
@@ -175,10 +174,10 @@ sub transfer_transcript {
         $chimp_cdna_pos += $c->length();
         $cdna_exon_start += $c->length();
 
-        debug("exon mapped - incrementing chimp_cdna_pos and cdna_exon_start\n" .
-              "  exon_len = " . $c->length() . "\n" .
-              "  chimp_cdna_pos  = $chimp_cdna_pos\n" .
-              "  cdna_exon_start = $cdna_exon_start"); 
+      #debug("exon mapped - incrementing chimp_cdna_pos and cdna_exon_start\n" .
+      #        "  exon_len = " . $c->length() . "\n" .
+      #        "  chimp_cdna_pos  = $chimp_cdna_pos\n" .
+      #        "  cdna_exon_start = $cdna_exon_start"); 
 
 
         push @chimp_exons, [$c->start(), $c->end(), $c->strand(), $c->id()];
@@ -197,7 +196,7 @@ sub transfer_transcript {
         return ();
       }
 
-      my($exon_start, $exon_end, $exon_strand, $seq_region) = @$result;
+      my($exon_start, $exon_end, $exon_strand, $exon_seq_region) = @$result;
 
       for(my $i=0; $i < $num; $i++) {
         my $c = $coords[$i];
@@ -211,7 +210,8 @@ sub transfer_transcript {
             process_deletion(\$chimp_cdna_pos, $c->length(),
                              \$exon_start, \$exon_end, $exon_strand,
                              \$cdna_exon_start,
-                             \$cdna_coding_start, \$cdna_coding_end);
+                             \$cdna_coding_start, \$cdna_coding_end,
+                            $exon_seq_region);
 
           return () if(!defined($result));
 
@@ -249,17 +249,18 @@ sub transfer_transcript {
               #
               # insert in chimp, deletion in human
               #
-              debug("before insertion processing\n" .
-                    "  cdna_exon_start = $cdna_exon_start");
+              #debug("before insertion processing\n" .
+              #      "  cdna_exon_start = $cdna_exon_start");
 
               my $result = 
                 process_insertion(\$chimp_cdna_pos, $insert_len,
                                   \$exon_start, \$exon_end, $exon_strand,
                                   \$cdna_exon_start,
-                                  \$cdna_coding_start, \$cdna_coding_end);
+                                  \$cdna_coding_start, \$cdna_coding_end,
+                                 $exon_seq_region);
 
-              debug("after insertion processing\n" .
-                    "  cdna_exon_start = $cdna_exon_start");
+              #debug("after insertion processing\n" .
+              #      "  cdna_exon_start = $cdna_exon_start");
 
               return () if(!defined($result));
 
@@ -271,21 +272,35 @@ sub transfer_transcript {
 
           $chimp_cdna_pos += $c->length();
 
-          debug("match - incremented chimp_cdna_pos\n" .
-                "  match_len = " . $c->length() . "\n" .
-                "  chimp_cdna_pos = $chimp_cdna_pos");
+          #debug("match - incremented chimp_cdna_pos\n" .
+          #      "  match_len = " . $c->length() . "\n" .
+          #      "  chimp_cdna_pos = $chimp_cdna_pos");
 
         }
       }  # foreach coord
 
       $cdna_exon_start += $exon_end - $exon_start + 1;
 
-      debug("exon complete - incremented cdna_exon_start\n" .
-            "  exon_len = " . ($exon_end - $exon_start + 1) .
-            "  cdna_exon_start = $cdna_exon_start\n");
+      #debug("exon complete - incremented cdna_exon_start\n" .
+      #      "  exon_len = " . ($exon_end - $exon_start + 1) .
+      #      "  cdna_exon_start = $cdna_exon_start\n");
+
 
+      push @chimp_exons, [$exon_start, $exon_end, $exon_strand,
+                          $exon_seq_region];
     }
   } # foreach exon
+
+  my $slice_adaptor = $chimp_db->get_SliceAdaptor();
+
+  my @transcripts = create_transcripts(\@chimp_exons,
+                                       $cdna_coding_start, $cdna_coding_end,
+                                       $slice_adaptor);
+
+  foreach my $transcript (@transcripts) {
+    debug("\nTranslation:" . $transcript->translate()->seq() . "\n\n");
+  }
+
 }
 
 ###############################################################################
@@ -316,17 +331,19 @@ sub process_deletion {
   my $cdna_coding_start_ref = shift;
   my $cdna_coding_end_ref   = shift;
 
+  my $exon_seq_region    = shift;
+
   my $del_start = $$cdna_del_pos_ref + 1;
   my $del_end   = $$cdna_del_pos_ref + $del_len;
 
   my $exon_len = $$exon_end_ref - $$exon_start_ref + 1;
   my $cdna_exon_end = $$cdna_exon_start_ref + $exon_len - 1;
 
-  debug("processing deletion");
-  debug("  del_start = $del_start");
-  debug("  del_end   = $del_end");
+  #debug("processing deletion");
+  #debug("  del_start = $del_start");
+  #debug("  del_end   = $del_end");
 
-  # sanity check, deletion should be completely in or adjacent to exon boundaries
+  #sanity check, deletion should be completely in or adjacent to exon boundaries
   if($del_start < $$cdna_exon_start_ref - 1 ||
      $del_start > $cdna_exon_end + 1) {
 
@@ -540,10 +557,6 @@ sub process_deletion {
       my $first_len  = $del_start - $$cdna_exon_start_ref;
       my $intron_len = 3 - $frameshift;
 
-      #second exon is going to starts right after 'frameshift intron'
-      #but in cdna coords this is immediately after last exon
-      $$cdna_exon_start_ref += $first_len;
-
       #reduce the length of the CDS by the length of the new intron
       $$cdna_coding_end_ref -= $intron_len;
 
@@ -557,18 +570,51 @@ sub process_deletion {
 
       ### TBD may have to check we have not run up to end of CDS here
 
+      # we may have encountered a delete at the very end of the exon
+      # in this case we have to take the intron out of the end of this exon
+      # since we are not creating a second one
+      if($first_len + $intron_len >= $exon_len) {
+        if($exon_strand == 1) {
+          $$exon_end_ref -= $intron_len;
+        } else {
+          $$exon_start_ref      -= $intron_len;
+          $$cdna_exon_start_ref -= $intron_len;
+        }
+        return [];
+      }
+
+      #second exon is going to start right after 'frameshift intron'
+      #but in cdna coords this is immediately after last exon
+      $$cdna_exon_start_ref += $first_len;
+
       if($exon_strand == 1) {
-        #end the current exon at the beginning of the deletion
-        my $out_exon = [$$exon_start_ref, $$exon_start_ref + $first_len - 1];
+        # end the current exon at the beginning of the deletion
+        # watch out though, because we may be at the very beginning of
+        # the exon in which case we do not want to create one
+
+        my $first_exon = undef;
+
+        if($first_len) {
+          $first_exon = [$$exon_start_ref, $$exon_start_ref + $first_len - 1,
+                          $exon_strand, $exon_seq_region];
+        }
+
         #start next exon after new intron
         $$exon_start_ref += $first_len + $intron_len;
-        return [$out_exon];
+
+        return (defined($first_exon)) ? [$first_exon] : [];
 
       } else {
-        my $out_exon = [$$exon_end_ref - $first_len + 1, $$exon_end_ref];
+        my $first_exon = undef;
+
+        if($first_len) {
+          $first_exon = [$$exon_end_ref - $first_len + 1, $$exon_end_ref,
+                         $exon_strand, $exon_seq_region];
+        }
         #start next exon after new intron
         $$exon_end_ref   -= $first_len + $intron_len;
-        return [$out_exon];
+
+        return (defined($first_exon)) ? [$first_exon] : [];
       }
     }
   }
@@ -622,12 +668,14 @@ sub process_insertion {
   my $cdna_coding_start_ref = shift;
   my $cdna_coding_end_ref   = shift;
 
+  my $exon_seq_region = shift;
+
   my $exon_len      = $$exon_end_ref - $$exon_start_ref + 1;
   my $cdna_exon_end = $$cdna_exon_start_ref + $exon_len - 1;
 
-  debug("processing insertion");
-  debug("  ins_left = $$cdna_ins_pos_ref");
-  debug("  ins_len  = $insert_len");
+  #debug("processing insertion");
+  #debug("  ins_left = $$cdna_ins_pos_ref");
+  #debug("  ins_len  = $insert_len");
 
   # sanity check, insert should be completely in exon boundaries
   if($$cdna_ins_pos_ref < $$cdna_exon_start_ref ||
@@ -705,15 +753,17 @@ sub process_insertion {
       ### TBD may have to check we have not run up to end of CDS here
 
       if($exon_strand == 1) {
-        #end the current exon at the beginning of the deletion
-        my $out_exon = [$$exon_start_ref, $$exon_start_ref + $first_len - 1];
+        #end the current exon at the beginning of the insert
+        my $out_exon = [$$exon_start_ref, $$exon_start_ref + $first_len - 1,
+                        $exon_strand, $exon_seq_region];
         #start the next exon after the frameshift intron
         $$exon_start_ref += $first_len + $frameshift;
 
         return [$out_exon];
 
       } else {
-        my $out_exon = [$$exon_end_ref - $first_len + 1, $$exon_end_ref];
+        my $out_exon = [$$exon_end_ref - $first_len + 1, $$exon_end_ref,
+                       $exon_strand, $exon_seq_region];
         #start the next exon after the frameshift intron
         $$exon_end_ref   -= $first_len + $frameshift;
 
@@ -768,15 +818,15 @@ sub get_coords_extent {
 
   my($start, $end, $strand, $seq_region);
 
-  debug("calculating coord span");
+  #debug("calculating coord span");
 
   foreach my $c (@$coords) {
     next if($c->isa('Bio::EnsEMBL::Mapper::Gap'));
 
-    debug(" coord_start  = " . $c->start );
-    debug(" coord_end    = " . $c->end   );
-    debug(" coord_strand = " . $c->strand );
-    debug(" coord_id     = " . $c->id);
+    #debug(" coord_start  = " . $c->start );
+    #debug(" coord_end    = " . $c->end   );
+    #debug(" coord_strand = " . $c->strand );
+    #debug(" coord_id     = " . $c->id);
 
     if(!defined($seq_region)) {
       $seq_region = $c->id();
@@ -829,12 +879,154 @@ sub get_coords_extent {
     }
   }
 
-  debug("  calculated span = $start-$end($strand)\n" .
-        "  length = " . ($end - $start + 1));
+  #debug("  calculated span = $start-$end($strand)\n" .
+  #      "  length = " . ($end - $start + 1));
 
   return [$start, $end, $strand, $seq_region];
 }
 
+################################################################################
+# create_transcripts
+#
+################################################################################
+
+sub create_transcripts {
+  my $exon_coords       = shift;
+  my $cdna_coding_start = shift;
+  my $cdna_coding_end   = shift;
+  my $slice_adaptor     = shift;
+
+  my $transcript_strand = undef;
+  my $transcript_seq_region = undef;
+
+  my $transcript = Bio::EnsEMBL::Transcript->new();
+  my $translation = Bio::EnsEMBL::Translation->new();
+
+  my @chimp_transcripts;
+
+  my $cdna_start = 1;
+  my $cur_phase  = undef;
+
+  foreach my $exon_coord (@$exon_coords) {
+    my($exon_start, $exon_end, $exon_strand, $seq_region) = @$exon_coord;
+
+    #sanity check:
+    if($exon_end < $exon_start) {
+      my $exon_dump = '';
+      foreach my $ex (@$exon_coords) {
+        if($ex->[0] > $ex->[1]) {
+          $exon_dump .= '*';
+        }
+        $exon_dump .= '  [' . join(' ', @$ex) . "]\n";
+      }
+      throw("Unexpected: received exon start less than end:\n$exon_dump\n");
+    }
+
+    my $exon_len = $exon_end - $exon_start + 1;
+    my $cdna_end = $cdna_start + $exon_len - 1;
+
+    if(!defined($transcript_seq_region)) {
+      $transcript_seq_region = $seq_region;
+    }
+    elsif($transcript_seq_region ne $seq_region) {
+      debug("transcript exons on different scaffolds - discarding transcript");
+      ### TBD can probably split transcript rather than discarding
+      return ();
+    }
+
+    if(!defined($transcript_strand)) {
+      $transcript_strand = $exon_strand;
+    }
+    elsif($transcript_strand != $exon_strand) {
+      debug("transcript exons on different strands - discarding transcript");
+      ### TBD can probably split transcript rather than discarding
+      return ();
+    }
+
+    #
+    # calculate the start & end phases
+    #
+
+    my ($start_phase, $end_phase);
+
+    if(defined($cur_phase)) {
+      if($cdna_start <= $cdna_coding_end) {
+        $start_phase = $cur_phase; #start phase is last exons end phase
+      } else {
+        #the end of the last exon was the end of the CDS
+        $start_phase = -1;
+      }
+    } else {
+      #sanity check
+      if($cdna_start > $cdna_coding_start && $cdna_start < $cdna_coding_end) {
+        throw("Unexpected.  Start of CDS is not in exon?\n" .
+              "  exon_cdna_start = $cdna_start\n" .
+              "  cdna_coding_start = $cdna_coding_start");
+      }
+      if($cdna_coding_start == $cdna_coding_start) {
+        $start_phase = 0;
+      } else {
+        $start_phase = -1;
+      }
+    }
+
+    if($cdna_end < $cdna_coding_start ||
+       $cdna_end > $cdna_coding_end) {
+      #the end of this exon is outside the CDS
+      $end_phase = -1;
+    } else {
+      #the end of this exon is in the CDS
+      #figure out how much coding sequence in the exon
+      my $coding_start =
+        ($cdna_coding_start > $cdna_start) ? $cdna_coding_start : $cdna_start;
+      my $coding_len = $cdna_end - $cdna_start + 1;
+
+      if($start_phase > 0) {
+        $coding_len += $start_phase;
+      }
+
+      $end_phase = $coding_len % 3;
+    }
+
+    my $slice = $slice_adaptor->fetch_by_region('scaffold', $seq_region);
+
+    my $exon = Bio::EnsEMBL::Exon->new
+      (-START     => $exon_start,
+       -END       => $exon_end,
+       -STRAND    => $exon_strand,
+       -PHASE     => $start_phase,
+       -END_PHASE => $end_phase,
+       -SLICE     => $slice);
+
+    $transcript->add_Exon($exon);
+
+    #
+    # see if this exon is the start or end exon of the translation
+    #
+
+    if($cdna_start <= $cdna_coding_start &&
+       $cdna_end   >= $cdna_coding_start) {
+      my $translation_start = $cdna_coding_start - $cdna_start + 1;
+      $translation->start_Exon($exon);
+      $translation->start($translation_start);
+    }
+    if($cdna_start <= $cdna_coding_end &&
+       $cdna_end   >= $cdna_coding_end) {
+      my $translation_end = $cdna_coding_end - $cdna_start + 1;
+      $translation->end_Exon($exon);
+      $translation->end($translation_end);
+    }
+
+    $cdna_start = $cdna_end + 1;
+    $cur_phase = ($end_phase >= 0) ? $end_phase : undef;
+  }
+
+  $transcript->translation($translation);
+  push @chimp_transcripts, $transcript;
+
+  return @chimp_transcripts;
+}
+
 
 ###############################################################################
 # get_cds_len