diff --git a/misc-scripts/canonical_transcripts/set_canonical_transcripts.pl b/misc-scripts/canonical_transcripts/set_canonical_transcripts.pl
index 49d74da433ec89dc4b8d9902091012cc8b17de62..ca7e41dd86b3de4da6c821065d058a1ceb119055 100644
--- a/misc-scripts/canonical_transcripts/set_canonical_transcripts.pl
+++ b/misc-scripts/canonical_transcripts/set_canonical_transcripts.pl
@@ -1,20 +1,33 @@
+=pod
 
-#!/usr/local/ensembl/bin/perl
+SYNOPSIS
+
+  Script to set canonical transcripts for each gene
+
+DESCRIPTION
+
+  The rule is if the cluster contains translated transcripts take the
+  one with the longest cds otherwise take the one with the longest
+  cdna
+
+EXAMPLE
+
+  perl set_canonical_transcripts.pl -dbhost host -dbuser user -dbpass *** -dbname dbname -dbport 3306 -coord_system toplevel -write
+
+EXTRA
 
-#script to set canonical transcripts for each gene
-#the rule is if the cluster contains translated transcripts take the one with the 
-#longest cds otherwise take the one with the longest cdna
+  To check the script has run correctly you can run the
+  CoreForeignKeys healthcheck:
 
-#an example commandline is
-#
-# perl set_canonical_transcripts.pl -dbhost host -dbuser user -dbpass *** -dbname 
-# dbname -dbport 3306 -coord_system toplevel -write
-#
+  ./run-healthcheck.sh -d dbname -output problem CoreForeignKeys
 
-#To check the script has run correctly you can run the CoreForeignKeys healthcheck
-# e.g ./run-healthcheck.sh -d dbname -output problem CoreForeignKeys
+=cut
+
+#!/usr/local/ensembl/bin/perl
 
 use strict;
+use Carp;
+use DBI qw(:sql_types);
 use Getopt::Long;
 use Bio::EnsEMBL::DBSQL::DBAdaptor;
 use Bio::EnsEMBL::Utils::Exception qw(throw);
@@ -30,20 +43,20 @@ my $write = 0;
 my $include_non_ref = 1;
 my $verbose = 0;
 
-&GetOptions( 'dbhost:s'            => \$host,
-             'dbport:n'            => \$port,
-             'dbname:s'            => \$dbname,
-             'dbuser:s'            => \$user,
-             'dbpass:s'            => \$pass,
-             'coord_system_name:s' => \$coord_system,
-             'seq_region_name:s'   => \$seq_region_name,
-             'write!'              => \$write,
-             'include_non_ref!'    => \$include_non_ref,
-             'verbose!'            => \$verbose, );
+GetOptions( 'dbhost:s'            => \$host,
+            'dbport:n'            => \$port,
+            'dbname:s'            => \$dbname,
+            'dbuser:s'            => \$user,
+            'dbpass:s'            => \$pass,
+            'coord_system_name:s' => \$coord_system,
+            'seq_region_name:s'   => \$seq_region_name,
+            'write!'              => \$write,
+            'include_non_ref!'    => \$include_non_ref,
+            'verbose!'            => \$verbose, );
 
 unless ($write) {
-  print
-" you've not used the -write option so results will not be written into the database\n";
+  print "You have not used the -write option "
+      . "so results will not be written into the database\n";
 }
 
 my $db =
@@ -51,52 +64,91 @@ my $db =
                                       -user   => $user,
                                       -port   => $port,
                                       -dbname => $dbname,
-                                      -pass   => $pass, );
+                                      -pass   => $pass );
 
 
 my $sa = $db->get_SliceAdaptor;
 my $slices;
 
+# Only update the canonical transcripts in this region.
 if ($seq_region_name) {
+  print "Only updating genes on chromosome $seq_region_name\n";
   my $slice =
     $sa->fetch_by_region( $coord_system, $seq_region_name, $include_non_ref );
   push( @$slices, $slice );
+
+  my $update_to_null = q(
+    UPDATE gene
+       SET canonical_transcript_id = NULL,
+           canonical_annotation = NULL
+     WHERE seq_region_id = ?);
+  my $seq_region_id = $slice->get_seq_region_id;
+
+  my $sth = $db->dbc->prepare($update_to_null);
+  $sth->bind_param(1, $seq_region_id, SQL_INTEGER);
+  $sth->execute();
+
 } else {
   $slices = $sa->fetch_all( $coord_system, '', $include_non_ref );
-}
 
-my $update_to_null = "update gene set canonical_transcript_id = NULL, canonical_annotation = NULL";
-$db->dbc->do($update_to_null);
-# get $db->dbc->db_handle->do($update_to_null) instead if above not working
+  my $update_to_null = q(update gene set canonical_transcript_id = NULL, canonical_annotation = NULL);
+  $db->dbc->do($update_to_null);
+  # get $db->dbc->db_handle->do($update_to_null) instead if above not working
+}
 
 my $gene_update_sql = "update gene set canonical_transcript_id = ? where gene_id = ?";
-my $sth = $db->dbc->prepare($gene_update_sql);
+my $gene_sth = $db->dbc->prepare($gene_update_sql);
+
 SLICE: foreach my $slice (@$slices) {
   print "Getting genes for " . $slice->name . "\n" if ($verbose);
   my $genes = $slice->get_all_Genes( undef, undef, 1 );
   my %canonical;
+
 GENE: foreach my $gene (@$genes) {
-    print "Updating gene: ", $gene->dbID, "\n";
+    print "Looking at gene: ", $gene->dbID, "\n";
     my $transcripts = $gene->get_all_Transcripts;
     if ( @$transcripts == 1 ) {
       $canonical{ $gene->dbID } = $transcripts->[0]->dbID;
       next GENE;
     }
+
+    # Keep track of the transcript biotypes
+    my %trans;
+    foreach my $transcript ( @{$transcripts} ) {
+      push @{ $trans{ $transcript->biotype } }, $transcript;
+    }
+
+    # If there is a single protein_coding transcript, make it the
+    # canonical transcript, if it has a functional translation.
+    foreach my $key ( keys(%trans) ) {
+      if ( ( $key eq 'protein_coding' ) && ( @{ $trans{$key} } == 1 ) ) {
+        my $trans_id = $trans{$key}->[0]->dbID;
+        unless ( $trans{$key}->[0]->translation->seq =~ /\*/ ) {
+          $canonical{ $gene->dbID } = $trans_id;
+          next GENE;
+        } else {
+          carp(   "Transcript $trans_id has internal stop(s) "
+                . "and it shouldn't if it is protein coding\n" );
+        }
+      }
+    }
+
     my $has_translation = 0;
     my $count           = 0;
     my @with_translation;
     my @no_translation;
 
-    foreach my $transcript (@$transcripts) {
-      if (    $transcript->translation
+    foreach my $transcript ( @{$transcripts} ) {
+
+      if (    ( $gene->biotype ne 'pseudogene' )
            && ( $gene->biotype       ne 'processed_transcript' )
-           && ( $gene->biotype       ne 'pseudogene' )
            && ( $transcript->biotype ne 'nonsense_mediated_decay' )
-           && ( $transcript->biotype ne 'processed_transcript' ) )
+           && ( $transcript->biotype ne 'processed_transcript' )
+           && $transcript->translation
+           && ( $transcript->translation->seq !~ /\*/ ) )
+
       {
-        unless ( $transcript->translation->seq =~ /\*/ ) {
-          push( @with_translation, $transcript );
-        }
+        push( @with_translation, $transcript );
       } else {
         push( @no_translation, $transcript );
       }
@@ -120,9 +172,11 @@ GENE: foreach my $gene (@$genes) {
     }
     $canonical{ $gene->dbID } = $sorted[0]->dbID;
   } ## end foreach my $gene (@$genes)
+
   foreach my $gene_id ( keys(%canonical) ) {
     my $transcript_id = $canonical{$gene_id};
-    $sth->execute( $transcript_id, $gene_id ) if ($write);
+    print "Updating gene $gene_id\n";
+    $gene_sth->execute( $transcript_id, $gene_id ) if ($write);
   }
 } ## end foreach my $slice (@$slices)