diff --git a/misc-scripts/xref_mapping/XrefParser/RefSeqGPFFParser.pm b/misc-scripts/xref_mapping/XrefParser/RefSeqGPFFParser.pm
index 71d793818afd551b3dbcca71dfef1d304ad0f91f..d04724738b66d942d51430ceb782d84de823687e 100644
--- a/misc-scripts/xref_mapping/XrefParser/RefSeqGPFFParser.pm
+++ b/misc-scripts/xref_mapping/XrefParser/RefSeqGPFFParser.pm
@@ -135,7 +135,7 @@ sub create_xrefs {
   # Create a hash of all valid names and taxon_ids for this species
   my %species2name = $self->species_id2name($dbi);
   if (defined $species_name) { push @{$species2name{$species_id}}, $species_name; }
-  if (!defined $species2name{$species_id}) { next; }
+  if (!defined $species2name{$species_id}) { return; }
   my %species2tax  = $self->species_id2taxonomy($dbi);
   push @{$species2tax{$species_id}}, $species_id;
   my @names   = @{$species2name{$species_id}};
@@ -164,30 +164,48 @@ sub create_xrefs {
 
   local $/ = "\/\/\n";
 
-  my $type;
-  if ($file =~ /protein/) {
+  my $type = $self->type_from_file($file);
+  return unless $type;
 
-    $type = 'peptide';
+  while ( my $entry = $refseq_io->getline() ) {
 
-  } elsif ($file =~ /rna/) {
+    my $xref = $self->xref_from_record(
+      $entry,
+      \%name2species_id, \%taxonomy2species_id, 
+      $pred_mrna_source_id, $pred_ncrna_source_id,
+      $mrna_source_id, $ncrna_source_id,
+      $pred_peptide_source_id, $peptide_source_id,
+      $entrez_source_id, $wiki_source_id, $add_dependent_xref_sth,
+      $species_id, $type, \%refseq_ids,\%entrez_ids,\%wiki_ids
+     );
 
-    $type = 'dna';
+      push @xrefs, $xref if $xref;
 
-  } elsif($file =~ /RefSeq_protein/){
-
-    $type = 'peptide';
-
-  }else{
-    print STDERR "Could not work out sequence type for $file\n";
-    return;
-  }
+  } # while <REFSEQ>
 
+  $refseq_io->close();
 
-  while ( $_ = $refseq_io->getline() ) {
+  print "Read " . scalar(@xrefs) ." xrefs from $file\n" if($verbose);
 
-    my $xref;
+  return \@xrefs;
 
-    my $entry = $_;
+}
+sub type_from_file {
+    my ($self, $file) = @_;
+    return 'peptide' if $file =~ /RefSeq_protein/;
+    return 'dna' if $file =~ /rna/;
+    return 'peptide' if $file =~ /protein/;
+    print STDERR "Could not work out sequence type for $file\n";
+    return;
+}
+sub xref_from_record {
+    my ( $self, $entry, $name2species_id, $taxonomy2species_id,
+      $pred_mrna_source_id, $pred_ncrna_source_id,
+      $mrna_source_id, $ncrna_source_id,
+      $pred_peptide_source_id, $peptide_source_id,
+      $entrez_source_id, $wiki_source_id, $add_dependent_xref_sth,
+      $species_id, $type, $refseq_ids,$entrez_ids,$wiki_ids
+) = @_;
     chomp $entry;
 
     my ($species) = $entry =~ /\s+ORGANISM\s+(.*)\n/;
@@ -196,12 +214,12 @@ sub create_xrefs {
     $species =~ s/\s*\(.+\)//; # Ditch anything in parens
     $species =~ s/\s+/_/g;
     $species =~ s/\n//g;
-    my $species_id_check = $name2species_id{$species};
+    my $species_id_check = $name2species_id->{$species};
 
     # Try going through the taxon ID if species check didn't work.
     if ( !defined $species_id_check ) {
         my ($taxon_id) = $entry =~ /db_xref="taxon:(\d+)"/;
-        $species_id_check = $taxonomy2species_id{$taxon_id};
+        $species_id_check = $taxonomy2species_id->{$taxon_id};
     }
 
     # skip xrefs for species that aren't in the species table
@@ -209,6 +227,7 @@ sub create_xrefs {
         && defined $species_id_check
         && $species_id == $species_id_check )
     {
+      my $xref = {};
       my ($acc) = $entry =~ /ACCESSION\s+(\S+)/;
       my ($ver) = $entry =~ /VERSION\s+(\S+)/;
       my ($refseq_pair) = $entry =~ /DBSOURCE\s+REFSEQ: accession (\S+)/;
@@ -243,7 +262,7 @@ sub create_xrefs {
       $description =~ s/\s+/ /g;
       $description = substr($description, 0, 255) if (length($description) > 255);
 
-      my ($seq) = $_ =~ /^\s*ORIGIN\s+(.+)/ms; # /s allows . to match newline
+      my ($seq) = $entry =~ /^\s*ORIGIN\s+(.+)/ms; # /s allows . to match newline
       my @seq_lines = split /\n/, $seq;
       my $parsed_seq = "";
       foreach my $x (@seq_lines) {
@@ -311,12 +330,12 @@ sub create_xrefs {
           # Add xrefs for RefSeq mRNA as well where available
           $refseq_pair =~ s/\.[0-9]*// if $refseq_pair;
           if (defined $refseq_pair) {
-            if ($refseq_ids{$refseq_pair}) {
-              foreach my $refseq_id (@{ $refseq_ids{$refseq_pair} }) {
-                foreach my $entrez_id (@{ $entrez_ids{$ll} }) {
+            if ($refseq_ids->{$refseq_pair}) {
+              foreach my $refseq_id (@{ $refseq_ids->{$refseq_pair} }) {
+                foreach my $entrez_id (@{ $entrez_ids->{$ll} }) {
                   $add_dependent_xref_sth->execute($refseq_id, $entrez_id);
                 }
-                foreach my $wiki_id (@{ $wiki_ids{$ll} }) {
+                foreach my $wiki_id (@{ $wiki_ids->{$ll} }) {
                   $add_dependent_xref_sth->execute($refseq_id, $wiki_id);
                 }
               }
@@ -328,19 +347,8 @@ sub create_xrefs {
       # Don't add SGD Xrefs, as they are mapped directly from SGD ftp site
 
       # Refseq's do not tell whether the mim is for the gene of morbid so ignore for now.
-
-      push @xrefs, $xref;
-
-    }# if defined species
-
-  } # while <REFSEQ>
-
-  $refseq_io->close();
-
-  print "Read " . scalar(@xrefs) ." xrefs from $file\n" if($verbose);
-
-  return \@xrefs;
-
+      return $xref;
+  }
 }
 
 # --------------------------------------------------------------------------------
diff --git a/misc-scripts/xref_mapping/XrefParser/WormbaseCElegansBase.pm b/misc-scripts/xref_mapping/XrefParser/WormbaseCElegansBase.pm
new file mode 100644
index 0000000000000000000000000000000000000000..53558bddbdae544f6cf7d5247a0b52162a448136
--- /dev/null
+++ b/misc-scripts/xref_mapping/XrefParser/WormbaseCElegansBase.pm
@@ -0,0 +1,67 @@
+=head1 LICENSE
+
+Copyright [2018] EMBL-European Bioinformatics Institute
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+
+=cut
+use strict;
+use warnings;
+package XrefParser::WormbaseCElegansBase;
+
+sub swap_dependency {
+  my ($self, $source_ids, $dbi, $xref, @source_ids_skip) = @_;
+  $source_ids = [$source_ids] unless ref $source_ids eq 'ARRAY';
+
+  my @matching_source_id_dependents;
+  my @other_dependents;
+  for my $dependent_xref (@{$xref->{DEPENDENT_XREFS} || []}){
+     my $source_id_here = $dependent_xref->{SOURCE_ID};
+     if(grep {$_ == $source_id_here } @$source_ids
+         and $self->get_xref($dependent_xref->{ACCESSION}, $dependent_xref->{SOURCE_ID}, $xref->{SPECIES_ID})){
+         $dependent_xref->{SPECIES_ID} = $xref->{SPECIES_ID};
+         push @matching_source_id_dependents, $dependent_xref;
+     } elsif (grep {$_ == $source_id_here} @source_ids_skip){
+       #skip
+     } else {
+         push @other_dependents, $dependent_xref;
+     }
+  }
+  my @result;
+  for my $matching_source_id_dependent (@matching_source_id_dependents) {
+    my $source_id = $matching_source_id_dependent->{SOURCE_ID};
+    my $xref_as_dependent_here = {
+      %$xref, INFO_TYPE => "DEPENDENT",
+      LINKAGE_SOURCE_ID => $source_id,
+      DEPENDENT_XREFS => undef,
+    };
+    my @dependents_here = ({
+      %$xref, INFO_TYPE => "DEPENDENT",
+      LINKAGE_SOURCE_ID => $source_id,
+      DEPENDENT_XREFS => undef,
+    });
+    for my $d (@other_dependents){
+       push @dependents_here, {
+          %$d, INFO_TYPE => "DEPENDENT", LINKAGE_SOURCE_ID => $source_id,
+       };
+    }
+    push @result, {
+       %$matching_source_id_dependent,
+       LABEL=>undef, INFO_TYPE => "MISC",
+       DEPENDENT_XREFS =>\@dependents_here, 
+    };
+  }
+
+  return @result;
+}
+1;
diff --git a/misc-scripts/xref_mapping/XrefParser/WormbaseCElegansRefSeqGPFFParser.pm b/misc-scripts/xref_mapping/XrefParser/WormbaseCElegansRefSeqGPFFParser.pm
new file mode 100644
index 0000000000000000000000000000000000000000..da456fc3fc6115632e4b9876efcc2a570b59c458
--- /dev/null
+++ b/misc-scripts/xref_mapping/XrefParser/WormbaseCElegansRefSeqGPFFParser.pm
@@ -0,0 +1,68 @@
+=head1 LICENSE
+
+Copyright [2018] EMBL-European Bioinformatics Institute
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+
+=cut
+
+package XrefParser::WormbaseCElegansRefSeqGPFFParser;
+use strict;
+use warnings;
+use parent qw/XrefParser::WormbaseCElegansBase XrefParser::RefSeqGPFFParser/;
+my $SOURCE_IDS;
+my $ACCESSION_FROM_ENTRY_PATTERN;
+sub run {
+  my ($self, $arg_ref) = @_;
+  my $type = $self->type_from_file(@{$arg_ref->{files}});
+  if($type eq 'peptide'){
+    $SOURCE_IDS = [ $self->get_source_id_for_source_name('protein_id') ];
+    $ACCESSION_FROM_ENTRY_PATTERN =  qr/This record has been curated by WormBase. The\s+reference sequence is identical to (.*?)\./;
+  } elsif ($type eq 'dna'){
+    $SOURCE_IDS = [
+       $self->get_source_id_for_source_name('wormbase_cds'),
+       $self->get_source_id_for_source_name('wormbase_transcript'),
+    ];
+    $ACCESSION_FROM_ENTRY_PATTERN = qr/standard_name="(.*?)"/;
+  }
+  die %$arg_ref unless @$SOURCE_IDS;
+  return $self->SUPER::run($arg_ref);
+}
+sub upload_xref_object_graphs {
+  my ($self, $xrefs, $dbi) = @_;
+  my @adapted_xrefs;
+  for my $xref ( @$xrefs) {
+    push @adapted_xrefs, $self->swap_dependency($SOURCE_IDS, $dbi, $xref, @$SOURCE_IDS);
+  }  
+  return $self->SUPER::upload_xref_object_graphs(\@adapted_xrefs, $dbi);
+}
+
+sub xref_from_record {
+   my ($self, $entry, @args) = @_;
+   return &modify_xref_with_dependent(
+      $SOURCE_IDS, $entry,
+      $self->SUPER::xref_from_record($entry, @args),
+      $ACCESSION_FROM_ENTRY_PATTERN,
+   );
+}
+
+sub modify_xref_with_dependent {
+   my ($source_ids, $entry, $xref, $get_accession_pattern) = @_;
+   return unless $xref;
+   my ($accession) = $entry =~ $get_accession_pattern;
+   return unless $accession;
+   $xref->{DEPENDENT_XREFS} //= [];
+   push @{$xref->{DEPENDENT_XREFS}}, map {{ACCESSION => $accession, SOURCE_ID=>$_}} @$source_ids;
+   return $xref;
+}
+1;
diff --git a/misc-scripts/xref_mapping/XrefParser/WormbaseCElegansUniProtParser.pm b/misc-scripts/xref_mapping/XrefParser/WormbaseCElegansUniProtParser.pm
new file mode 100644
index 0000000000000000000000000000000000000000..68b2c7e45b4109104466d81d250a7236836538d0
--- /dev/null
+++ b/misc-scripts/xref_mapping/XrefParser/WormbaseCElegansUniProtParser.pm
@@ -0,0 +1,44 @@
+=head1 LICENSE
+
+Copyright [2018] EMBL-European Bioinformatics Institute
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+
+=cut
+use strict;
+use warnings;
+package XrefParser::WormbaseCElegansUniProtParser;
+
+# UniProt xrefs are sometimes - really - dependent xrefs of
+# INSDC entries which we get from somewhere else
+# Attempt to find the parent (has to already be present in the xref table)
+# INSDC and UniProt entries have the same protein sequence, and
+# UniProt lists INSDC as a parent. We get INSDC entries from somewhere else,
+# so make UniProt entries dependent on INSDC entries.
+# Note:
+# INSDC entries have coordinates, and UniProt entries don't.
+# So for perfect homologs, there can be many INSDC entries per UniProt.
+
+use parent qw/XrefParser::WormbaseCElegansBase XrefParser::UniProtParser/;
+
+sub upload_xref_object_graphs {
+  my ($self, $xrefs, $dbi) = @_;
+  my $source_id = $self->get_source_id_for_source_name('protein_id'); 
+  my $source_id_skip = $self->get_source_id_for_source_name('EMBL'); 
+  my @adapted_xrefs;
+  for my $xref ( @$xrefs) {
+    push @adapted_xrefs, $self->swap_dependency($source_id, $dbi, $xref, $source_id_skip);
+  }  
+  return $self->SUPER::upload_xref_object_graphs(\@adapted_xrefs, $dbi);
+}
+1;
diff --git a/misc-scripts/xref_mapping/XrefParser/WormbaseDirectParser.pm b/misc-scripts/xref_mapping/XrefParser/WormbaseDirectParser.pm
index 0d570d862f638facab43264116f0f2fa0bdc0817..7b5e1d88ed1ebb80141d73727a543448a6ad5a7f 100644
--- a/misc-scripts/xref_mapping/XrefParser/WormbaseDirectParser.pm
+++ b/misc-scripts/xref_mapping/XrefParser/WormbaseDirectParser.pm
@@ -28,151 +28,96 @@ use XrefParser::BaseParser;
 
 use base qw( XrefParser::BaseParser );
 
-
 sub run {
 
   my ($self, $ref_arg) = @_;
   my $source_id    = $ref_arg->{source_id};
   my $species_id   = $ref_arg->{species_id};
   my $files        = $ref_arg->{files};
-  my $verbose      = $ref_arg->{verbose};
 
   if((!defined $source_id) or (!defined $species_id) or (!defined $files)){
     croak "Need to pass source_id, species_id and files as pairs";
   }
-  $verbose |=0;
 
   my $file = @{$files}[0];
-
-  my $wormbasegene_src_id = $self->get_source_id_for_source_name('wormbase_gene');
-  my $wormbasegseq_src_id = $self->get_source_id_for_source_name('wormbase_gseqname');
-  my $wormbaselocus_src_id = $self->get_source_id_for_source_name('wormbase_locus');
-  my $wormbasetran_src_id = $self->get_source_id_for_source_name('wormbase_transcript');
-  my $wormpep_src_id = $self->get_source_id_for_source_name('wormpep_id');
-
-  my $xref_wgene_sth = $self->dbi()->prepare("SELECT xref_id FROM xref WHERE accession=? AND source_id=$wormbasegene_src_id AND species_id=$species_id");
-  my $xref_gseq_sth = $self->dbi()->prepare("SELECT xref_id FROM xref WHERE accession=? AND source_id=$wormbasegseq_src_id AND species_id=$species_id");
-  my $xref_wloc_sth = $self->dbi()->prepare("SELECT xref_id FROM xref WHERE accession=? AND source_id=$wormbaselocus_src_id AND species_id=$species_id");
-  my $xref_wtran_sth = $self->dbi()->prepare("SELECT xref_id FROM xref WHERE accession=? AND source_id=$wormbasetran_src_id AND species_id=$species_id");
-  my $xref_wpep_sth = $self->dbi()->prepare("SELECT xref_id FROM xref WHERE accession=? AND source_id=$wormpep_src_id AND species_id=$species_id");
-
-  my $pep_io = $self->get_filehandle($file);
-
-  if ( !defined $pep_io ) {
-    print STDERR "ERROR: Could not open $file\n";
-    return 1;    # 1 error
+  my @fields = qw/wormbase_gene wormbase_gseqname wormbase_locus wormbase_transcript wormbase_cds wormpep_id protein_id/;
+  my %src_ids;
+  my $sth = $self->dbi()->prepare("SELECT xref_id FROM xref WHERE accession=? AND source_id=? AND species_id=$species_id");
+  for my $field (@fields){
+    $src_ids{$field} = $self->get_source_id_for_source_name($field); 
   }
+  my $data = $self->get_data(@$files);
+  for my $gene_id (keys %$data){
+    $self->add_xref_and_direct_xref(
+      $sth, $species_id, "gene", $src_ids{wormbase_gene},
+      $gene_id,  $gene_id
+    );
+    $self->add_xref_and_direct_xref(
+      $sth, $species_id, "gene", $src_ids{wormbase_gseqname},
+      $gene_id, $data->{$gene_id}->{wormbase_gseqname}
+    );
+    $self->add_xref_and_direct_xref(
+      $sth, $species_id, "gene", $src_ids{wormbase_locus}, 
+      $gene_id, $data->{$gene_id}->{wormbase_locus}
+    );
+    for my $transcript (@{$data->{$gene_id}->{transcripts}}){
+      $self->add_xref_and_direct_xref(
+        $sth, $species_id, "transcript", $src_ids{wormbase_transcript}, 
+        $transcript->{transcript_id}, $transcript->{transcript_id}
+      );
+      $self->add_xref_and_direct_xref(
+        $sth, $species_id, "transcript", $src_ids{wormbase_cds},
+        $transcript->{wormbase_cds}, $transcript->{wormbase_cds}, $transcript->{transcript_id}
+      );
+      $self->add_xref_and_direct_xref(
+        $sth, $species_id, "translation", $src_ids{wormpep_id}, 
+        $transcript->{wormpep_id}, $transcript->{wormpep_id}, $transcript->{transcript_id}
+      );
+      $self->add_xref_and_direct_xref(
+        $sth, $species_id, "translation", $src_ids{protein_id}, 
+        $transcript->{protein_id}, $transcript->{protein_id}, $transcript->{transcript_id}
+      );
+    } 
+  }
+}
+sub get_data {
+  my ($self, $file) = @_;
+  my $pep_io = $self->get_filehandle($file) or croak "Could not open: $file";
 
-  my ($x_count, $d_count);
-
-  my (%wbgene2seqid, %wbgene2loc, %tran2wbtran, %tran2wpep);
+  my $data = {};
 
   while ( $_ = $pep_io->getline() ) {
     next if /^\/\//;
-    
-    my ($gseqid, $wbgeneid, $locus, $wbtranscript, $wormpep) = split(/\t/, $_);
-
-    # Each WBGeneid should have only one sequence name and (optionally) one locus name
-    $wbgene2seqid{$wbgeneid} = $gseqid;
-    $wbgene2loc{$wbgeneid} = $locus if $locus ne '.';
-
-    $tran2wbtran{$wbtranscript} = 1;
-    $tran2wpep{$wbtranscript} = $wormpep if $wormpep ne '.';
-
+    my ($gseqid, $wbgeneid, $locus, $wbtranscript, $wormpep, $insdc_parent, $insdc_locus_tag, $protein_id, $uniprot_id) = split(/\t/, $_);
+
+    $data->{$wbgeneid}->{transcripts} //=[];
+    push @{$data->{$wbgeneid}->{transcripts}}, {
+      transcript_id => $wbtranscript,
+      ($wormpep ne '.' && $wbtranscript =~ /^(.*?)(\.\d+)?$/ ? (wormbase_cds => $1 ) : ()),
+      ($wormpep ne '.' ? (wormpep_id => $wormpep) : ()),
+      ($protein_id ne '.' ? (protein_id => $protein_id) : ()),
+    };
+    $data->{$wbgeneid}->{wormbase_gseqname} = $gseqid;
+    $data->{$wbgeneid}->{wormbase_locus} =  $locus if $locus ne '.'; 
   }
   $pep_io->close();
-
-  foreach my $wbgid (keys %wbgene2seqid) {
-    # reuse or create xref
-    $xref_wgene_sth->execute($wbgid);
-    my $xref_id = ($xref_wgene_sth->fetchrow_array())[0];
-    if (!$xref_id) {
-      $xref_id = $self->add_xref({ acc        => $wbgid,
-                                   label      => $wbgid,
-                                   source_id  => $wormbasegene_src_id,
-                                   species_id => $species_id,
-                                   info_type  => "DIRECT"} );
-      $x_count++;
-    }
-    $self->add_direct_xref($xref_id, $wbgid, "gene", "");
-    $d_count++;
-    
-    my $gseqname = $wbgene2seqid{$wbgid};
-
-    $xref_gseq_sth->execute($wbgid);
-    $xref_id = ($xref_gseq_sth->fetchrow_array())[0];
-    if (not $xref_id) {
-      $xref_id = $self->add_xref({ acc        => $wbgid,
-                                   label      => $gseqname,
-                                   source_id  => $wormbasegseq_src_id,
-                                   species_id => $species_id,
-                                   info_type  => "DIRECT"} );
-      $x_count++;
-    }
-    $self->add_direct_xref($xref_id, $wbgid, "gene", "");
-    $d_count++;
-
-
-    if (exists $wbgene2loc{$wbgid}) {
-      my $loc_sym = $wbgene2loc{$wbgid};
-
-      $xref_wloc_sth->execute($wbgid);    
-      $xref_id = ($xref_wloc_sth->fetchrow_array())[0];
-      if (!$xref_id) {
-        $xref_id = $self->add_xref({ acc        => $wbgid,
-                                     label      => $loc_sym,
-                                     source_id  => $wormbaselocus_src_id,
-                                     species_id => $species_id,
-                                     info_type  => "DIRECT"} );
-        $x_count++;
-      }
-    }
-    
-    # and direct xref
-    $self->add_direct_xref($xref_id, $wbgid, "gene", "");
-    $d_count++;
-  }
-  
-
-  foreach my $tid (keys %tran2wbtran) {
-    $xref_wtran_sth->execute($tid);      
-    my $xref_id = ($xref_wtran_sth->fetchrow_array())[0];
-    if (!$xref_id) {
-      $xref_id = $self->add_xref({ acc        => $tid,
-                                   label      => $tid,
-                                   source_id  => $wormbasetran_src_id,
-                                   species_id => $species_id,
-                                   info_type  => "DIRECT"} );
-      $x_count++;
-    }
-    
-    # and direct xref
-    $self->add_direct_xref($xref_id, $tid, "transcript", "");
-    $d_count++;
-  }
-
-  foreach my $tid (keys %tran2wpep) {
-    my $wpep = $tran2wpep{$tid};
-
-    $xref_wpep_sth->execute($wpep);
-      
-    my $xref_id = ($xref_wpep_sth->fetchrow_array())[0];
-    if (!$xref_id) {
-      $xref_id = $self->add_xref({ acc        => $wpep,
-                                   label      => $wpep,
-                                   source_id  => $wormpep_src_id,
-                                   species_id => $species_id,
-                                   info_type  => "DIRECT"} );
-      $x_count++;
-    }
-
-    # and direct xref
-    $self->add_direct_xref($xref_id, $tid, "translation", "");
-    $d_count++;
-  }
-
-  print "Added $d_count direct xrefs and $x_count xrefs\n" if($verbose);
-  return 0;
+  return $data;
 }
 
+sub add_xref_and_direct_xref {
+  my ($self, $sth, $species_id, $object_type, $source_id,  $object_id, $label, $primary_id) = @_;
+  $primary_id //= $object_id;
+  return unless $label;
+  $sth->execute($primary_id, $source_id);
+  $self->add_direct_xref(
+      ($sth->fetchrow_array())[0]
+      || $self->add_xref({ 
+           acc => $object_id, 
+           label => $label, 
+           source_id => $source_id, 
+           species_id => $species_id,
+           info_type  => "DIRECT"
+      })
+  , $primary_id, $object_type, "");
+}
 1;
diff --git a/misc-scripts/xref_mapping/xref_config.ini b/misc-scripts/xref_mapping/xref_config.ini
index 69889a109487bb49b4fcd8204f121f1878ba6c5c..fe7c917c79d9953d6d4630ed68ac13435511716b 100644
--- a/misc-scripts/xref_mapping/xref_config.ini
+++ b/misc-scripts/xref_mapping/xref_config.ini
@@ -1652,6 +1652,14 @@ order           = 50
 priority        = 1
 parser          = comes from WormbaseDirectParser
 
+[source wormbase_cds::wormbase]
+# Used by wormbase core species
+name            = wormbase_cds
+download        = N
+order           = 50
+priority        = 1
+parser          = comes from WormbaseDirectParser
+
 [source Gramene_Pathway::arabidopsis_thaliana]
 # Used by Arabidopsis thaliana, Gramene-specific
 name            = Gramene_Pathway
diff --git a/modules/t/xref_parser.t b/modules/t/xref_parser.t
index 2c77b54ea78293c4083a393f618bd1af2df086fb..03f16c15bb263d87e35f3ebc97112b94fa5f4a9a 100644
--- a/modules/t/xref_parser.t
+++ b/modules/t/xref_parser.t
@@ -55,32 +55,37 @@ my %xref_tables_expected_empty_by_default = (
 );
 my $tmp_dir = tempdir(CLEANUP=>1);
 sub store_in_temporary_file {
-  my $path = "$tmp_dir/tmp";
+  my ($content, %opts) = @_;
+  my $path = join("/", $tmp_dir, $opts{tmp_file_name} || "tmp");
   open(my $fh, ">", $path) or die $path;  
-  print $fh @_;
+  print $fh $content;
   close($fh);
   return $path;
 }
+# Happens to match the species id of the core database
+my $SPECIES_ID = 1;
+my $SPECIES_NAME = "Homo sapiens";
 sub test_parser {
-  my ($parser, $content, $source_id, $expected, $test_name) = @_;
+  my ($parser, $content, $expected, $test_name, %opts) = @_;
   require_ok($parser);
   $parser->new($database)->run({
-   files => [store_in_temporary_file($content)],
-   source_id => $source_id,
-   species_id => 1 #Happens to be right, but doesn't matter anyway - we are not testing the mapping
+   files =>[store_in_temporary_file($content, %opts)],
+   source_id => "Source id (unused but sometimes required)",
+   species_id => $SPECIES_ID,
+   species => $SPECIES_NAME,
   });
   my $expected_table_counts = {%xref_tables_expected_empty_by_default, %$expected};
   subtest "$parser $test_name" => sub {
     plan tests => scalar(keys %$expected_table_counts);
     for my $table (keys %$expected_table_counts){
       my $actual_count = count_rows($dba, $table);
-      $dba->dbc->prepare("delete from $table;")->execute() if $actual_count;
+      $dba->dbc->prepare("delete from $table;")->execute() if ($actual_count and not $opts{skip_clean});
       my $expected_count = $expected_table_counts->{$table};
       is($actual_count, $expected_count, "$table has $expected_count rows") or diag "$table has $actual_count rows";
     }
   }
 }
-test_parser("XrefParser::WormbaseDirectParser", "", "source_id (unused)", {}, "null case");
+test_parser("XrefParser::WormbaseDirectParser", "",  {}, "null case");
 my $wormbase_celegans_xrefs_head= <<EOF;
 //
 // WormBase Caenorhabditis elegans XREFs for WS265
@@ -99,14 +104,264 @@ my $wormbase_celegans_xrefs_head= <<EOF;
 // Missing or not applicable data (e.g. protein identifiers for non-coding RNAs) is denoted by a "."
 //
 2L52.1	WBGene00007063	.	2L52.1b	CE50569	BX284602	CELE_2L52.1	CTQ86426	A0A0K3AWR5
-2L52.1	WBGene00007063	.	2L52.1a	CE32090	BX284602	CELE_2L52.1	CCD61130	A4F336
+2L52.1	WBGene00007063	.	2L52.1a.1	CE32090	BX284602	CELE_2L52.1	CCD61130	A4F336
+2L52.1	WBGene00007063	.	2L52.1a.2	CE32090	BX284602	CELE_2L52.1	CCD61130	A4F336
 2L52.2	WBGene00200402	.	2L52.2	.	BX284602	CELE_2L52.2	.	.
 EOF
-test_parser("XrefParser::WormbaseDirectParser", $wormbase_celegans_xrefs_head, "source_id (unused)", {
-xref=>9,
-gene_direct_xref => 6,
-transcript_direct_xref => 3,
-translation_direct_xref => 2,
-}, "Direct xrefs: genes: count currently off due to some questionable duplicates, transcripts: as in column 4, translations: as in column 5. At least one direct xref per xref (but should be one to one)");
+my $wormbase_celegans_xrefs_expected_count = {
+xref=>14,
+gene_direct_xref => 4,
+transcript_direct_xref => 7,
+translation_direct_xref => 6,
+};
+test_parser("XrefParser::WormbaseDirectParser", $wormbase_celegans_xrefs_head,  
+   $wormbase_celegans_xrefs_expected_count, "Direct xrefs: genes: columns 1,2,3, transcripts: column 4 once as transcript and also once as CDS for coding genes, translations: column 5 and 7. xrefs: sum of these minus one reused CDS and two reused proteins"
+);
+
+my $uniprot_elegans_record = <<EOF;
+ID   A0A0K3AWR5_CAEEL        Unreviewed;       220 AA.
+AC   A0A0K3AWR5;
+DT   11-NOV-2015, integrated into UniProtKB/TrEMBL.
+DT   11-NOV-2015, sequence version 1.
+DT   18-JUL-2018, entry version 14.
+DE   SubName: Full=Uncharacterized protein {ECO:0000313|EMBL:CTQ86426.1};
+GN   ORFNames=2L52.1 {ECO:0000313|EMBL:CTQ86426.1,
+GN   ECO:0000313|WormBase:2L52.1b},
+GN   CELE_2L52.1 {ECO:0000313|EMBL:CTQ86426.1};
+OS   $SPECIES_NAME.
+OC   Eukaryota; Metazoa; Ecdysozoa; Nematoda; Chromadorea; Rhabditida;
+OC   Rhabditoidea; Rhabditidae; Peloderinae; Caenorhabditis.
+OX   NCBI_TaxID=$SPECIES_ID {ECO:0000313|EMBL:CTQ86426.1, ECO:0000313|Proteomes:UP000001940};
+RN   [1] {ECO:0000313|EMBL:CTQ86426.1, ECO:0000313|Proteomes:UP000001940}
+RP   NUCLEOTIDE SEQUENCE [LARGE SCALE GENOMIC DNA].
+RC   STRAIN=Bristol N2 {ECO:0000313|EMBL:CTQ86426.1,
+RC   ECO:0000313|Proteomes:UP000001940};
+RX   PubMed=9851916; DOI=https://doi.org/10.1126/science.282.5396.2012;
+RG   The C. elegans sequencing consortium;
+RA   Sulson J.E., Waterston R.;
+RT   "Genome sequence of the nematode C. elegans: a platform for
+RT   investigating biology.";
+RL   Science 282:2012-2018(1998).
+CC   -----------------------------------------------------------------------
+CC   Copyrighted by the UniProt Consortium, see https://www.uniprot.org/terms
+CC   Distributed under the Creative Commons Attribution (CC BY 4.0) License
+CC   -----------------------------------------------------------------------
+DR   EMBL; BX284602; CTQ86426.1; -; Genomic_DNA.
+DR   RefSeq; NP_001300487.1; NM_001313558.1.
+DR   UniGene; Cel.25279; -.
+DR   EnsemblMetazoa; 2L52.1b; 2L52.1b; WBGene00007063.
+DR   GeneID; 181792; -.
+DR   CTD; 181792; -.
+DR   WormBase; 2L52.1b; CE50569; WBGene00007063; -.
+DR   Proteomes; UP000001940; Chromosome II.
+DR   ExpressionAtlas; A0A0K3AWR5; baseline and differential.
+PE   4: Predicted;
+KW   Complete proteome {ECO:0000313|Proteomes:UP000001940};
+KW   Reference proteome {ECO:0000313|Proteomes:UP000001940}.
+SQ   SEQUENCE   220 AA;  26028 MW;  E12D5EA7F6FFF373 CRC64;
+     MSDNEEVYVN FRGMNCISTG KSASMVPSKR RNWPKRVKKR LSTQRNNQKT IRPPELNKNN
+     IEIKDMNSNN LEERNREECI QPVSVEKNIL HFEKFKSNQI CIVRENNKFR EGTRRRRKNS
+     GESEDLKIHE NFTEKRRPIR SCKQNISFYE MDGDIEEFEV FFDTPTKSKK VLLDIYSAKK
+     MPKIEVEDSL VNKFHSKRPS RACRVLGSME EVPFDVEIGY
+//
+EOF
+test_parser("XrefParser::UniProtParser", $uniprot_elegans_record,  {
+  xref => 3,
+  primary_xref => 1,
+  dependent_xref => 2,
+},"Example UniProt record"); 
+(my $uniprot_elegans_record_embl = $uniprot_elegans_record) =~ s/DR   EMBL;.*?\n//;
+test_parser("XrefParser::UniProtParser", $uniprot_elegans_record_embl,  {
+  xref => 1,
+  primary_xref => 1,
+},"EMBL entries are the dependent xrefs");
+my @recognised_sources = (
+ "PDB; 3HRI; X-ray; 2.85 A; A/B/C/D/E/F=44-477.",
+ "MEROPS; C26.956; -.",
+);
+for my $l (@recognised_sources) {
+  (my $uniprot_elegans_record_extra_line = $uniprot_elegans_record) =~ s/DR(.*?)\n/DR$1\nDR  $l/;
+  test_parser("XrefParser::UniProtParser", $uniprot_elegans_record_extra_line,  {
+    xref => 4,
+    primary_xref => 1,
+    dependent_xref => 3,
+  }, "Pick up as extra xref + dependent xref: $l" );
+}
+sub test_elegans_uniprot {
+  my ($expected_count, $extra_line) = @_;
+  my $uniprot_elegans_record_here = $uniprot_elegans_record;
+  $uniprot_elegans_record_here =~ s/DR(.*?)\n/DR$1\nDR  $extra_line/ if $extra_line;
+  test_parser("XrefParser::WormbaseCElegansUniProtParser", 
+      $uniprot_elegans_record_here,
+     {}, 
+     "No UniProt entries without corresponding INSDC entries $extra_line"
+  );
+  test_parser("XrefParser::WormbaseDirectParser",
+    $wormbase_celegans_xrefs_head,
+    $wormbase_celegans_xrefs_expected_count,
+    "Test again to set up the next test",
+    skip_clean => 1,
+  );
+  test_parser("XrefParser::WormbaseCElegansUniProtParser",
+    $uniprot_elegans_record_here,
+    $expected_count,
+    "Correct xrefs and dependent xrefs $extra_line",
+  );
+}
+my $wormbase_and_uniprot_expected_count = {
+  %$wormbase_celegans_xrefs_expected_count,
+  xref => $wormbase_celegans_xrefs_expected_count->{xref}+1,
+  dependent_xref => 1 #protein id still there, no parent sequence ID
+};
+test_elegans_uniprot($wormbase_and_uniprot_expected_count, "");
+for my $l (@recognised_sources) {
+  test_elegans_uniprot({
+    %$wormbase_and_uniprot_expected_count,
+    xref => $wormbase_and_uniprot_expected_count->{xref}+1,
+    dependent_xref => $wormbase_and_uniprot_expected_count->{dependent_xref}+1,
+  },$l);
+}
+test_elegans_uniprot({
+  %$wormbase_and_uniprot_expected_count,
+  dependent_xref => $wormbase_and_uniprot_expected_count->{dependent_xref}+1,
+  }, "EMBL; BX284602; CCD61130.1; -; Genomic_DNA",
+);
+
+my $refseq_protein_elegans_record = <<EOF;
+LOCUS       NP_493629                427 aa            linear   INV 19-AUG-2018
+DEFINITION  Uncharacterized protein CELE_2L52.1 [Caenorhabditis elegans].
+ACCESSION   NP_493629
+VERSION     NP_493629.2
+DBLINK      BioProject: PRJNA158
+            BioSample: SAMEA3138177
+DBSOURCE    REFSEQ: accession NM_061228.2
+KEYWORDS    RefSeq.
+SOURCE      Caenorhabditis elegans
+  ORGANISM  Caenorhabditis elegans
+            Eukaryota; Metazoa; Ecdysozoa; Nematoda; Chromadorea; Rhabditida;
+            Rhabditoidea; Rhabditidae; Peloderinae; Caenorhabditis.
+REFERENCE   
+  <snipped>
+COMMENT     REVIEWED REFSEQ: This record has been curated by WormBase. The
+            reference sequence is identical to CCD61130.
+FEATURES             Location/Qualifiers
+     source          1..427
+                     /organism="Caenorhabditis elegans"
+                     /strain="Bristol N2"
+                     /db_xref="taxon:$SPECIES_ID"
+                     /chromosome="II"
+     Protein         1..427
+                     /product="hypothetical protein"
+                     /calculated_mol_wt=49887
+     CDS             1..427
+                     /gene="2L52.1"
+                     /locus_tag="CELE_2L52.1"
+                     /standard_name="2L52.1a"
+                     /coded_by="NM_061228.2:1..1284"
+                     /note="Confirmed by transcript evidence"
+                     /db_xref="EnsemblGenomes-Gn:WBGene00007063"
+                     /db_xref="EnsemblGenomes-Tr:2L52.1a"
+                     /db_xref="GeneID:181792"
+                     /db_xref="GOA:A4F336"
+                     /db_xref="InterPro:IPR013087"
+                     /db_xref="UniProtKB/TrEMBL:A4F336"
+                     /db_xref="WormBase:WBGene00007063"
+ORIGIN      
+        1 msmvrnvsnq sekleilsck wvgclkstev fktveklldh vtadhipevi vnddgseevv
+       61 cqwdccemga srgnlqkkke wmenhfktrh vrkakifkcl iedcpvvkss sqeiethlri
+      121 shpinpkker lkefksstdh ieptqanrvw tivngevqwk tpprvkkktv iyyddgpryv
+      181 fptgcarcny dsdeselesd efwsatemsd neevyvnfrg mncistgksa smvpskrrnw
+      241 pkrvkkrlst qrnnqktirp pelnknniei kdmnsnnlee rnreeciqpv sveknilhfe
+      301 kfksnqiciv rennkfregt rrrrknsges edlkihenft ekrrpirsck qnisfyemdg
+      361 dieefevffd tptkskkvll diysakkmpk ievedslvnk fhskrpsrac rvlgsmeevp
+      421 fdveigy
+//
+EOF
+
+my $refseq_mrna_elegans_record = <<EOF;
+LOCUS       NM_001313558             663 bp    mRNA    linear   INV 19-AUG-2018
+DEFINITION  Caenorhabditis elegans Uncharacterized protein (2L52.1), partial
+            mRNA.
+ACCESSION   NM_001313558
+VERSION     NM_001313558.1
+DBLINK      BioProject: PRJNA158
+            BioSample: SAMEA3138177
+KEYWORDS    RefSeq.
+SOURCE      Caenorhabditis elegans
+  ORGANISM  Caenorhabditis elegans
+            Eukaryota; Metazoa; Ecdysozoa; Nematoda; Chromadorea; Rhabditida;
+            Rhabditoidea; Rhabditidae; Peloderinae; Caenorhabditis.
+REFERENCE   1  (bases 1 to 663)
+  <snipped>
+COMMENT     REVIEWED REFSEQ: This record has been curated by WormBase. This
+            record is derived from an annotated genomic sequence (NC_003280).
+            COMPLETENESS: incomplete on both ends.
+FEATURES             Location/Qualifiers
+     source          1..663
+                     /organism="Caenorhabditis elegans"
+                     /mol_type="mRNA"
+                     /strain="Bristol N2"
+                     /db_xref="taxon:$SPECIES_ID"
+                     /chromosome="II"
+     gene            <1..>663
+                     /gene="2L52.1"
+                     /locus_tag="CELE_2L52.1"
+                     /db_xref="GeneID:181792"
+                     /db_xref="WormBase:WBGene00007063"
+     CDS             1..663
+                     /gene="2L52.1"
+                     /locus_tag="CELE_2L52.1"
+                     /standard_name="2L52.1a"
+                     /note="Confirmed by transcript evidence"
+                     /codon_start=1
+                     /product="hypothetical protein"
+                     /protein_id="NP_001300487.1"
+                     /db_xref="EnsemblGenomes-Gn:WBGene00007063"
+                     /db_xref="EnsemblGenomes-Tr:2L52.1b"
+                     /db_xref="GeneID:181792"
+                     /db_xref="UniProtKB/TrEMBL:A0A0K3AWR5"
+                     /db_xref="WormBase:WBGene00007063"
+                     /translation="MSDNEEVYVNFRGMNCISTGKSASMVPSKRRNWPKRVKKRLSTQ
+                     RNNQKTIRPPELNKNNIEIKDMNSNNLEERNREECIQPVSVEKNILHFEKFKSNQICI
+                     VRENNKFREGTRRRRKNSGESEDLKIHENFTEKRRPIRSCKQNISFYEMDGDIEEFEV
+                     FFDTPTKSKKVLLDIYSAKKMPKIEVEDSLVNKFHSKRPSRACRVLGSMEEVPFDVEI
+                     GY"
+ORIGIN      
+        1 atgtcagata atgaagaagt atatgtgaac ttccgtggaa tgaactgtat ctcaacagga
+       61 aagtcggcca gtatggtccc gagcaaacga agaaattggc caaaaagagt gaagaaaagg
+      121 ctatcgacac aaagaaacaa tcagaaaact attcgaccac cagagctgaa taaaaataat
+      181 atagagataa aagatatgaa ctcaaataac cttgaagaac gcaacagaga agaatgcatt
+      241 cagcctgttt ctgttgaaaa gaacatcctg cattttgaaa aattcaaatc aaatcaaatt
+      301 tgcattgttc gggaaaacaa taaatttaga gaaggaacga gaagacgcag aaagaattct
+      361 ggtgaatcgg aagacttgaa aattcatgaa aactttactg aaaaacgaag acccattcga
+      421 tcatgcaaac aaaatataag tttctatgaa atggacgggg atatagaaga atttgaagtg
+      481 tttttcgata ctcccacaaa aagcaaaaaa gtacttctgg atatctacag tgcgaagaaa
+      541 atgccaaaaa ttgaggttga agattcatta gttaataagt ttcattcaaa acgtccatca
+      601 agagcatgtc gagttcttgg aagtatggaa gaagtaccat ttgatgtgga aataggatat
+      661 tga
+//
+EOF
+sub test_refseq {
+  my ($record, $type) = @_;
+  my $file = "something_that_says_$type";
+  test_parser("XrefParser::RefSeqGPFFParser",$record, {
+    xref =>1,
+    primary_xref => 1,
+  }, "$type: Example record" , tmp_file_name => $file);
+  test_parser("XrefParser::WormbaseCElegansRefSeqGPFFParser",$record, {
+  }, "$type no entries without WormBase records" , tmp_file_name => $file);
+
+  test_parser("XrefParser::WormbaseDirectParser", $wormbase_celegans_xrefs_head,  
+      $wormbase_celegans_xrefs_expected_count, "$type test again to set up the next test",
+  skip_clean => 1);
+  test_parser("XrefParser::WormbaseCElegansRefSeqGPFFParser",$record,  {
+    %$wormbase_celegans_xrefs_expected_count,
+    xref => $wormbase_celegans_xrefs_expected_count->{xref}+1,
+    dependent_xref => 1,
+  }, "$type RefSeq entries hang off INSDC entries", tmp_file_name => $file);
+}
+test_refseq($refseq_protein_elegans_record, "protein");
+test_refseq($refseq_mrna_elegans_record, "mrna");
 done_testing();
 
+