diff --git a/misc-scripts/xref_mapping/XrefParser/RefSeqGPFFParser.pm b/misc-scripts/xref_mapping/XrefParser/RefSeqGPFFParser.pm
index 71d793818afd551b3dbcca71dfef1d304ad0f91f..299cb52e224c5b315f13a3e854f797a2365a5903 100644
--- a/misc-scripts/xref_mapping/XrefParser/RefSeqGPFFParser.pm
+++ b/misc-scripts/xref_mapping/XrefParser/RefSeqGPFFParser.pm
@@ -185,9 +185,40 @@ sub create_xrefs {
 
   while ( $_ = $refseq_io->getline() ) {
 
-    my $xref;
+    my $xref = $self->xref_from_record(
+      $_,
+      \%name2species_id, \%taxonomy2species_id, 
+      $pred_mrna_source_id, $pred_ncrna_source_id,
+      $mrna_source_id, $ncrna_source_id,
+      $pred_peptide_source_id, $peptide_source_id,
+      $entrez_source_id, $wiki_source_id, $add_dependent_xref_sth,
+      $species_id, $type, \%refseq_ids,\%entrez_ids,\%wiki_ids
+     );
+
+      push @xrefs, $xref if $xref;
 
-    my $entry = $_;
+  } # while <REFSEQ>
+
+  $refseq_io->close();
+
+  print "Read " . scalar(@xrefs) ." xrefs from $file\n" if($verbose);
+
+  return \@xrefs;
+
+}
+sub xref_from_record {
+    my ( $self, $entry, $name2species_id, $taxonomy2species_id,
+      $pred_mrna_source_id, $pred_ncrna_source_id,
+      $mrna_source_id, $ncrna_source_id,
+      $pred_peptide_source_id, $peptide_source_id,
+      $entrez_source_id, $wiki_source_id, $add_dependent_xref_sth,
+      $species_id, $type, $refseq_ids,$entrez_ids,$wiki_ids
+) = @_;
+    my %name2species_id = %$name2species_id;
+    my %taxonomy2species_id = %$taxonomy2species_id;
+    my %refseq_ids = %$refseq_ids;
+    my %entrez_ids = %$entrez_ids;
+    my %wiki_ids = %$wiki_ids;
     chomp $entry;
 
     my ($species) = $entry =~ /\s+ORGANISM\s+(.*)\n/;
@@ -209,6 +240,7 @@ sub create_xrefs {
         && defined $species_id_check
         && $species_id == $species_id_check )
     {
+      my $xref = {};
       my ($acc) = $entry =~ /ACCESSION\s+(\S+)/;
       my ($ver) = $entry =~ /VERSION\s+(\S+)/;
       my ($refseq_pair) = $entry =~ /DBSOURCE\s+REFSEQ: accession (\S+)/;
@@ -328,19 +360,8 @@ sub create_xrefs {
       # Don't add SGD Xrefs, as they are mapped directly from SGD ftp site
 
       # Refseq's do not tell whether the mim is for the gene of morbid so ignore for now.
-
-      push @xrefs, $xref;
-
-    }# if defined species
-
-  } # while <REFSEQ>
-
-  $refseq_io->close();
-
-  print "Read " . scalar(@xrefs) ." xrefs from $file\n" if($verbose);
-
-  return \@xrefs;
-
+      return $xref;
+  }
 }
 
 # --------------------------------------------------------------------------------
diff --git a/misc-scripts/xref_mapping/XrefParser/WormbaseCElegansBase.pm b/misc-scripts/xref_mapping/XrefParser/WormbaseCElegansBase.pm
new file mode 100644
index 0000000000000000000000000000000000000000..8339c51a6a3467328a5043a58f10f9d1ead71b01
--- /dev/null
+++ b/misc-scripts/xref_mapping/XrefParser/WormbaseCElegansBase.pm
@@ -0,0 +1,44 @@
+=head1 LICENSE
+
+Copyright [2018] EMBL-European Bioinformatics Institute
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+
+=cut
+
+package XrefParser::WormbaseCElegansBase;
+
+# For every dependent of $xref that comes from $source_id and is already stored (per
+# get_xref), return a copy promoted to parent; $xref and its other dependents, minus
+# those from @source_ids_skip, become its dependents. $dbi is accepted but not used.
+sub swap_dependency {
+  my ($self, $source_id, $dbi, $xref, @source_ids_skip) = @_;
+  my (@new_parents, @kept_dependents);
+  for my $dependent (@{$xref->{DEPENDENT_XREFS} || []}) {
+    my $dependent_source_id = $dependent->{SOURCE_ID};
+    if ($dependent_source_id eq $source_id
+        and $self->get_xref($dependent->{ACCESSION}, $dependent->{SOURCE_ID}, $xref->{SPECIES_ID})) {
+      $dependent->{SPECIES_ID} = $xref->{SPECIES_ID};
+      push @new_parents, $dependent;
+      next;
+    }
+    push @kept_dependents, $dependent unless grep { $_ == $dependent_source_id } @source_ids_skip;
+  }
+  my @swapped;
+  for my $parent (@new_parents) {
+    my @children = map { +{ %$_, INFO_TYPE => "DEPENDENT", LINKAGE_SOURCE_ID => $source_id } } ($xref, @kept_dependents);
+    push @swapped, { %$parent, LABEL => undef, INFO_TYPE => "MISC", DEPENDENT_XREFS => \@children };
+  }
+  return @swapped;
+}
+1;
diff --git a/misc-scripts/xref_mapping/XrefParser/WormbaseCElegansRefSeqGPFFParser.pm b/misc-scripts/xref_mapping/XrefParser/WormbaseCElegansRefSeqGPFFParser.pm
new file mode 100644
index 0000000000000000000000000000000000000000..7cae198f10f23a557c8df1a2a3739234d532fd0d
--- /dev/null
+++ b/misc-scripts/xref_mapping/XrefParser/WormbaseCElegansRefSeqGPFFParser.pm
@@ -0,0 +1,49 @@
+=head1 LICENSE
+
+Copyright [2018] EMBL-European Bioinformatics Institute
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+
+=cut
+
+package XrefParser::WormbaseCElegansRefSeqGPFFParser;
+
+use parent XrefParser::WormbaseCElegansBase, XrefParser::RefSeqGPFFParser;
+
+my $source_id;
+
+# Pass every parsed xref through swap_dependency (keyed on the 'protein_id' source)
+# and hand the resulting graphs to the parent class for upload.
+sub upload_xref_object_graphs {
+  my ($self, $xrefs, $dbi) = @_;
+  # $source_id is a file-level lexical, so the lookup happens once per process.
+  $source_id //= $self->get_source_id_for_source_name('protein_id');
+  my @swapped = map { $self->swap_dependency($source_id, $dbi, $_) } @$xrefs;
+  return $self->SUPER::upload_xref_object_graphs(\@swapped, $dbi);
+}
+# Build the xref for one record via the parent class and add the INSDC protein id
+# named in the WormBase curation comment as a dependent xref ('protein_id' source).
+# Returns nothing when the parent yields no xref or the record has no such comment.
+sub xref_from_record {
+  my ($self, $entry, @args) = @_;
+  # The parent returns a false value for records it rejects; do not autovivify one.
+  my $xref = $self->SUPER::xref_from_record($entry, @args) or return;
+  # Capture in list context: a failed match yields undef instead of a stale $1.
+  my ($insdc_protein_id) = $entry
+    =~ /This record has been curated by WormBase. The\s+reference sequence is identical to (.*?)\./;
+  return unless $insdc_protein_id;
+  $source_id //= $self->get_source_id_for_source_name('protein_id');
+  push @{ $xref->{DEPENDENT_XREFS} }, { ACCESSION => $insdc_protein_id, SOURCE_ID => $source_id };
+  return $xref;
+}
+1;
diff --git a/misc-scripts/xref_mapping/XrefParser/WormbaseCElegansUniProtParser.pm b/misc-scripts/xref_mapping/XrefParser/WormbaseCElegansUniProtParser.pm
new file mode 100644
index 0000000000000000000000000000000000000000..0652bbc6ea6aa0d0ee24233871795c5f3b551abd
--- /dev/null
+++ b/misc-scripts/xref_mapping/XrefParser/WormbaseCElegansUniProtParser.pm
@@ -0,0 +1,43 @@
+=head1 LICENSE
+
+Copyright [2018] EMBL-European Bioinformatics Institute
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+
+=cut
+
+package XrefParser::WormbaseCElegansUniProtParser;
+
+# UniProt xrefs are sometimes - in effect - dependent xrefs of INSDC entries,
+# which we get from a different source.
+# This parser attempts to find the INSDC parent, which has to be present in the
+# xref table already. INSDC and UniProt entries have the same protein sequence,
+# and UniProt lists the INSDC entry as a parent, so UniProt entries are made
+# dependent on the INSDC entries.
+# Note:
+# INSDC entries have coordinates, and UniProt entries don't.
+# So for perfect homologs, there can be many INSDC entries per UniProt entry.
+
+use parent XrefParser::WormbaseCElegansBase, XrefParser::UniProtParser;
+
+# Pass every parsed xref through swap_dependency, keyed on the 'protein_id' source
+# and with the 'EMBL' source as the one to skip, then upload the resulting graphs
+# through the parent class.
+sub upload_xref_object_graphs {
+  my ($self, $xrefs, $dbi) = @_;
+  my $protein_id_source = $self->get_source_id_for_source_name('protein_id');
+  my $embl_source       = $self->get_source_id_for_source_name('EMBL');
+  my @swapped = map { $self->swap_dependency($protein_id_source, $dbi, $_, $embl_source) } @$xrefs;
+  return $self->SUPER::upload_xref_object_graphs(\@swapped, $dbi);
+}
+1;
diff --git a/misc-scripts/xref_mapping/XrefParser/WormbaseDirectParser.pm b/misc-scripts/xref_mapping/XrefParser/WormbaseDirectParser.pm
index 0d570d862f638facab43264116f0f2fa0bdc0817..92ea687e517c2d4e92df7f3f0f2b28dc7bbd238d 100644
--- a/misc-scripts/xref_mapping/XrefParser/WormbaseDirectParser.pm
+++ b/misc-scripts/xref_mapping/XrefParser/WormbaseDirectParser.pm
@@ -28,34 +28,56 @@ use XrefParser::BaseParser;
 
 use base qw( XrefParser::BaseParser );
 
-
 sub run {
+  # Store direct xrefs for the genes, transcripts and translations read by get_data.
+  # Returns 0 on success and 1 when get_data could not open the input file.
 
   my ($self, $ref_arg) = @_;
   my $source_id    = $ref_arg->{source_id};
   my $species_id   = $ref_arg->{species_id};
   my $files        = $ref_arg->{files};
-  my $verbose      = $ref_arg->{verbose};
 
   if((!defined $source_id) or (!defined $species_id) or (!defined $files)){
     croak "Need to pass source_id, species_id and files as pairs";
   }
-  $verbose |=0;
 
-  my $file = @{$files}[0];
-
-  my $wormbasegene_src_id = $self->get_source_id_for_source_name('wormbase_gene');
-  my $wormbasegseq_src_id = $self->get_source_id_for_source_name('wormbase_gseqname');
-  my $wormbaselocus_src_id = $self->get_source_id_for_source_name('wormbase_locus');
-  my $wormbasetran_src_id = $self->get_source_id_for_source_name('wormbase_transcript');
-  my $wormpep_src_id = $self->get_source_id_for_source_name('wormpep_id');
-
-  my $xref_wgene_sth = $self->dbi()->prepare("SELECT xref_id FROM xref WHERE accession=? AND source_id=$wormbasegene_src_id AND species_id=$species_id");
-  my $xref_gseq_sth = $self->dbi()->prepare("SELECT xref_id FROM xref WHERE accession=? AND source_id=$wormbasegseq_src_id AND species_id=$species_id");
-  my $xref_wloc_sth = $self->dbi()->prepare("SELECT xref_id FROM xref WHERE accession=? AND source_id=$wormbaselocus_src_id AND species_id=$species_id");
-  my $xref_wtran_sth = $self->dbi()->prepare("SELECT xref_id FROM xref WHERE accession=? AND source_id=$wormbasetran_src_id AND species_id=$species_id");
-  my $xref_wpep_sth = $self->dbi()->prepare("SELECT xref_id FROM xref WHERE accession=? AND source_id=$wormpep_src_id AND species_id=$species_id");
-
+  my %src_ids = map { $_ => scalar $self->get_source_id_for_source_name($_) }
+    qw/wormbase_gene wormbase_gseqname wormbase_locus wormbase_transcript wormpep_id protein_id/;
+  # One statement serves every source: accession and source_id are bound per lookup.
+  my $sth = $self->dbi()->prepare("SELECT xref_id FROM xref WHERE accession=? AND source_id=? AND species_id=$species_id");
+  my $data = $self->get_data(@$files);
+  # get_data hands back 1 rather than a hash when it cannot get a file handle;
+  # dereferencing that below would fail, so pass the error status on instead.
+  return 1 unless ref($data) eq 'HASH';
+
+  for my $gene_id (keys %$data){
+    my $gene = $data->{$gene_id};
+    # Gene-level xrefs share the gene id as accession; only source and label vary.
+    $self->add_xref_and_direct_xref($sth, $species_id, "gene", $src_ids{wormbase_gene}, $gene_id, $gene_id);
+    $self->add_xref_and_direct_xref($sth, $species_id, "gene", $src_ids{wormbase_gseqname}, $gene_id, $gene->{wormbase_gseqname});
+    $self->add_xref_and_direct_xref($sth, $species_id, "gene", $src_ids{wormbase_locus}, $gene_id, $gene->{wormbase_locus});
+    for my $transcript (@{$gene->{transcripts}}){
+      my $transcript_id = $transcript->{transcript_id};
+      $self->add_xref_and_direct_xref(
+        $sth, $species_id, "transcript", $src_ids{wormbase_transcript}, $transcript_id, $transcript_id
+      );
+      # Translation-level xrefs are registered against the transcript id (last argument).
+      $self->add_xref_and_direct_xref(
+        $sth, $species_id, "translation", $src_ids{wormpep_id},
+        $transcript->{wormpep_id}, $transcript->{wormpep_id}, $transcript_id
+      );
+      $self->add_xref_and_direct_xref(
+        $sth, $species_id, "translation", $src_ids{protein_id},
+        $transcript->{protein_id}, $transcript->{protein_id}, $transcript_id
+      );
+    }
+  }
+
+  # A sub that ends in a loop has an unspecified return value; state success explicitly.
+  return 0;
+}
+sub get_data {
+  my ($self, $file) = @_;
   my $pep_io = $self->get_filehandle($file);
 
   if ( !defined $pep_io ) {
@@ -63,116 +85,38 @@ sub run {
     return 1;    # 1 error
   }
 
-  my ($x_count, $d_count);
-
-  my (%wbgene2seqid, %wbgene2loc, %tran2wbtran, %tran2wpep);
+  my $data = {};
 
   while ( $_ = $pep_io->getline() ) {
     next if /^\/\//;
-    
-    my ($gseqid, $wbgeneid, $locus, $wbtranscript, $wormpep) = split(/\t/, $_);
-
-    # Each WBGeneid should have only one sequence name and (optionally) one locus name
-    $wbgene2seqid{$wbgeneid} = $gseqid;
-    $wbgene2loc{$wbgeneid} = $locus if $locus ne '.';
-
-    $tran2wbtran{$wbtranscript} = 1;
-    $tran2wpep{$wbtranscript} = $wormpep if $wormpep ne '.';
-
+    my ($gseqid, $wbgeneid, $locus, $wbtranscript, $wormpep, $insdc_parent, $insdc_locus_tag, $protein_id, $uniprot_id) = split(/\t/, $_);
+    $data->{$wbgeneid}->{transcripts} //=[];
+    push @{$data->{$wbgeneid}->{transcripts}}, {
+      transcript_id => $wbtranscript,
+      ($wormpep eq '.' ? () : (wormpep_id => $wormpep)),
+      ($protein_id eq '.' ? () : (protein_id => $protein_id)),
+    };
+    $data->{$wbgeneid}->{wormbase_gseqname} = $gseqid;
+    $data->{$wbgeneid}->{wormbase_locus} =  $locus if $locus ne '.'; 
   }
   $pep_io->close();
-
-  foreach my $wbgid (keys %wbgene2seqid) {
-    # reuse or create xref
-    $xref_wgene_sth->execute($wbgid);
-    my $xref_id = ($xref_wgene_sth->fetchrow_array())[0];
-    if (!$xref_id) {
-      $xref_id = $self->add_xref({ acc        => $wbgid,
-                                   label      => $wbgid,
-                                   source_id  => $wormbasegene_src_id,
-                                   species_id => $species_id,
-                                   info_type  => "DIRECT"} );
-      $x_count++;
-    }
-    $self->add_direct_xref($xref_id, $wbgid, "gene", "");
-    $d_count++;
-    
-    my $gseqname = $wbgene2seqid{$wbgid};
-
-    $xref_gseq_sth->execute($wbgid);
-    $xref_id = ($xref_gseq_sth->fetchrow_array())[0];
-    if (not $xref_id) {
-      $xref_id = $self->add_xref({ acc        => $wbgid,
-                                   label      => $gseqname,
-                                   source_id  => $wormbasegseq_src_id,
-                                   species_id => $species_id,
-                                   info_type  => "DIRECT"} );
-      $x_count++;
-    }
-    $self->add_direct_xref($xref_id, $wbgid, "gene", "");
-    $d_count++;
-
-
-    if (exists $wbgene2loc{$wbgid}) {
-      my $loc_sym = $wbgene2loc{$wbgid};
-
-      $xref_wloc_sth->execute($wbgid);    
-      $xref_id = ($xref_wloc_sth->fetchrow_array())[0];
-      if (!$xref_id) {
-        $xref_id = $self->add_xref({ acc        => $wbgid,
-                                     label      => $loc_sym,
-                                     source_id  => $wormbaselocus_src_id,
-                                     species_id => $species_id,
-                                     info_type  => "DIRECT"} );
-        $x_count++;
-      }
-    }
-    
-    # and direct xref
-    $self->add_direct_xref($xref_id, $wbgid, "gene", "");
-    $d_count++;
-  }
-  
-
-  foreach my $tid (keys %tran2wbtran) {
-    $xref_wtran_sth->execute($tid);      
-    my $xref_id = ($xref_wtran_sth->fetchrow_array())[0];
-    if (!$xref_id) {
-      $xref_id = $self->add_xref({ acc        => $tid,
-                                   label      => $tid,
-                                   source_id  => $wormbasetran_src_id,
-                                   species_id => $species_id,
-                                   info_type  => "DIRECT"} );
-      $x_count++;
-    }
-    
-    # and direct xref
-    $self->add_direct_xref($xref_id, $tid, "transcript", "");
-    $d_count++;
-  }
-
-  foreach my $tid (keys %tran2wpep) {
-    my $wpep = $tran2wpep{$tid};
-
-    $xref_wpep_sth->execute($wpep);
-      
-    my $xref_id = ($xref_wpep_sth->fetchrow_array())[0];
-    if (!$xref_id) {
-      $xref_id = $self->add_xref({ acc        => $wpep,
-                                   label      => $wpep,
-                                   source_id  => $wormpep_src_id,
-                                   species_id => $species_id,
-                                   info_type  => "DIRECT"} );
-      $x_count++;
-    }
-
-    # and direct xref
-    $self->add_direct_xref($xref_id, $tid, "translation", "");
-    $d_count++;
-  }
-
-  print "Added $d_count direct xrefs and $x_count xrefs\n" if($verbose);
-  return 0;
+  return $data;
 }
 
+# Ensure an xref with accession $object_id exists for $source_id (reusing the row that
+# $sth finds, otherwise calling add_xref) and add a direct xref from it to $primary_id.
+# $primary_id defaults to $object_id; nothing is done when $label is empty.
+sub add_xref_and_direct_xref {
+  my ($self, $sth, $species_id, $object_type, $source_id,  $object_id, $label, $primary_id) = @_;
+  $primary_id //= $object_id;
+  return unless $label;
+  # Look up by the accession the xref is stored under ($object_id), not by $primary_id.
+  $sth->execute($object_id, $source_id);
+  my ($xref_id) = $sth->fetchrow_array();
+  $xref_id ||= $self->add_xref({
+    acc => $object_id, label => $label, source_id => $source_id,
+    species_id => $species_id, info_type => "DIRECT",
+  });
+  return $self->add_direct_xref($xref_id, $primary_id, $object_type, "");
+}
 1;
diff --git a/modules/t/xref_parser.t b/modules/t/xref_parser.t
index 2c77b54ea78293c4083a393f618bd1af2df086fb..95ad36a12780694214f6ba1cb042aefa23a4518d 100644
--- a/modules/t/xref_parser.t
+++ b/modules/t/xref_parser.t
@@ -55,32 +55,37 @@ my %xref_tables_expected_empty_by_default = (
 );
 my $tmp_dir = tempdir(CLEANUP=>1);
 sub store_in_temporary_file {
-  my $path = "$tmp_dir/tmp";
-  open(my $fh, ">", $path) or die $path;  
-  print $fh @_;
-  close($fh);
+  my ($content, %opts) = @_; # $content: text to write; %opts: optional tmp_file_name
+  my $path = join("/", $tmp_dir, $opts{tmp_file_name} || "tmp"); # file name defaults to "tmp"
+  open(my $fh, ">", $path) or die "Cannot open $path for writing: $!";
+  print {$fh} $content;
+  close($fh) or die "Cannot close $path: $!"; # buffered write errors surface at close
   return $path;
 }
+# Happens to match the species id of the core database
+my $SPECIES_ID = 1;
+my $SPECIES_NAME = "Homo sapiens";
 sub test_parser {
-  my ($parser, $content, $source_id, $expected, $test_name) = @_;
+  my ($parser, $content, $expected, $test_name, %opts) = @_; # %opts: tmp_file_name, skip_clean
   require_ok($parser);
   $parser->new($database)->run({
-   files => [store_in_temporary_file($content)],
-   source_id => $source_id,
-   species_id => 1 #Happens to be right, but doesn't matter anyway - we are not testing the mapping
+   files => [store_in_temporary_file($content, %opts)], # %opts carries tmp_file_name through
+   source_id => "Source id (unused but sometimes required)",
+   species_id => $SPECIES_ID,
+   species => $SPECIES_NAME,
   });
   my $expected_table_counts = {%xref_tables_expected_empty_by_default, %$expected};
   subtest "$parser $test_name" => sub {
     plan tests => scalar(keys %$expected_table_counts);
     for my $table (keys %$expected_table_counts){
       my $actual_count = count_rows($dba, $table);
-      $dba->dbc->prepare("delete from $table;")->execute() if $actual_count;
+      $dba->dbc->prepare("delete from $table;")->execute() if ($actual_count and not $opts{skip_clean}); # skip_clean leaves the rows in place for a following test
       my $expected_count = $expected_table_counts->{$table};
       is($actual_count, $expected_count, "$table has $expected_count rows") or diag "$table has $actual_count rows";
     }
   }
 }
-test_parser("XrefParser::WormbaseDirectParser", "", "source_id (unused)", {}, "null case");
+test_parser("XrefParser::WormbaseDirectParser", "",  {}, "null case");
 my $wormbase_celegans_xrefs_head= <<EOF;
 //
 // WormBase Caenorhabditis elegans XREFs for WS265
@@ -102,11 +107,175 @@ my $wormbase_celegans_xrefs_head= <<EOF;
 2L52.1	WBGene00007063	.	2L52.1a	CE32090	BX284602	CELE_2L52.1	CCD61130	A4F336
 2L52.2	WBGene00200402	.	2L52.2	.	BX284602	CELE_2L52.2	.	.
 EOF
-test_parser("XrefParser::WormbaseDirectParser", $wormbase_celegans_xrefs_head, "source_id (unused)", {
-xref=>9,
-gene_direct_xref => 6,
+my $wormbase_celegans_xrefs_expected_count = {
+xref=>11,
+gene_direct_xref => 4,
 transcript_direct_xref => 3,
-translation_direct_xref => 2,
-}, "Direct xrefs: genes: count currently off due to some questionable duplicates, transcripts: as in column 4, translations: as in column 5. At least one direct xref per xref (but should be one to one)");
+translation_direct_xref => 4,
+};
+test_parser("XrefParser::WormbaseDirectParser", $wormbase_celegans_xrefs_head,  
+   $wormbase_celegans_xrefs_expected_count, "Direct xrefs: genes: columns 1,2,3, transcripts: column 4, translations: column 5 and 7. xrefs: sum of these "
+);
+
+my $uniprot_elegans_record = <<EOF;
+ID   A0A0K3AWR5_CAEEL        Unreviewed;       220 AA.
+AC   A0A0K3AWR5;
+DT   11-NOV-2015, integrated into UniProtKB/TrEMBL.
+DT   11-NOV-2015, sequence version 1.
+DT   18-JUL-2018, entry version 14.
+DE   SubName: Full=Uncharacterized protein {ECO:0000313|EMBL:CTQ86426.1};
+GN   ORFNames=2L52.1 {ECO:0000313|EMBL:CTQ86426.1,
+GN   ECO:0000313|WormBase:2L52.1b},
+GN   CELE_2L52.1 {ECO:0000313|EMBL:CTQ86426.1};
+OS   $SPECIES_NAME.
+OC   Eukaryota; Metazoa; Ecdysozoa; Nematoda; Chromadorea; Rhabditida;
+OC   Rhabditoidea; Rhabditidae; Peloderinae; Caenorhabditis.
+OX   NCBI_TaxID=$SPECIES_ID {ECO:0000313|EMBL:CTQ86426.1, ECO:0000313|Proteomes:UP000001940};
+RN   [1] {ECO:0000313|EMBL:CTQ86426.1, ECO:0000313|Proteomes:UP000001940}
+RP   NUCLEOTIDE SEQUENCE [LARGE SCALE GENOMIC DNA].
+RC   STRAIN=Bristol N2 {ECO:0000313|EMBL:CTQ86426.1,
+RC   ECO:0000313|Proteomes:UP000001940};
+RX   PubMed=9851916; DOI=https://doi.org/10.1126/science.282.5396.2012;
+RG   The C. elegans sequencing consortium;
+RA   Sulson J.E., Waterston R.;
+RT   "Genome sequence of the nematode C. elegans: a platform for
+RT   investigating biology.";
+RL   Science 282:2012-2018(1998).
+CC   -----------------------------------------------------------------------
+CC   Copyrighted by the UniProt Consortium, see https://www.uniprot.org/terms
+CC   Distributed under the Creative Commons Attribution (CC BY 4.0) License
+CC   -----------------------------------------------------------------------
+DR   EMBL; BX284602; CTQ86426.1; -; Genomic_DNA.
+DR   RefSeq; NP_001300487.1; NM_001313558.1.
+DR   UniGene; Cel.25279; -.
+DR   EnsemblMetazoa; 2L52.1b; 2L52.1b; WBGene00007063.
+DR   GeneID; 181792; -.
+DR   CTD; 181792; -.
+DR   WormBase; 2L52.1b; CE50569; WBGene00007063; -.
+DR   Proteomes; UP000001940; Chromosome II.
+DR   ExpressionAtlas; A0A0K3AWR5; baseline and differential.
+PE   4: Predicted;
+KW   Complete proteome {ECO:0000313|Proteomes:UP000001940};
+KW   Reference proteome {ECO:0000313|Proteomes:UP000001940}.
+SQ   SEQUENCE   220 AA;  26028 MW;  E12D5EA7F6FFF373 CRC64;
+     MSDNEEVYVN FRGMNCISTG KSASMVPSKR RNWPKRVKKR LSTQRNNQKT IRPPELNKNN
+     IEIKDMNSNN LEERNREECI QPVSVEKNIL HFEKFKSNQI CIVRENNKFR EGTRRRRKNS
+     GESEDLKIHE NFTEKRRPIR SCKQNISFYE MDGDIEEFEV FFDTPTKSKK VLLDIYSAKK
+     MPKIEVEDSL VNKFHSKRPS RACRVLGSME EVPFDVEIGY
+//
+EOF
+test_parser("XrefParser::UniProtParser", $uniprot_elegans_record,  {
+  xref => 3,
+  primary_xref => 1,
+  dependent_xref => 2,
+},"Example UniProt record"); 
+(my $uniprot_elegans_record_embl = $uniprot_elegans_record) =~ s/DR   EMBL;.*?\n//;
+test_parser("XrefParser::UniProtParser", $uniprot_elegans_record_embl,  {
+  xref => 1,
+  primary_xref => 1,
+},"EMBL entries are the dependent xrefs");
+my @recognised_sources = (
+ "PDB; 3HRI; X-ray; 2.85 A; A/B/C/D/E/F=44-477.",
+ "MEROPS; C26.956; -.",
+);
+for my $l (@recognised_sources) {
+  (my $uniprot_elegans_record_extra_line = $uniprot_elegans_record) =~ s/DR(.*?)\n/DR$1\nDR  $l/;
+  test_parser("XrefParser::UniProtParser", $uniprot_elegans_record_extra_line,  {
+    xref => 4,
+    primary_xref => 1,
+    dependent_xref => 3,
+  }, "Pick up as extra xref + dependent xref: $l" );
+} 
+test_parser("XrefParser::WormbaseCElegansUniProtParser", $uniprot_elegans_record,  {
+}, "No UniProt entries without corresponding INSDC entries");
+
+test_parser("XrefParser::WormbaseDirectParser", $wormbase_celegans_xrefs_head,  
+  $wormbase_celegans_xrefs_expected_count, "Test again to set up the next test",
+skip_clean => 1);
+my $wormbase_and_uniprot_expected_count = {
+  %$wormbase_celegans_xrefs_expected_count,
+  xref => $wormbase_celegans_xrefs_expected_count->{xref}+1, 
+  dependent_xref => 1 #protein id still there, no parent sequence ID 
+};
+test_parser("XrefParser::WormbaseCElegansUniProtParser", $uniprot_elegans_record, 
+  $wormbase_and_uniprot_expected_count, "Get counts");
+
+for my $l (@recognised_sources) {
+  (my $uniprot_elegans_record_extra_line = $uniprot_elegans_record) =~ s/DR(.*?)\n/DR$1\nDR  $l/;
+  test_parser("XrefParser::WormbaseDirectParser", $wormbase_celegans_xrefs_head,  
+    $wormbase_celegans_xrefs_expected_count, "Test again to set up the next test",
+  skip_clean => 1);
+  test_parser("XrefParser::WormbaseCElegansUniProtParser", $uniprot_elegans_record_extra_line,  {
+    %$wormbase_and_uniprot_expected_count,
+    xref => $wormbase_and_uniprot_expected_count->{xref}+1,
+    dependent_xref => $wormbase_and_uniprot_expected_count->{dependent_xref}+1,
+  }, "Pick up as extra xref + dependent xref: $l"  );
+}
+my $refseq_protein_elegans_record = <<EOF;
+LOCUS       NP_493629                427 aa            linear   INV 19-AUG-2018
+DEFINITION  Uncharacterized protein CELE_2L52.1 [Caenorhabditis elegans].
+ACCESSION   NP_493629
+VERSION     NP_493629.2
+DBLINK      BioProject: PRJNA158
+            BioSample: SAMEA3138177
+DBSOURCE    REFSEQ: accession NM_061228.2
+KEYWORDS    RefSeq.
+SOURCE      Caenorhabditis elegans
+  ORGANISM  Caenorhabditis elegans
+            Eukaryota; Metazoa; Ecdysozoa; Nematoda; Chromadorea; Rhabditida;
+            Rhabditoidea; Rhabditidae; Peloderinae; Caenorhabditis.
+REFERENCE   
+  <snipped>
+COMMENT     REVIEWED REFSEQ: This record has been curated by WormBase. The
+            reference sequence is identical to CCD61130.
+FEATURES             Location/Qualifiers
+     source          1..427
+                     /organism="Caenorhabditis elegans"
+                     /strain="Bristol N2"
+                     /db_xref="taxon:$SPECIES_ID"
+                     /chromosome="II"
+     Protein         1..427
+                     /product="hypothetical protein"
+                     /calculated_mol_wt=49887
+     CDS             1..427
+                     /gene="2L52.1"
+                     /locus_tag="CELE_2L52.1"
+                     /standard_name="2L52.1a"
+                     /coded_by="NM_061228.2:1..1284"
+                     /note="Confirmed by transcript evidence"
+                     /db_xref="EnsemblGenomes-Gn:WBGene00007063"
+                     /db_xref="EnsemblGenomes-Tr:2L52.1a"
+                     /db_xref="GeneID:181792"
+                     /db_xref="GOA:A4F336"
+                     /db_xref="InterPro:IPR013087"
+                     /db_xref="UniProtKB/TrEMBL:A4F336"
+                     /db_xref="WormBase:WBGene00007063"
+ORIGIN      
+        1 msmvrnvsnq sekleilsck wvgclkstev fktveklldh vtadhipevi vnddgseevv
+       61 cqwdccemga srgnlqkkke wmenhfktrh vrkakifkcl iedcpvvkss sqeiethlri
+      121 shpinpkker lkefksstdh ieptqanrvw tivngevqwk tpprvkkktv iyyddgpryv
+      181 fptgcarcny dsdeselesd efwsatemsd neevyvnfrg mncistgksa smvpskrrnw
+      241 pkrvkkrlst qrnnqktirp pelnknniei kdmnsnnlee rnreeciqpv sveknilhfe
+      301 kfksnqiciv rennkfregt rrrrknsges edlkihenft ekrrpirsck qnisfyemdg
+      361 dieefevffd tptkskkvll diysakkmpk ievedslvnk fhskrpsrac rvlgsmeevp
+      421 fdveigy
+//
+EOF
+test_parser("XrefParser::RefSeqGPFFParser",$refseq_protein_elegans_record, {
+  xref =>1,
+  primary_xref => 1,
+}, "Example RefSeq protein record" , tmp_file_name => "something_that_says_protein");
+test_parser("XrefParser::WormbaseCElegansRefSeqGPFFParser",$refseq_protein_elegans_record, {
+}, "No entries without WormBase records" , tmp_file_name => "something_that_says_protein");
+
+test_parser("XrefParser::WormbaseDirectParser", $wormbase_celegans_xrefs_head,  
+    $wormbase_celegans_xrefs_expected_count, "Test again to set up the next test",
+skip_clean => 1);
+test_parser("XrefParser::WormbaseCElegansRefSeqGPFFParser",$refseq_protein_elegans_record,  {
+  %$wormbase_celegans_xrefs_expected_count,
+  xref => $wormbase_celegans_xrefs_expected_count->{xref}+1,
+  dependent_xref => 1,
+}, "RefSeq entries hang off INSDC entries", tmp_file_name => "something_that_says_protein");
 done_testing();
 
+