From 7fe63560a9d6d423abbcad84f8ae7919fe49abb6 Mon Sep 17 00:00:00 2001
From: Glenn Proctor <gp1@sanger.ac.uk>
Date: Mon, 7 Feb 2005 10:59:10 +0000
Subject: [PATCH] Updated to reflect the fact that a CCDS may be linked to more
 than one transcript. Hence need to add xrefs only once, i.e. check for
 existence before adding.

---
 .../xref_mapping/XrefParser/CCDSParser.pm     | 24 +++++++++++++++----
 1 file changed, 20 insertions(+), 4 deletions(-)

diff --git a/misc-scripts/xref_mapping/XrefParser/CCDSParser.pm b/misc-scripts/xref_mapping/XrefParser/CCDSParser.pm
index ea3a3d4b63..36a09c884f 100644
--- a/misc-scripts/xref_mapping/XrefParser/CCDSParser.pm
+++ b/misc-scripts/xref_mapping/XrefParser/CCDSParser.pm
@@ -2,6 +2,8 @@ package XrefParser::CCDSParser;
 
 use strict;
 
+use DBI;
+
 use XrefParser::BaseParser;
 
 use vars qw(@ISA);
@@ -9,6 +11,8 @@ use vars qw(@ISA);
 
 # Parse file of CCDS records and assign direct xrefs
 # All assumed to be linked to transcripts
+# The same CCDS may be linked to more than one transcript, but its xref must
+# only be added once, so check whether it already exists before adding it.
 
 sub run {
 
@@ -16,19 +20,31 @@ sub run {
 
   open(CCDS,"<".$file) || die "Could not open $file\n";
 
-  my $count = 0;
+  my $line_count = 0;
+  my $xref_count = 0;
+
+  my $xref_sth = $self->dbi()->prepare("SELECT xref_id FROM xref WHERE accession=? AND version=? AND source_id=$source_id AND species_id=$species_id");
 
   while (<CCDS>) {
 
     my ($stable_id, $ccds) = split;
 
-    my $xref_id = $self->add_xref($ccds, 1, $ccds, "", $source_id, $species_id);
+    my ($acc, $version) = split (/\./, $ccds);
+    $line_count++;
+
+    # check if an xref already exists
+    $xref_sth->execute($acc, $version);
+    my $xref_id = ($xref_sth->fetchrow_array())[0];
+    if (!$xref_id) {
+      $xref_id = $self->add_xref($acc, $version, $ccds, "", $source_id, $species_id);
+      $xref_count++;
+    }
+
     $self->add_direct_xref($xref_id, $stable_id, "transcript", "");
-    $count++;
 
   }
 
-  print "Parsed $count CCDS identifiers from $file\n";
+  print "Parsed $line_count CCDS identifiers from $file, added $xref_count xrefs and $line_count direct_xrefs\n";
 
   close(CCDS);
 
-- 
GitLab