From b0b66b4dab0941e40fe6141106bf5fbab70c7092 Mon Sep 17 00:00:00 2001
From: Ian Longden <ianl@sanger.ac.uk>
Date: Wed, 8 Dec 2004 11:18:04 +0000
Subject: [PATCH] source_id removed from synonym, dependent_xref and
 primary_xref; this information is available via the xref. linkage_source_id
 added to dependent_xref. create_xrefs now also returns an arrayref

---
 .../xref_mapping/XrefParser/BaseParser.pm     | 46 +++++++++++--------
 .../XrefParser/RefSeqGPFFParser.pm            |  7 ++-
 .../xref_mapping/XrefParser/RefSeqParser.pm   |  2 +-
 .../xref_mapping/XrefParser/UniProtParser.pm  |  6 ++-
 misc-scripts/xref_mapping/sql/table.sql       |  4 +-
 5 files changed, 39 insertions(+), 26 deletions(-)

diff --git a/misc-scripts/xref_mapping/XrefParser/BaseParser.pm b/misc-scripts/xref_mapping/XrefParser/BaseParser.pm
index 9b9fc46bc8..145c320ff6 100644
--- a/misc-scripts/xref_mapping/XrefParser/BaseParser.pm
+++ b/misc-scripts/xref_mapping/XrefParser/BaseParser.pm
@@ -278,21 +278,24 @@ sub get_valid_codes{
 
 sub upload_xref_object_graphs {
 
-  my ($self, @xrefs) = @_;
+  my ($self, $rxrefs) = @_;
+
 
   my $dbi = dbi();
 
-  if ($#xrefs > -1) {
+#  print "count = ".$#$rxrefs."\n";
+
+  if ($#$rxrefs > -1) {
 
     # remove all existing xrefs with same source ID(s)
-    delete_by_source(\@xrefs);
+    delete_by_source($rxrefs);
 
     # upload new ones
     print "Uploading xrefs\n";
     my $xref_sth = $dbi->prepare("INSERT INTO xref (accession,version,label,description,source_id,species_id) VALUES(?,?,?,?,?,?)");
-    my $pri_insert_sth = $dbi->prepare("INSERT INTO primary_xref VALUES(?,?,?,?,?)");
+    my $pri_insert_sth = $dbi->prepare("INSERT INTO primary_xref VALUES(?,?,?,?)");
     my $pri_update_sth = $dbi->prepare("UPDATE primary_xref SET sequence=? WHERE xref_id=?");
-    my $syn_sth = $dbi->prepare("INSERT INTO synonym VALUES(?,?,?)");
+    my $syn_sth = $dbi->prepare("INSERT INTO synonym VALUES(?,?)");
     my $dep_sth = $dbi->prepare("INSERT INTO dependent_xref VALUES(?,?,?,?)");
     my $xref_update_label_sth = $dbi->prepare("UPDATE xref SET label=? WHERE xref_id=?");
     my $xref_update_descr_sth = $dbi->prepare("UPDATE xref SET description=? WHERE xref_id=?");
@@ -300,7 +303,7 @@ sub upload_xref_object_graphs {
     local $xref_sth->{RaiseError}; # disable error handling here as we'll do it ourselves
     local $xref_sth->{PrintError};
 
-    foreach my $xref (@xrefs) {
+    foreach my $xref (@{$rxrefs}) {
        my $xref_id;
       # Create entry in xref table and note ID
       if(! $xref_sth->execute($xref->{ACCESSION},
@@ -332,23 +335,26 @@ sub upload_xref_object_graphs {
 	$pri_insert_sth->execute($xref_id,
 				 $xref->{SEQUENCE},
 				 $xref->{SEQUENCE_TYPE},
-				 $xref->{STATUS},
-				 $xref->{SOURCE_ID}) || die $dbi->errstr;
+				 $xref->{STATUS}) || die $dbi->errstr;
       }
 
       # if there are synonyms, create xrefs for them and entries in the synonym table
       foreach my $syn (@{$xref->{SYNONYMS}}) {
 
-	$xref_sth->execute($syn,
-			   "",
-			   "",
-			   "",
-			   $xref->{SOURCE_ID},
-			   $xref->{SPECIES_ID});
-
-	my $syn_xref_id = insert_or_select($xref_sth, $dbi->err, $syn, $xref->{SOURCE_ID});
-
-	$syn_sth->execute($xref_id, $syn_xref_id, $xref->{SOURCE_ID} ) || die $dbi->errstr;
+	my $syn_xref_id = get_xref($syn,$xref->{SOURCE_ID});
+	if(!defined($syn_xref_id)){
+	  $xref_sth->execute($syn,
+			     "",
+			     "",
+			     "",
+			     $xref->{SOURCE_ID},
+			     $xref->{SPECIES_ID});
+	  $syn_xref_id = get_xref($syn,$xref->{SOURCE_ID});
+	}
+	if(!defined($syn_xref_id)){
+	  print STDERR $xref->{ACCESSION}."\n$syn\n";
+	}
+	$syn_sth->execute($xref_id, $syn_xref_id ) || die "$dbi->errstr \n $xref_id\n $syn_xref_id\n";
 
       }				# foreach syn
 
@@ -370,9 +376,9 @@ sub upload_xref_object_graphs {
 	  print STDERR "dbi\t$dbi->err \n$dep{ACCESSION} \n $dep{SOURCE_ID} \n";
 	}
 	if(!defined($dep_xref_id)){
-	  print STDERR "$dep{ACCESSION} \n $dep{SOURCE_ID} \n".$dbi->err."\n";
+	  print STDERR "acc = $dep{ACCESSION} \nlink = $dep{LINKAGE_SOURCE_ID} \n".$dbi->err."\n";
 	}
-	$dep_sth->execute($xref_id, $dep_xref_id, $dep{LINKAGE_ANNOTATION}, $dep{SOURCE_ID} ) || die $dbi->errstr;
+	$dep_sth->execute($xref_id, $dep_xref_id, $dep{LINKAGE_ANNOTATION}, $dep{LINKAGE_SOURCE_ID} ) || die $dbi->errstr;
 	# TODO linkage anntation?
 
       }				# foreach dep
diff --git a/misc-scripts/xref_mapping/XrefParser/RefSeqGPFFParser.pm b/misc-scripts/xref_mapping/XrefParser/RefSeqGPFFParser.pm
index bfc69b0881..8a677b9541 100644
--- a/misc-scripts/xref_mapping/XrefParser/RefSeqGPFFParser.pm
+++ b/misc-scripts/xref_mapping/XrefParser/RefSeqGPFFParser.pm
@@ -141,24 +141,28 @@ sub create_xrefs {
       foreach my $ll (@LocusIDline) {
 	my %dep;
 	$dep{SOURCE_ID} = $dependent_sources{LocusLink};
+	$dep{LINKAGE_SOURCE_ID} = $source_id;
 	$dep{ACCESSION} = $ll;
 	push @{$xref->{DEPENDENT_XREFS}}, \%dep;
       }
       foreach my $mim (@mimline) {
 	my %dep;
 	$dep{SOURCE_ID} = $dependent_sources{MIM};
+	$dep{LINKAGE_SOURCE_ID} = $source_id;
 	$dep{ACCESSION} = $mim;
 	push @{$xref->{DEPENDENT_XREFS}}, \%dep;
       }
       foreach my $med (@medline) {
 	my %dep;
 	$dep{SOURCE_ID} = $dependent_sources{MEDLINE};
+	$dep{LINKAGE_SOURCE_ID} = $source_id;
 	$dep{ACCESSION} = $med;
 	push @{$xref->{DEPENDENT_XREFS}}, \%dep;
       }
       foreach my $pub (@pubmed) {
 	my %dep;
 	$dep{SOURCE_ID} = $dependent_sources{PUBMED};
+	$dep{LINKAGE_SOURCE_ID} = $source_id;
 	$dep{ACCESSION} = $pub;
 	push @{$xref->{DEPENDENT_XREFS}}, \%dep;
       }
@@ -169,6 +173,7 @@ sub create_xrefs {
       if($mrna){
         my %mrna_dep;
         $mrna_dep{SOURCE_ID} = $source_id; # source is still RefSeq
+	$mrna_dep{LINKAGE_SOURCE_ID} = $source_id;
         my ($mrna_acc,$mrna_ver) = split (/\./,$mrna);
 
         $mrna_dep{ACCESSION} = $mrna_acc;
@@ -185,7 +190,7 @@ sub create_xrefs {
 
   print "Read " . scalar(@xrefs) ." xrefs from $file\n";
 
-  return @xrefs;
+  return \@xrefs;
 
 }
 
diff --git a/misc-scripts/xref_mapping/XrefParser/RefSeqParser.pm b/misc-scripts/xref_mapping/XrefParser/RefSeqParser.pm
index 7ea1d3f5a9..83f3eb3f72 100644
--- a/misc-scripts/xref_mapping/XrefParser/RefSeqParser.pm
+++ b/misc-scripts/xref_mapping/XrefParser/RefSeqParser.pm
@@ -121,7 +121,7 @@ sub create_xrefs {
 
   print "Read " . scalar(@xrefs) ." xrefs from $file\n";
 
-  return @xrefs;
+  return \@xrefs;
 
 }
 
diff --git a/misc-scripts/xref_mapping/XrefParser/UniProtParser.pm b/misc-scripts/xref_mapping/XrefParser/UniProtParser.pm
index 78449c9f27..90e90d2968 100644
--- a/misc-scripts/xref_mapping/XrefParser/UniProtParser.pm
+++ b/misc-scripts/xref_mapping/XrefParser/UniProtParser.pm
@@ -195,6 +195,7 @@ sub create_xrefs {
 	  # create dependent xref structure & store it
 	  my %dep;
 	  $dep{SOURCE_NAME} = $source;
+	  $dep{LINKAGE_SOURCE_ID} = $xref->{SOURCE_ID};
 	  $dep{SOURCE_ID} = $dependent_sources{$source};
 	  $dep{ACCESSION} = $acc;
 	  push @{$xref->{DEPENDENT_XREFS}}, \%dep; # array of hashrefs
@@ -205,6 +206,7 @@ sub create_xrefs {
 	      my %dep2;
 	      $dep2{SOURCE_NAME} = $source;
 	      $dep2{SOURCE_ID} = $dependent_sources{protein_id};
+	      $dep2{LINKAGE_SOURCE_ID} = $xref->{SOURCE_ID};
 	      $dep2{ACCESSION} = $protein_id;
 	      push @{$xref->{DEPENDENT_XREFS}}, \%dep2; # array of hashrefs
 	    }
@@ -219,6 +221,7 @@ sub create_xrefs {
 
       my %medline_dep;
       $medline_dep{SOURCE_ID} = $dependent_sources{PUBMED};
+      $medline_dep{LINKAGE_SOURCE_ID} = $xref->{SOURCE_ID};
       $medline_dep{ACCESSION} = $medline;
       push @{$xref->{DEPENDENT_XREFS}}, \%medline_dep;
 
@@ -229,6 +232,7 @@ sub create_xrefs {
 
       my %pubmed_dep;
       $pubmed_dep{SOURCE_ID} = $dependent_sources{PUBMED};
+      $pubmed_dep{LINKAGE_SOURCE_ID} = $xref->{SOURCE_ID};
       $pubmed_dep{ACCESSION} = $pubmed;
       push @{$xref->{DEPENDENT_XREFS}}, \%pubmed_dep;
 
@@ -242,7 +246,7 @@ sub create_xrefs {
 
   print "Read $num_sp SwissProt xrefs and $num_sptr SPTrEMBL xrefs from $file\n";
 
-  return @xrefs;
+  return \@xrefs;
 
   #TODO - currently include records from other species - filter on OX line??
 }
diff --git a/misc-scripts/xref_mapping/sql/table.sql b/misc-scripts/xref_mapping/sql/table.sql
index df13b89a24..105e7d6a01 100755
--- a/misc-scripts/xref_mapping/sql/table.sql
+++ b/misc-scripts/xref_mapping/sql/table.sql
@@ -28,7 +28,6 @@ CREATE TABLE primary_xref (
   sequence                    mediumtext,
   sequence_type               enum('dna','peptide'),
   status                      enum('experimental','predicted'),
-  source_id                   int unsigned not null,
 
   PRIMARY KEY (xref_id)
 
@@ -41,7 +40,7 @@ CREATE TABLE dependent_xref (
   master_xref_id              int unsigned not null,
   dependent_xref_id           int unsigned not null,
   linkage_annotation          varchar(255),
-  source_id                   int unsigned not null,
+  linkage_source_id           int unsigned not null,
 
   KEY master_idx(master_xref_id),
   KEY dependent_idx(dependent_xref_id)
@@ -54,7 +53,6 @@ CREATE TABLE synonym (
 
   xref_id                     int unsigned not null,
   synonym_xref_id             int unsigned not null,
-  source_id                   int unsigned not null,
 
   KEY xref_idx(xref_id)
 
-- 
GitLab