From d5b9c8455a3b2dbe31b36f4a1280f5efe0843c20 Mon Sep 17 00:00:00 2001
From: Ian Longden <ianl@sanger.ac.uk>
Date: Tue, 29 Mar 2005 14:27:44 +0000
Subject: [PATCH] WormPepParser: reworked to replace gene-named wormbase_transcript dependent xrefs with transcript-named ones

---
 .../xref_mapping/XrefParser/WormPepParser.pm  | 90 +++++++++++--------
 1 file changed, 51 insertions(+), 39 deletions(-)

diff --git a/misc-scripts/xref_mapping/XrefParser/WormPepParser.pm b/misc-scripts/xref_mapping/XrefParser/WormPepParser.pm
index 446f722dae..62596a5694 100644
--- a/misc-scripts/xref_mapping/XrefParser/WormPepParser.pm
+++ b/misc-scripts/xref_mapping/XrefParser/WormPepParser.pm
@@ -39,6 +39,8 @@ sub run {
     $species_id = XrefParser::BaseParser->get_species_id_for_filename($file);
   }
 
+  my $worm_source_id = XrefParser::BaseParser->get_source_id_for_source_name('wormbase_transcript');
+
   my (%worm)  =  %{XrefParser::BaseParser->get_valid_codes("wormbase_transcript",$species_id)};
 
   my (%swiss)  =  %{XrefParser::BaseParser->get_valid_codes("Uniprot",$species_id)};
@@ -48,56 +50,66 @@ sub run {
   my $sth = $dbi->prepare($sql);
 
 
+  my $sql2 = "select x2.accession, x2.xref_id ";
+  $sql2   .= "from dependent_xref d, xref x1, xref x2 ";
+  $sql2   .= "where d.master_xref_id = x1.xref_id and ";
+  $sql2   .= "      d.dependent_xref_id = x2.xref_id and ";
+  $sql2   .= "      x2.source_id = $worm_source_id and ";
+  $sql2   .= "      x1.xref_id = ? and ";
+  $sql2   .= "      x2.accession = ?";
+  my $sth2 = $dbi->prepare($sql2);
+
+
+  my $sql3 = 'delete from dependent_xref where dependent_xref.master_xref_id=? and dependent_xref.dependent_xref_id=?'; 
+  my $sth3 = $dbi->prepare($sql3);
+
   open(PEP,"<".$file) || die "Could not open $file\n";
 
   while (<PEP>) {
     my ($transcript, $wb, $swiss_ref)  = (split(/\t/,substr($_,1)))[0,1,5];
-    
+    my $swiss_xref;
     if($swiss_ref =~ /SW:(.*)/){
       $swiss_ref = $1;
-    }
-    else{
-      $swiss_ref  = 0 ;
-    }
-    if(length($wb) < 3){
-      print "ERRR:".$_;
-    }
-    
-    #Is the transcript different from the gene
-    my $diff =0;
-    my $gene;
-    if($transcript =~ /(\S+\.\d+)/){
-      $gene = $1;
-      if($gene ne $transcript){
-	$diff=1;
-      }
-    }
-    else{
-      die "Gene format not recognised $transcript\n";
-    }
-
-    my $exists =0;
-    # if gene stored as transcript so change this
-    if(defined($worm{$gene}) and !defined($worm{$transcript})){
-      # change accesion to transcript name instead of gene
-      $sth->execute($transcript, $worm{$gene}) || die $dbi->errstr;
-      print "changing $gene to $transcript\n";
-    }
-    # if no record exists for this 
-    elsif(!defined($worm{$gene}) and !defined($worm{$transcript})){
-      if($swiss_ref){
-	if(defined($swiss{$swiss_ref})){
-	  XrefParser::BaseParser->add_to_xrefs($swiss{$swiss_ref},$transcript,'',$transcript,"","",$source_id,$species_id);	  
+      if(defined($swiss{$swiss_ref})){
+	$swiss_xref = $swiss{$swiss_ref};
+	my $diff =0;
+	my $gene;
+	if($transcript =~ /(\S+\.\d+)/){
+	  $gene = $1;
+	  if($gene ne $transcript){
+	    $diff=1;
+	  }
 	}
 	else{
-	  print $swiss_ref." not found\n";
-	}  
+	  die "Gene format not recognised $transcript\n";
+	}
+
+	$sth2->execute($swiss_xref, $gene) || die $dbi->errstr;
+	(my $gene_acc, my $gene_xref) = $sth2->fetchrow_array();
+
+	$sth2->execute($swiss_xref, $transcript) || die $dbi->errstr;
+	(my $tran_acc, my $tran_xref) =  $sth2->fetchrow_array();
+
+	my $create = 1;
+	if(defined($tran_xref)){ #okay
+	  $create = 0;
+	}
+	elsif(defined($gene_xref)){
+	  # The Uniprot xref currently depends on the gene-named xref: delete that
+	  # dependency here; the $create branch below adds the transcript-named one.
+	  $sth3->execute($swiss_xref, $gene_xref) || die $dbi->errstr;
+	  print "removing $swiss_ref -> $gene : ";
+	}
+	if($create){
+	  XrefParser::BaseParser->add_to_xrefs($swiss_xref,$transcript,'',$transcript,"","",$source_id,$species_id);	  
+	  print "adding $swiss_ref -> $transcript\n";
+	}
+	
       }
-    }
       
-
+    }
   }
-
+  
 }
 
   
-- 
GitLab