From f7899900075181b373d1bb5eb178efedd8f38b47 Mon Sep 17 00:00:00 2001
From: Ian Longden <ianl@sanger.ac.uk>
Date: Mon, 16 Feb 2009 11:33:19 +0000
Subject: [PATCH] copy data from xref database to core database

---
 .../xref_mapping/XrefMapper/XrefLoader.pm     | 399 ++++++++++++++++++
 1 file changed, 399 insertions(+)
 create mode 100644 misc-scripts/xref_mapping/XrefMapper/XrefLoader.pm

diff --git a/misc-scripts/xref_mapping/XrefMapper/XrefLoader.pm b/misc-scripts/xref_mapping/XrefMapper/XrefLoader.pm
new file mode 100644
index 0000000000..95e1400cfa
--- /dev/null
+++ b/misc-scripts/xref_mapping/XrefMapper/XrefLoader.pm
@@ -0,0 +1,399 @@
+package XrefMapper::XrefLoader;
+
+use vars '@ISA';
+@ISA = qw{ XrefMapper::BasicMapper };
+
+use strict;
+use warnings;
+use XrefMapper::BasicMapper;
+
+use Cwd;
+use DBI;
+use File::Basename;
+use IPC::Open3;
+
+# Constructor: blesses an empty hash into $class, copies the core and xref
+# values from $mapper via the core()/xref() accessors, and stores $mapper.
+sub new {
+  my ($class, $mapper) = @_;
+  my $loader = bless {}, $class;
+  $loader->core( $mapper->core );
+  $loader->xref( $mapper->xref );
+  $loader->mapper( $mapper );
+  return $loader;
+}
+
+
+# Accessor for the mapper object: stores the argument when it is defined,
+sub mapper {
+  my ($self, $new_mapper) = @_;
+
+  $self->{_mapper} = $new_mapper if defined $new_mapper;
+  return $self->{_mapper};   # ... and always returns the stored value
+}
+
+# Copy xref data from the xref database into the core database.
+#
+# Removes PROJECTION xrefs from core, maps each xref-database source to a core
+# external_db_id (dies when a source name is missing from external_db), then
+# inserts every object_xref with ox_status "DUMP_OUT", shifting xref_id and
+# object_xref_id by the current core maxima.  Synonyms are copied as well and
+# the transferred xrefs are marked dumped = 1 in the xref database.
+# $arg is accepted but not used.
+sub update{
+  my ($self, $arg) = @_;
+  # remove xref, object_xref, identity_xref, dependent_xref, go_xref, unmapped_object, (interpro???), external_synonym, projections.
+
+  #####################################
+  # first remove all the projections. #
+  #####################################
+
+  my $sql = "DELETE es FROM xref x, external_synonym es WHERE x.xref_id = es.xref_id and x.info_type = 'PROJECTION'";
+  my $sth = $self->core->dbc->prepare($sql);
+  $sth->execute();
+
+  $sql = "DELETE object_xref FROM object_xref, xref WHERE object_xref.xref_id = xref.xref_id AND xref.info_type = 'PROJECTION'";
+  $sth = $self->core->dbc->prepare($sql);
+  $sth->execute();
+  $sql = "DELETE xref FROM xref WHERE xref.info_type = 'PROJECTION'";
+  $sth = $self->core->dbc->prepare($sql);
+  $sth->execute();
+  $sth->finish;
+
+  #########################################
+  # Get source_id to external_db_id       #
+  #########################################
+
+  my %name_to_external_db_id;
+  $sql = "select external_db_id, db_name from external_db";
+  $sth = $self->core->dbc->prepare($sql);
+  $sth->execute();
+  my ($id, $name);
+  $sth->bind_columns(\$id, \$name);
+  while($sth->fetch()){
+    $name_to_external_db_id{$name} = $id;
+  }
+  $sth->finish;
+
+  # NOTE(review): this hash is filled but not read again in this sub; the loop
+  # below mainly checks that every source in use exists in external_db.
+  my %source_id_to_external_db_id;
+  $sql = "select s.source_id, s.name from source s, xref x where x.source_id = s.source_id group by s.source_id"; # only get those of interest
+  $sth = $self->xref->dbc->prepare($sql);
+  $sth->execute();
+  $sth->bind_columns(\$id, \$name);
+  while($sth->fetch()){
+    if(defined($name_to_external_db_id{$name})){
+      $source_id_to_external_db_id{$id} = $name_to_external_db_id{$name};
+    }
+    else{
+      die "ERROR: Could not find $name in external_db table please add this too continue\n";
+    }
+  }
+  $sth->finish;
+
+  ######################################
+  # For each external_db to be updated #
+  # Delete the existing ones           #
+  ######################################
+
+  $sth = $self->xref->dbc->prepare('select s.name, count(*) from xref x, object_xref ox, source s where ox.xref_id = x.xref_id  and x.source_id = s.source_id and ox_status = "DUMP_OUT"  group by s.name');
+  $sth->execute();
+  my $count;
+  $sth->bind_columns(\$name,\$count);
+
+  my $synonym_sth  =  $self->core->dbc->prepare('DELETE external_synonym FROM external_synonym, xref WHERE external_synonym.xref_id = xref.xref_id AND xref.external_db_id = ?');
+  my $go_sth       =  $self->core->dbc->prepare('DELETE gx FROM xref x, object_xref ox LEFT JOIN go_xref gx ON ox.object_xref_id = gx.object_xref_id WHERE x.xref_id = ox.xref_id AND x.external_db_id = ? AND gx.linkage_type is not null');
+  my $identity_sth =  $self->core->dbc->prepare('DELETE identity_xref FROM identity_xref, object_xref, xref WHERE identity_xref.object_xref_id = object_xref.object_xref_id AND object_xref.xref_id = xref.xref_id AND xref.external_db_id = ?');
+  my $object_sth   =  $self->core->dbc->prepare('DELETE object_xref FROM object_xref, xref WHERE object_xref.xref_id = xref.xref_id AND xref.external_db_id = ?');
+#  my $dependent_sth = $self->core->dbc->prepare('DELETE dependent_xref FROM dependent_xref, xref  WHERE dependent_xref.dependent_xref_id = xref.xref_id and xref.external_db_id = ?');
+  my $xref_sth     =  $self->core->dbc->prepare('DELETE FROM xref WHERE xref.external_db_id = ?');
+  my $unmapped_sth =  $self->core->dbc->prepare('DELETE FROM unmapped_object WHERE type="xref" and external_db_id = ?');
+
+  # NOTE: $test is hard-coded to 1, so the delete loop below never runs.
+  my $test =1;
+  if(!$test){
+    while($sth->fetch()){
+      my $ex_id = $name_to_external_db_id{$name};
+
+      print "Deleting data for $name from core before updating from new xref database\n";
+      $synonym_sth->execute($ex_id);
+      $go_sth->execute($ex_id);
+      $identity_sth->execute($ex_id);
+      $object_sth->execute($ex_id);
+#    $dependent_sth->execute($ex_id);
+      $xref_sth->execute($ex_id);
+      $unmapped_sth->execute($ex_id);
+    }
+  }
+  $sth->finish;  # finish the select even when the delete loop is skipped
+  $synonym_sth->finish;
+  $go_sth->finish;
+  $identity_sth->finish;
+  $object_sth->finish;
+#  $dependent_sth->finish;
+  $xref_sth->finish;
+  $unmapped_sth->finish;
+
+  ###############################################################
+  ##### Create temp table dependent_xref (until schema changes) #
+  ###############################################################
+
+  # Rows are written together with each core object_xref insert below, so key on
+  # object_xref_id (master_xref_id is not guaranteed unique); ENGINE= replaces the removed TYPE=.
+  $sql = (<<SQL);
+  Create TABLE dependent_xref(
+     object_xref_id         INT NOT NULL,
+     master_xref_id         INT NOT NULL,
+     dependent_xref_id      INT NOT NULL,
+
+     PRIMARY KEY( object_xref_id ),
+     KEY master_idx ( master_xref_id ),
+     KEY dependent ( dependent_xref_id )
+   ) COLLATE=latin1_swedish_ci ENGINE=MyISAM
+SQL
+
+  $sth = $self->core->dbc->prepare($sql);
+  $sth->execute || die "Could not create temp table dependent_xref\n";
+  $sth->finish;
+
+  ##### Delete this ONLY after the gene/transcript display_xref and description calculations.
+
+  ##########################################
+  # Get the offsets for object_xref, xref  #
+  ##########################################
+
+  $sth = $self->core->dbc->prepare('select MAX(xref_id) from xref');
+  my $xref_offset;
+  $sth->execute;
+  $sth->bind_columns(\$xref_offset);
+  $sth->fetch();
+  $sth->finish;
+  $xref_offset = 0 unless defined $xref_offset;  # MAX() yields NULL on an empty table
+
+  $sth = $self->core->dbc->prepare('select MAX(object_xref_id) from object_xref');
+  my $object_xref_offset;
+  $sth->execute;
+  $sth->bind_columns(\$object_xref_offset);
+  $sth->fetch();
+  $sth->finish;
+  $object_xref_offset = 0 unless defined $object_xref_offset;  # same NULL guard as above
+
+  ####################
+  # Get analysis id's
+  ####################
+
+  my %analysis_id = $self->get_analysis(); # keyed by ensembl object type
+
+  print "xref offset is $xref_offset, object_xref offset is $object_xref_offset\n";
+
+  #####################################
+  # Now add the new ones              #
+  #####################################
+
+     ###########################
+     # SQL to get data from xref
+     ###########################
+
+     my $direct_sth = $self->xref->dbc->prepare('select x.xref_id, x.accession, x.label, x.version, x.description, ox.object_xref_id, ox.ensembl_id, ox.ensembl_object_type from xref x, object_xref ox  where ox.ox_status = "DUMP_OUT" and ox.xref_id = x.xref_id and x.source_id = ? and x.info_type = ? order by x.xref_id');
+
+     my $dependent_sth = $self->xref->dbc->prepare('select  x.xref_id, x.accession, x.label, x.version, x.description, ox.object_xref_id, ox.ensembl_id, ox.ensembl_object_type, d.master_xref_id from xref x, object_xref ox,  dependent_xref d where ox.ox_status = "DUMP_OUT" and ox.xref_id = x.xref_id and d.dependent_xref_id = x.xref_id and x.source_id = ? and x.info_type = ? order by x.xref_id, ox.ensembl_id');
+
+     # $go_sth (the core delete handle above, already finished) is reused for the xref-database GO select.
+     $go_sth = $self->xref->dbc->prepare('select  x.xref_id, x.accession, x.label, x.version, x.description, ox.object_xref_id, ox.ensembl_id, ox.ensembl_object_type, d.master_xref_id, g.linkage_type from xref x, object_xref ox,  dependent_xref d, go_xref g where ox.ox_status = "DUMP_OUT" and  g.object_xref_id = ox.object_xref_id and x.xref_id = ox.xref_id and d.object_xref_id = ox.object_xref_id and x.source_id = ? and x.info_type = ? order by x.xref_id, ox.ensembl_id');
+
+     my $seq_sth   =   $self->xref->dbc->prepare('select x.xref_id, x.accession, x.label, x.version, x.description, ox.object_xref_id, ox.ensembl_id, ox.ensembl_object_type, i.query_identity, i.target_identity, i.hit_start, i.hit_end, i.translation_start, i.translation_end, i.cigar_line, i.score, i.evalue from xref x, object_xref ox, identity_xref i  where ox.ox_status = "DUMP_OUT" and i.object_xref_id = ox.object_xref_id and ox.xref_id = x.xref_id and x.source_id = ? and x.info_type = ? order by x.xref_id');
+
+     ########################
+     # SQL to add data to core
+     #########################
+
+     my $add_xref_sth           = $self->core->dbc->prepare('insert into xref (xref_id, external_db_id, dbprimary_acc, display_label, version, description, info_type) values (?, ?, ?, ?, ?, ?, ?)');
+     my $add_object_xref_sth    = $self->core->dbc->prepare('insert into object_xref (object_xref_id, ensembl_id, ensembl_object_type, xref_id) values (?, ?, ?, ?)');
+     my $add_identity_xref_sth  = $self->core->dbc->prepare('insert into identity_xref (object_xref_id, xref_identity, ensembl_identity, xref_start, xref_end, ensembl_start, ensembl_end, cigar_line, score, evalue, analysis_id) values (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)');
+     my $add_go_xref_sth        = $self->core->dbc->prepare('insert into go_xref (object_xref_id, linkage_type) values (?, ?)');
+     my $add_dependent_xref_sth = $self->core->dbc->prepare('insert into dependent_xref (object_xref_id, master_xref_id, dependent_xref_id) values (?, ?, ?)');
+     my $add_syn_sth            = $self->core->dbc->prepare('insert into external_synonym (xref_id, synonym) values (?, ?)');
+
+  $sth = $self->xref->dbc->prepare('select s.name, s.source_id, count(*), x.info_type from xref x, object_xref ox, source s where ox.xref_id = x.xref_id  and x.source_id = s.source_id and ox_status = "DUMP_OUT"  group by s.name, x.info_type');
+  $sth->execute();
+  my ($type, $source_id);
+  $sth->bind_columns(\$name,\$source_id, \$count, \$type);
+  while($sth->fetch()){
+    my $ex_id = $name_to_external_db_id{$name};
+
+    print "updating $name in core (for $type xrefs)\n";
+
+    my @xref_list=();  # process at end. Add synonyms and set dumped = 1;
+
+    # dump SEQUENCE_MATCH, DEPENDENT, DIRECT, COORDINATE_OVERLAP, INFERRED_PAIR, (MISC?? same as direct come from official naming)
+
+    ### If DIRECT ,         xref, object_xref,                  (order by xref_id)  # maybe linked to more than one?
+    ### if INFERRED_PAIR    xref, object_xref
+    ### if MISC             xref, object_xref
+
+    if($type eq "DIRECT" or $type eq "INFERRED_PAIR" or $type eq "MISC"){
+      my $count = 0;
+      $direct_sth->execute($source_id, $type);
+      my ($xref_id, $acc, $label, $version, $desc, $object_xref_id, $ensembl_id, $ensembl_type);
+      $direct_sth->bind_columns(\$xref_id, \$acc, \$label, \$version, \$desc, \$object_xref_id, \$ensembl_id, \$ensembl_type);
+      my $last_xref = 0;
+      while($direct_sth->fetch){
+        if($last_xref != $xref_id){
+          push @xref_list, $xref_id;
+          $count++;
+          $add_xref_sth->execute(($xref_id+$xref_offset), $ex_id, $acc, $label, $version, $desc, $type);
+          $last_xref = $xref_id;
+        }
+        $add_object_xref_sth->execute(($object_xref_id+$object_xref_offset), $ensembl_id, $ensembl_type, ($xref_id+$xref_offset));
+      }
+      print "DIRECT $count\n";
+    }
+
+    ### If DEPENDENT,       xref, object_xref , dependent_xref  (order by xref_id)  # maybe linked to more than one?
+    elsif($type eq "DEPENDENT"){
+      if($name eq "GO"){
+        my $count = 0;
+        $go_sth->execute($source_id, $type);
+        my ($xref_id, $acc, $label, $version, $desc, $object_xref_id, $ensembl_id, $ensembl_type, $master_xref_id, $linkage_type);
+        $go_sth->bind_columns(\$xref_id, \$acc, \$label, \$version, \$desc, \$object_xref_id, \$ensembl_id, \$ensembl_type, \$master_xref_id, \$linkage_type);
+        my $last_xref = 0;
+        while($go_sth->fetch){
+          if($last_xref != $xref_id){
+            push @xref_list, $xref_id;
+            $count++;
+            $add_xref_sth->execute(($xref_id+$xref_offset), $ex_id, $acc, $label, $version, $desc, $type);
+            $last_xref = $xref_id;
+          }
+          # Column order is (object_xref_id, master_xref_id, dependent_xref_id); the xref being loaded is the dependent.
+          $add_dependent_xref_sth->execute(($object_xref_id+$object_xref_offset), ($master_xref_id+$xref_offset), ($xref_id+$xref_offset) );
+          $add_object_xref_sth->execute( ($object_xref_id+$object_xref_offset), $ensembl_id, $ensembl_type, ($xref_id+$xref_offset) );
+          $add_go_xref_sth->execute( ($object_xref_offset+$object_xref_id), $linkage_type);
+        }
+        print "GO $count\n";
+      }
+      else{
+        my $count = 0;
+        $dependent_sth->execute($source_id, $type);
+        my ($xref_id, $acc, $label, $version, $desc, $object_xref_id, $ensembl_id, $ensembl_type, $master_xref_id);
+        $dependent_sth->bind_columns(\$xref_id, \$acc, \$label, \$version, \$desc, \$object_xref_id, \$ensembl_id, \$ensembl_type, \$master_xref_id);
+        my $last_xref = 0;
+        my $last_ensembl = 0;
+        while($dependent_sth->fetch){
+          # Decide this before $last_xref is updated; tested afterwards it could never be true.
+          my $new_xref = ($last_xref != $xref_id);
+          if($new_xref){
+            push @xref_list, $xref_id;
+            $count++;
+            $add_xref_sth->execute(($xref_id+$xref_offset), $ex_id, $acc, $label || $acc, $version, $desc, $type);
+            $last_xref = $xref_id;
+          }
+          if($new_xref or $last_ensembl != $ensembl_id){
+            $add_object_xref_sth->execute(($object_xref_id+$object_xref_offset), $ensembl_id, $ensembl_type, ($xref_id+$xref_offset));
+            $add_dependent_xref_sth->execute(($object_xref_id+$object_xref_offset), ($master_xref_id+$xref_offset), ($xref_id+$xref_offset) );
+          }
+          $last_ensembl = $ensembl_id;
+        }
+        print "DEP $count\n";
+      }
+    }
+    ### If SEQUENCE_MATCH   xref, object_xref,  identity_xref   (order by xref_id)  # maybe linked to more than one?
+    elsif($type eq "SEQUENCE_MATCH"){
+      my $count = 0;
+      $seq_sth->execute($source_id, $type);
+      my ($xref_id, $acc, $label, $version, $desc, $object_xref_id, $ensembl_id, $ensembl_type);
+      my ( $query_identity, $target_identity, $hit_start, $hit_end, $translation_start, $translation_end, $cigar_line, $score, $evalue);
+      $seq_sth->bind_columns(\$xref_id, \$acc, \$label, \$version, \$desc, \$object_xref_id, \$ensembl_id, \$ensembl_type,
+                             \$query_identity, \$target_identity, \$hit_start, \$hit_end, \$translation_start, \$translation_end, \$cigar_line, \$score, \$evalue);
+      my $last_xref = 0;
+      while($seq_sth->fetch){
+        if($last_xref != $xref_id){
+          push @xref_list, $xref_id;
+          $count++;
+          $add_xref_sth->execute(($xref_id+$xref_offset), $ex_id, $acc, $label, $version, $desc, $type);
+          $last_xref = $xref_id;
+        }
+        $add_object_xref_sth->execute(($object_xref_id+$object_xref_offset), $ensembl_id, $ensembl_type, ($xref_id+$xref_offset));
+        $add_identity_xref_sth->execute( ($object_xref_id+$object_xref_offset), $query_identity, $target_identity, $hit_start, $hit_end,
+                                         $translation_start, $translation_end, $cigar_line, $score, $evalue, $analysis_id{$ensembl_type});
+      }
+      print "SEQ $count\n";
+    }
+    else{
+      print "ARSE what type is $type\n";
+    }
+
+    # Transfer data for synonym and set xref database xrefs to dumped.
+    if(@xref_list){
+      my $syn_sql = "select xref_id, synonym from synonym where xref_id in(".join(", ",@xref_list).")";
+      my $syn_sth    = $self->xref->dbc->prepare($syn_sql);
+      $syn_sth->execute();
+
+      my ($xref_id, $syn);
+      $syn_sth->bind_columns(\$xref_id, \$syn);
+      while($syn_sth->fetch()){
+        $add_syn_sth->execute(($xref_id+$xref_offset), $syn);
+      }
+      $syn_sth->finish;
+
+      my $xref_dumped_sth = $self->xref->dbc->prepare("update xref set dumped = 1 where xref_id in (".join(", ",@xref_list).")");
+      $xref_dumped_sth->execute();
+      $xref_dumped_sth->finish;
+    }
+
+    # if its a priority xref :-
+       # write unmapped xrefs
+
+    # else not priority xref
+       # write unmapped xrefs
+
+  }
+  $sth->finish;
+
+  # remove after testing
+  $sth = $self->core->dbc->prepare("drop table dependent_xref");
+  $sth->execute || die "Could not drop temp table dependent_xref\n";
+  $sth->finish;
+
+}
+
+
+# Return the analysis ids used for sequence-match identity_xrefs, keyed by
+# ensembl object type ('Transcript', 'Translation').  A row is inserted into
+# the core analysis table for any logic_name that is not there yet.
+sub get_analysis{
+  my $self = shift;
+
+  my %typeToLogicName = ( 'Transcript' => 'XrefExonerateDNA',
+                          'Translation' => 'XrefExonerateProtein' );
+
+  my $dbc = $self->core->dbc;
+
+  # Prepared once with a placeholder instead of rebuilding the SQL per key.
+  my $select_sth = $dbc->prepare('SELECT analysis_id FROM analysis WHERE logic_name = ?');
+
+  my %analysis_id;
+
+  foreach my $key (qw(Transcript Translation)){
+    my $logic_name = $typeToLogicName{$key};
+
+    $select_sth->execute($logic_name);
+    my @row = $select_sth->fetchrow_array();
+    $select_sth->finish();   # release the select before any insert
+
+    if (@row) {
+      $analysis_id{$key} = $row[0];
+    } else {
+      print "No analysis with logic_name $logic_name found, creating ...\n";
+      # TODO - other fields in analysis table
+      my $insert_sth = $dbc->prepare('INSERT INTO analysis (logic_name, created) VALUES (?, NOW())');
+      $insert_sth->execute($logic_name);
+      # mysql_insertid: handle attribute read here for the id of the new row.
+      $analysis_id{$key} = $insert_sth->{'mysql_insertid'};
+      $insert_sth->finish();
+    }
+  }
+  return %analysis_id;
+}
+
+1;
-- 
GitLab