XrefLoader.pm 31.1 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39
package XrefMapper::XrefLoader;

use vars '@ISA';
@ISA = qw{ XrefMapper::BasicMapper };

use strict;
use warnings;
use XrefMapper::BasicMapper;

use Cwd;
use DBI;
use File::Basename;
use IPC::Open3;

sub new {
  my($class, $mapper) = @_;

  my $self ={};
  bless $self,$class;
  $self->core($mapper->core);
  $self->xref($mapper->xref);
  $self->mapper($mapper);
  return $self;
}


sub mapper{
  my ($self, $arg) = @_;

  (defined $arg) &&
    ($self->{_mapper} = $arg );
  return $self->{_mapper};
}

sub update{
  my ($self, $arg) = @_;
  # remove xref, object_xref, identity_xref, depenedent_xref, go_xref, unmapped_object, (interpro???), external_synonym, projections.


Ian Longden's avatar
Ian Longden committed
40
 my $verbose = $self->mapper->verbose;
41

42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73
  #####################################
  # first remove all the projections. #
  #####################################

  my $sql = "DELETE es FROM xref x, external_synonym es WHERE x.xref_id = es.xref_id and x.info_type = 'PROJECTION'";
  my $sth = $self->core->dbc->prepare($sql);
  $sth->execute();

  $sql = "DELETE object_xref FROM object_xref, xref WHERE object_xref.xref_id = xref.xref_id AND xref.info_type = 'PROJECTION'";
  $sth = $self->core->dbc->prepare($sql);
  $sth->execute();
  $sql = "DELETE xref FROM xref WHERE xref.info_type = 'PROJECTION'";
  $sth = $self->core->dbc->prepare($sql);
  $sth->execute();
  $sth->finish;

  #########################################
  # Get source_id to external_db_id       #
  #########################################

  my %name_to_external_db_id;
  $sql = "select external_db_id, db_name from external_db";
  $sth = $self->core->dbc->prepare($sql);
  $sth->execute();
  my ($id, $name);
  $sth->bind_columns(\$id, \$name);
  while($sth->fetch()){
    $name_to_external_db_id{$name} = $id;
   }
  $sth->finish;

  my %source_id_to_external_db_id;
74 75
 
  $sql = 'select s.source_id, s.name from source s, xref x where x.source_id = s.source_id group by s.source_id'; # only get those of interest
76 77 78 79 80 81 82 83 84 85 86 87 88
  $sth = $self->xref->dbc->prepare($sql);
  $sth->execute();
  $sth->bind_columns(\$id, \$name);
  while($sth->fetch()){
     if(defined($name_to_external_db_id{$name})){
      $source_id_to_external_db_id{$id} = $name_to_external_db_id{$name};
    }
    else{
      die "ERROR: Could not find $name in external_db table please add this too continue\n";
    }
  }
  $sth->finish;

Ian Longden's avatar
Ian Longden committed
89 90 91
  $sth = $self->xref->dbc->prepare("update xref set dumped = null"); # just incase this is being ran again
  $sth->execute;
  $sth->finish;
92 93 94 95 96 97
  
  ######################################
  # For each external_db to be updated #
  # Delete the existing ones           # 
  ######################################

98
  $sth = $self->xref->dbc->prepare('select s.name, count(*) from xref x, object_xref ox, source s where ox.xref_id = x.xref_id and x.source_id = s.source_id and ox_status = "DUMP_OUT"  group by s.name');
99 100 101 102 103
  $sth->execute();
  my $count;
  $sth->bind_columns(\$name,\$count);

  my $synonym_sth  =  $self->core->dbc->prepare('DELETE external_synonym FROM external_synonym, xref WHERE external_synonym.xref_id = xref.xref_id AND xref.external_db_id = ?');
104
  my $go_sth       =  $self->core->dbc->prepare('DELETE FROM go_xref');
105 106
  my $identity_sth =  $self->core->dbc->prepare('DELETE identity_xref FROM identity_xref, object_xref, xref WHERE identity_xref.object_xref_id = object_xref.object_xref_id AND object_xref.xref_id = xref.xref_id AND xref.external_db_id = ?');
  my $object_sth   =  $self->core->dbc->prepare('DELETE object_xref FROM object_xref, xref WHERE object_xref.xref_id = xref.xref_id AND xref.external_db_id = ?');
107
  my $dependent_sth = $self->core->dbc->prepare('DELETE d FROM dependent_xref d, xref x WHERE d.dependent_xref_id = x.xref_id and x.external_db_id = ?');
108 109 110 111
  my $xref_sth     =  $self->core->dbc->prepare('DELETE FROM xref WHERE xref.external_db_id = ?');
  my $unmapped_sth =  $self->core->dbc->prepare('DELETE FROM unmapped_object WHERE type="xref" and external_db_id = ?');


112 113 114
  my $transaction_start_sth  =  $self->core->dbc->prepare('start transaction');
  my $transaction_end_sth    =  $self->core->dbc->prepare('commit');

115 116
#
# ?? Is it faster to delete them all in one go with a external_db_id in (....) ???
117
# alternative load ottt etc that are not obtained from xrefs into xref table and then delete tables fully??
118 119 120 121
#



Ian Longden's avatar
Ian Longden committed
122 123
#  my $test =1;  # Can take a while so make optional when testing
#  if(!$test){
124
  $transaction_start_sth->execute();
125 126 127
  while($sth->fetch()){
    my $ex_id = $name_to_external_db_id{$name};

Ian Longden's avatar
Ian Longden committed
128
    print "Deleting data for $name from core before updating from new xref database\n" if ($verbose);
129
    $synonym_sth->execute($ex_id);
130 131 132
    if($name eq "GO"){
      $go_sth->execute();
    }
133 134
    $identity_sth->execute($ex_id);
    $object_sth->execute($ex_id);  
135
    $dependent_sth->execute($ex_id);
136 137 138 139
    $xref_sth->execute($ex_id);
    $unmapped_sth->execute($ex_id);
  }
  $sth->finish;
140
  $transaction_end_sth->execute();
Ian Longden's avatar
Ian Longden committed
141
#}
Ian Longden's avatar
Ian Longden committed
142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160

  ######################################################
  # Sort out Interpro  (xrefs added by unmapped entries)
  ######################################################
  
  my $interpro_test_sth = $self->xref->dbc->prepare('select count(1) from xref x, source s where x.source_id = s.source_id and s.name like "InterPro"');
  
  $interpro_test_sth->execute();
  my ($int_count);
  $interpro_test_sth->bind_columns(\$int_count);
  $interpro_test_sth->fetch();
  if(defined($int_count) and $int_count){
    my $ex_id = $name_to_external_db_id{"InterPro"}; 
    $xref_sth->execute($ex_id);   
  }	
  $interpro_test_sth->finish;



161 162 163 164
  $synonym_sth->finish;
  $go_sth->finish;  
  $identity_sth->finish;
  $object_sth->finish;  
165
  $dependent_sth->finish;
166 167 168 169 170 171 172 173 174 175 176 177 178 179 180
  $xref_sth->finish;
  $unmapped_sth->finish; 



  ##########################################
  # Get the offsets for object_xref, xref  #
  ##########################################

  $sth = $self->core->dbc->prepare('select MAX(xref_id) from xref');
  my $xref_offset;
  $sth->execute;
  $sth->bind_columns(\$xref_offset);
  $sth->fetch();
  $sth->finish;
Ian Longden's avatar
Ian Longden committed
181
  $xref_offset = 0 if(!defined($xref_offset));
182

183 184
  $self->add_meta_pair("xref_offset", $xref_offset);

185 186 187 188 189 190
  $sth = $self->core->dbc->prepare('select MAX(object_xref_id) from object_xref');
  my $object_xref_offset;
  $sth->execute;
  $sth->bind_columns(\$object_xref_offset);
  $sth->fetch();
  $sth->finish;
Ian Longden's avatar
Ian Longden committed
191
  $object_xref_offset = 0 if(!defined($object_xref_offset));
192

193 194
  $self->add_meta_pair("object_xref_offset", $object_xref_offset);

195 196 197 198 199

  ####################
  # Get analysis id's 
  ####################

200
  my %analysis_ids = $self->get_analysis(); # 
201 202


Ian Longden's avatar
Ian Longden committed
203
  print "xref offset is $xref_offset, object_xref offset is $object_xref_offset\n" if ($verbose);
204 205 206 207 208

  #####################################
  # Now add the new ones              #
  #####################################

Ian Longden's avatar
Ian Longden committed
209 210 211 212 213 214 215 216 217 218 219
  ###########################
  # SQL to get data from xref
  ###########################
  
  my $direct_sth = $self->xref->dbc->prepare('select x.xref_id, x.accession, x.label, x.version, x.description, x.info_text, ox.object_xref_id, ox.ensembl_id, ox.ensembl_object_type from xref x, object_xref ox  where ox.ox_status = "DUMP_OUT" and ox.xref_id = x.xref_id and x.source_id = ? and x.info_type = ? order by x.xref_id');
  
  #     $dependent_sth = $self->xref->dbc->prepare('select  x.xref_id, x.accession, x.label, x.version, x.description, x.info_text, ox.object_xref_id, ox.ensembl_id, ox.ensembl_object_type, d.master_xref_id from xref x, object_xref ox,  dependent_xref d where ox.ox_status = "DUMP_OUT" and ox.xref_id = x.xref_id and d.object_xref_id = ox.object_xref_id and x.source_id = ? and x.info_type = ? order by x.xref_id, ox.ensembl_id');
  
  $dependent_sth = $self->xref->dbc->prepare('select  x.xref_id, x.accession, x.label, x.version, x.description, x.info_text, ox.object_xref_id, ox.ensembl_id, ox.ensembl_object_type, ox.master_xref_id from xref x, object_xref ox where ox.ox_status = "DUMP_OUT" and ox.xref_id = x.xref_id and x.source_id = ? and x.info_type = ? order by x.xref_id, ox.ensembl_id');
  
  
220
  my $go_sql =(<<GSQL);
221
  SELECT  x.xref_id, x.accession, x.label, x.version, x.description, x.info_text, ox.object_xref_id, ox.ensembl_id, ox.ensembl_object_type, ox.master_xref_id, g.linkage_type 
222 223 224 225 226 227 228 229
    FROM (xref x, object_xref ox, go_xref g) 
      WHERE ox.ox_status = "DUMP_OUT" and  
            g.object_xref_id = ox.object_xref_id and 
            x.xref_id = ox.xref_id and 
            x.source_id = ? and x.info_type = ? 
            order by x.xref_id, ox.ensembl_id
GSQL

Ian Longden's avatar
Ian Longden committed
230
  $go_sth = $self->xref->dbc->prepare($go_sql);
231

Ian Longden's avatar
Ian Longden committed
232
  my $seq_sth   =   $self->xref->dbc->prepare('select x.xref_id, x.accession, x.label, x.version, x.description, x.info_text, ox.object_xref_id, ox.ensembl_id, ox.ensembl_object_type, i.query_identity, i.target_identity, i.hit_start, i.hit_end, i.translation_start, i.translation_end, i.cigar_line, i.score, i.evalue from xref x, object_xref ox, identity_xref i  where ox.ox_status = "DUMP_OUT" and i.object_xref_id = ox.object_xref_id and ox.xref_id = x.xref_id and x.source_id = ? and x.info_type = ? order by x.xref_id');
233 234


Ian Longden's avatar
Ian Longden committed
235 236 237 238 239 240 241 242 243 244 245 246
  
  ########################
  # SQL to add data to core
  #########################
  
  my $add_xref_sth           = $self->core->dbc->prepare('insert into xref (xref_id, external_db_id, dbprimary_acc, display_label, version, description, info_type, info_text) values (?, ?, ?, ?, ?, ?, ?, ?)');
  my $add_object_xref_sth    = $self->core->dbc->prepare('insert into object_xref (object_xref_id, ensembl_id, ensembl_object_type, xref_id, analysis_id) values (?, ?, ?, ?, ?)');
  my $add_identity_xref_sth  = $self->core->dbc->prepare('insert into identity_xref (object_xref_id, xref_identity, ensembl_identity, xref_start, xref_end, ensembl_start, ensembl_end, cigar_line, score, evalue) values (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)');
  my $add_go_xref_sth        = $self->core->dbc->prepare('insert into go_xref (object_xref_id, linkage_type) values (?, ?)');
  my $add_dependent_xref_sth = $self->core->dbc->prepare('insert ignore into dependent_xref (object_xref_id, master_xref_id, dependent_xref_id) values (?, ?, ?)');
  my $add_syn_sth            = $self->core->dbc->prepare('insert ignore into external_synonym (xref_id, synonym) values (?, ?)');
  
247
  $sth = $self->xref->dbc->prepare('select s.name, s.source_id, count(*), x.info_type, s.priority_description from xref x, object_xref ox, source s where ox.xref_id = x.xref_id  and x.source_id = s.source_id and ox_status = "DUMP_OUT" group by s.name, s.source_id, x.info_type');
248
  $sth->execute();
249 250
  my ($type, $source_id, $where_from);
  $sth->bind_columns(\$name,\$source_id, \$count, \$type, \$where_from);
Ian Longden's avatar
Ian Longden committed
251
  
252
  $transaction_start_sth->execute();
Ian Longden's avatar
Ian Longden committed
253
  
254
  while($sth->fetch()){
255 256 257
    if(defined($where_from) and $where_from ne ""){
      $where_from = "Generated via $where_from";
    }	
258 259
    my $ex_id = $name_to_external_db_id{$name};

Ian Longden's avatar
Ian Longden committed
260
    print "updating ($source_id) $name in core (for $type xrefs)\n" if ($verbose);
261 262 263 264 265 266 267 268 269 270 271 272 273 274

    my @xref_list=();  # process at end. Add synonyms and set dumped = 1;

   
    # dump SEQUENCE_MATCH, DEPENDENT, DIRECT, COORDINATE_OVERLAP, INFERRED_PAIR, (MISC?? same as direct come from official naming)  

    ### If DIRECT ,         xref, object_xref,                  (order by xref_id)  # maybe linked to more than one?
    ### if INFERRED_PAIR    xref, object_xref
    ### if MISC             xref, object_xref 

    
    if($type eq "DIRECT" or $type eq "INFERRED_PAIR" or $type eq "MISC"){
      my $count = 0;
      $direct_sth->execute($source_id, $type);
275 276
      my ($xref_id, $acc, $label, $version, $desc, $info, $object_xref_id, $ensembl_id, $ensembl_type); 
      $direct_sth->bind_columns(\$xref_id, \$acc, \$label, \$version, \$desc, \$info, \$object_xref_id, \$ensembl_id, \$ensembl_type);
277 278 279 280 281
      my $last_xref = 0;
      while($direct_sth->fetch){
        if($last_xref != $xref_id){
	  push @xref_list, $xref_id;
	  $count++;
282
	  $add_xref_sth->execute(($xref_id+$xref_offset), $ex_id, $acc, $label, $version, $desc, $type, $info || $where_from);
283 284
	  $last_xref = $xref_id;
        }
Ian Longden's avatar
Ian Longden committed
285
        $add_object_xref_sth->execute(($object_xref_id+$object_xref_offset), $ensembl_id, $ensembl_type, ($xref_id+$xref_offset), $analysis_ids{$ensembl_type});
286
      }  
Ian Longden's avatar
Ian Longden committed
287
      print "DIRECT $count\n" if ($verbose);
288 289 290 291 292 293 294 295
    }
 
    ### If DEPENDENT,       xref, object_xref , dependent_xref  (order by xref_id)  # maybe linked to more than one?
 
   elsif($type eq "DEPENDENT"){
     if($name eq "GO"){
       my $count = 0;
       $go_sth->execute($source_id, $type);
296 297
       my ($xref_id, $acc, $label, $version, $desc, $info,  $object_xref_id, $ensembl_id, $ensembl_type, $master_xref_id, $linkage_type); 
       $go_sth->bind_columns(\$xref_id, \$acc, \$label, \$version, \$desc, \$info, \$object_xref_id, \$ensembl_id, \$ensembl_type, \$master_xref_id, \$linkage_type);
298 299 300 301 302
       my $last_xref = 0;
       while($go_sth->fetch){
	 if($last_xref != $xref_id){
	   push @xref_list, $xref_id;
	   $count++;
303
	   $add_xref_sth->execute(($xref_id+$xref_offset), $ex_id, $acc, $label, $version, $desc, $type, $info || $where_from);
304 305
	   $last_xref = $xref_id;
	 }
306 307 308
	 if(defined($master_xref_id)){  # need to sort this out as all should habe one really. (interpro generates go without these!!)
	   $add_dependent_xref_sth->execute(($object_xref_id+$object_xref_offset), ($xref_id+$xref_offset), ($master_xref_id+$xref_offset) );
	 }
309 310
	 $add_object_xref_sth->execute(   ($object_xref_id+$object_xref_offset), $ensembl_id, $ensembl_type, ($xref_id+$xref_offset), $analysis_ids{$ensembl_type} );
	 $add_go_xref_sth->execute(       ($object_xref_id+$object_xref_offset), $linkage_type);
311
       }       
Ian Longden's avatar
Ian Longden committed
312
       print "GO $count\n" if ($verbose);     
313
     }
314 315
     else{
       my $count = 0;
316
       my $ox_count = 0;
317 318 319 320 321 322 323 324 325 326 327 328 329 330
       $dependent_sth->execute($source_id, $type);
       my ($xref_id, $acc, $label, $version, $desc, $info, $object_xref_id, $ensembl_id, $ensembl_type, $master_xref_id); 
       $dependent_sth->bind_columns(\$xref_id, \$acc, \$label, \$version, \$desc, \$info, \$object_xref_id, \$ensembl_id, \$ensembl_type, \$master_xref_id);
       my $last_xref = 0;
       my $last_ensembl = 0;
       while($dependent_sth->fetch){
	 if($last_xref != $xref_id){
	   push @xref_list, $xref_id;
	   $count++;
	   $add_xref_sth->execute(($xref_id+$xref_offset), $ex_id, $acc, $label || $acc, $version, $desc, $type, $info || $where_from);
	 }
	 if($last_xref != $xref_id or $last_ensembl != $ensembl_id){
	   $add_object_xref_sth->execute(($object_xref_id+$object_xref_offset), $ensembl_id, $ensembl_type, ($xref_id+$xref_offset), $analysis_ids{$ensembl_type});
	   $add_dependent_xref_sth->execute(($object_xref_id+$object_xref_offset), ($master_xref_id+$xref_offset), ($xref_id+$xref_offset) );
331
	   $ox_count++;
332
	 }
333
	 $last_xref = $xref_id;
334 335
	 $last_ensembl = $ensembl_id;
       }  
336
       print "DEP $count xrefs, $ox_count object_xrefs\n" if ($verbose);
337
     }
338 339 340 341 342 343
   }
   ### If SEQUENCE_MATCH   xref, object_xref,  identity_xref   (order by xref_id)  # maybe linked to more than one?

    elsif($type eq "SEQUENCE_MATCH"){
      my $count = 0;
      $seq_sth->execute($source_id, $type);
344
      my ($xref_id, $acc, $label, $version, $desc, $info, $object_xref_id, $ensembl_id, $ensembl_type); 
345
      my ( $query_identity, $target_identity, $hit_start, $hit_end, $translation_start, $translation_end, $cigar_line, $score, $evalue);
346
      $seq_sth->bind_columns(\$xref_id, \$acc, \$label, \$version, \$desc, \$info, \$object_xref_id, \$ensembl_id, \$ensembl_type,
347 348 349 350 351 352
			     \$query_identity, \$target_identity, \$hit_start, \$hit_end, \$translation_start, \$translation_end, \$cigar_line, \$score, \$evalue);
      my $last_xref = 0;
      while($seq_sth->fetch){
        if($last_xref != $xref_id){
	  push @xref_list, $xref_id;
	  $count++;
353
	  $add_xref_sth->execute(($xref_id+$xref_offset), $ex_id, $acc, $label, $version, $desc, $type, $info || $where_from);
354 355
	  $last_xref = $xref_id;
        }
356
        $add_object_xref_sth->execute(   ($object_xref_id+$object_xref_offset), $ensembl_id, $ensembl_type, ($xref_id+$xref_offset), $analysis_ids{$ensembl_type});
357
	$add_identity_xref_sth->execute( ($object_xref_id+$object_xref_offset), $query_identity, $target_identity, $hit_start, $hit_end, 
Ian Longden's avatar
Ian Longden committed
358
					 $translation_start, $translation_end, $cigar_line, $score, $evalue);  
359
      }  
Ian Longden's avatar
Ian Longden committed
360
      print "SEQ $count\n" if ($verbose);
361 362
    }
    else{
Ian Longden's avatar
Ian Longden committed
363
      print "PROBLEM:: what type is $type\n";
364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379
    }	


    # Transfer data for synonym and set xref database xrefs to dumped.
    if(@xref_list){
      my $syn_sql = "select xref_id, synonym from synonym where xref_id in(".join(", ",@xref_list).")";
      my $syn_sth    = $self->xref->dbc->prepare($syn_sql);
      $syn_sth->execute();
    
      my ($xref_id, $syn);
      $syn_sth->bind_columns(\$xref_id, \$syn);
      while($syn_sth->fetch()){
	$add_syn_sth->execute(($xref_id+$xref_offset), $syn)
      }
      $syn_sth->finish;

Ian Longden's avatar
Ian Longden committed
380
      my $xref_dumped_sth = $self->xref->dbc->prepare("update xref set dumped = 2 where xref_id in (".join(", ",@xref_list).")");
381 382 383
      $xref_dumped_sth->execute(); 
      $xref_dumped_sth->finish;
    }	
384
 
385

386 387
  }
  $sth->finish;
388
  $transaction_end_sth->execute();
389 390 391 392 393 394 395


  #######################################
  # Remember to do unmapped entries
  # 1) make sure the reason exist/create them and get the ids for these.
  # 2) Process where dumped is null and type = DIRECT, DEPENDENT, SEQUENCE_MATCH, MISC seperately
  ########################################
396

397 398 399 400 401 402 403
  # Get the cutoff values
  $sth = $self->xref->dbc->prepare("select job_id, percent_query_cutoff, percent_target_cutoff from mapping limit 1");
  $sth->execute();
  my ($job_id, $q_cut, $t_cut);
  $sth->bind_columns(\$job_id, \$q_cut, \$t_cut);
  $sth->fetch;
  $sth->finish;
404 405


406 407 408
  my %summary_failed;
  my %desc_failed;
  my %reason_id;
409

410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452
  $summary_failed{"NO_STABLE_ID"} = "Failed to find Stable ID";
  $desc_failed{"NO_STABLE_ID"}    = "Stable ID that this xref was linked to no longer exists";

  $summary_failed{"FAILED_MAP"} = "Failed to match";
  $desc_failed{"FAILED_MAP"}    = "Unable to match to any ensembl entity at all";

  $summary_failed{"NO_MAPPING"} = "No mapping done";
  $desc_failed{"NO_MAPPING"}    = "No mapping done for this type of xref";

  $summary_failed{"FAILED_THRESHOLD"} = "Failed to match at thresholds";
  $desc_failed{"FAILED_THRESHOLD"}    = "Unable to match at the thresholds of $q_cut\% for the query or $t_cut\% for the target";

  $summary_failed{"MASTER_FAILED"} = "Master failed";
  $desc_failed{"MASTER_FAILED"}    = "The dependent xref was not matched due to the master xref not being mapped";

  $summary_failed{"NO_MASTER"} = "No Master";
  $desc_failed{"NO_MASTER"}    = "The dependent xref was not matched due to there being no master xref";

  

  foreach my $key (keys %desc_failed){
    $sth = $self->core->dbc->prepare("select unmapped_reason_id from unmapped_reason where full_description like '".$desc_failed{$key}."'");
    $sth->execute();
    my $failed_id=undef;
    $sth->bind_columns(\$failed_id);
    $sth->fetch;
    $sth->finish;
    if(!defined($failed_id)){
      $sth = $self->core->dbc->prepare('insert into unmapped_reason (summary_description, full_description) values("'.$summary_failed{$key}.'", "'.$desc_failed{$key}.'")');
      $sth->execute();
      $failed_id = $sth->{'mysql_insertid'};
      $sth->finish
    }
    $reason_id{$key} = $failed_id;
  }


  ## Dump interpro xrefs and interpro table
  # use NO_MAPPING as unmapped_reason
  
  # dump xrefs;
  # dump unmapped reasons
  # set xref status to dumped
453 454
  
  $transaction_start_sth->execute();
455 456 457 458 459 460 461 462 463 464 465

  my $get_xref_interpro_sth  = $self->xref->dbc->prepare("select x.xref_id, x.accession, x.version, x.label, x.description, x.info_type, x.info_text from xref x ,source s where s.source_id = x.source_id and s.name like 'Interpro'");

  my $get_interpro_sth       = $self->xref->dbc->prepare("select interpro, pfam from interpro");
  my $add_interpro_sth       = $self->core->dbc->prepare("insert into interpro (interpro_ac, id) values (?, ?)");
  my $set_unmapped_sth       =  $self->core->dbc->prepare("insert into unmapped_object (type, analysis_id, external_db_id, identifier, unmapped_reason_id ) values ('xref', ?, ?, ?, ?)");
  my @xref_list =();
  
  
  my $ex_id = $name_to_external_db_id{"Interpro"};
  my $analysis_id = $analysis_ids{'Transcript'};   # No real analysis here but in table it is set to not NULL
466
 
467 468 469 470 471 472 473 474 475 476 477 478
  
  $get_xref_interpro_sth->execute();
  my ($xref_id, $acc, $version, $label, $desc, $info);
  $get_xref_interpro_sth->bind_columns(\$xref_id, \$acc, \$version, \$label, \$desc, \$type, \$info);
  
  while($get_xref_interpro_sth->fetch){
    $add_xref_sth->execute(($xref_id+$xref_offset), $ex_id, $acc, $label, $version, $desc, $type, $info);
    $set_unmapped_sth->execute($analysis_id, $ex_id, $acc, $reason_id{"NO_MAPPING"} );
    push @xref_list, $xref_id;
  }
  $get_xref_interpro_sth->finish;
  $set_unmapped_sth->finish;
479

480
  if(@xref_list){
Ian Longden's avatar
Ian Longden committed
481
    my $xref_dumped_sth = $self->xref->dbc->prepare("update xref set dumped = 3 where xref_id in (".join(", ",@xref_list).")");
482 483 484
    $xref_dumped_sth->execute(); 
    $xref_dumped_sth->finish;
  }
485

486 487 488 489 490 491 492 493 494 495 496
  # delete all entries in interpro table
  my $del_sth = $self->core->dbc->prepare("delete from interpro");
  $del_sth->execute;
  $del_sth->finish;
  
  # add new entries to interpro table
  $get_interpro_sth->execute();
  my ($inter);
  $get_interpro_sth->bind_columns(\$inter,\$id);
  while($get_interpro_sth->fetch){
    $add_interpro_sth->execute($inter, $id);
497
  }
498 499 500
  $get_interpro_sth->finish;
  $add_interpro_sth->finish;

501 502
  $transaction_end_sth->execute();

503 504

#  foreach my $type (qw(MISC DEPENDENT DIRECT SEQUENCE_MATCH INFERRED_PAIR)){
505
  $transaction_start_sth->execute();
506 507 508 509 510 511 512 513

  ##########
  # DIRECT #
  ##########

  my $dbname;
  $sql =(<<DIR);
  SELECT  x.xref_id, x.accession, x.version, x.label, x.description, x.info_type, x.info_text, s.name 
514 515
    FROM source s,xref x
      LEFT JOIN  object_xref ox ON ox.xref_id = x.xref_id
516 517
      WHERE x.source_id = s.source_id 
        AND x.dumped is null 
518
        AND ox.ox_status != 'FAILED_PRIORITY'
519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538
        AND x.info_type = 'DIRECT'
DIR

  my $direct_unmapped_sth = $self->xref->dbc->prepare($sql);
  $direct_unmapped_sth->execute();
  $direct_unmapped_sth->bind_columns(\$xref_id, \$acc, \$version, \$label, \$desc, \$type, \$info, \$dbname);

  @xref_list = ();
  $analysis_id = $analysis_ids{'Transcript'};   # No real analysis here but in table it is set to not NULL
  while($direct_unmapped_sth->fetch()){
    my $ex_id = $name_to_external_db_id{$dbname};
    $add_xref_sth->execute(($xref_id+$xref_offset), $ex_id, $acc, $label, $version, $desc, $type, $info);   
    $set_unmapped_sth->execute($analysis_id, $ex_id, $acc, $reason_id{"NO_STABLE_ID"});
    push @xref_list, $xref_id;
  }
  $direct_unmapped_sth->finish;
  $set_unmapped_sth->finish;


  if(@xref_list){
Ian Longden's avatar
Ian Longden committed
539
    my $xref_dumped_sth = $self->xref->dbc->prepare("update xref set dumped = 4 where xref_id in (".join(", ",@xref_list).")");
540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572
    $xref_dumped_sth->execute(); 
    $xref_dumped_sth->finish;
  }

  ########
  # MISC #
  ########

  $sql =(<<MIS);
  SELECT  x.xref_id, x.accession, x.version, x.label, x.description, x.info_type, x.info_text, s.name 
    FROM xref x, source s 
      WHERE x.source_id = s.source_id 
        AND x.dumped is null 
        AND x.info_type = 'MISC'
MIS

  my $misc_unmapped_sth = $self->xref->dbc->prepare($sql);
  $misc_unmapped_sth->execute();
  $misc_unmapped_sth->bind_columns(\$xref_id, \$acc, \$version, \$label, \$desc, \$type, \$info, \$dbname);

  @xref_list = ();
  $analysis_id = $analysis_ids{'Transcript'};   # No real analysis here but in table it is set to not NULL
  while($misc_unmapped_sth->fetch()){
    my $ex_id = $name_to_external_db_id{$dbname};
    $add_xref_sth->execute(($xref_id+$xref_offset), $ex_id, $acc, $label, $version, $desc, $type, $info);   
    $set_unmapped_sth->execute($analysis_id, $ex_id, $acc, $reason_id{"NO_MAPPING"});
    push @xref_list, $xref_id;
  }
  $misc_unmapped_sth->finish;
  $set_unmapped_sth->finish;


  if(@xref_list){
Ian Longden's avatar
Ian Longden committed
573
    my $xref_dumped_sth = $self->xref->dbc->prepare("update xref set dumped = 5 where xref_id in (".join(", ",@xref_list).")");
574 575 576 577 578 579 580 581 582 583 584 585
    $xref_dumped_sth->execute(); 
    $xref_dumped_sth->finish;
  }

  #############
  # DEPENDENT #
  #############

  $sql = (<<DEP);
    SELECT  x.xref_id, x.accession, x.version, x.label, x.description, x.info_type, x.info_text, s.name, mx.accession 
      FROM xref mx, source s, xref x 
          LEFT JOIN dependent_xref dx ON  dx.dependent_xref_id = x.xref_id
586
          LEFT JOIN object_xref ox ON ox.xref_id = x.xref_id
587 588 589
        WHERE x.source_id = s.source_id 
          AND dx.master_xref_id = mx.xref_id 
          AND x.dumped is null 
590
          AND ox.ox_status != 'FAILED_PRIORITY'
591
          AND x.info_type = 'DEPENDENT'
592
          ORDER BY x.xref_id
593 594 595 596 597 598 599 600 601 602
DEP

  my $dep_unmapped_sth = $self->xref->dbc->prepare($sql);
  $dep_unmapped_sth->execute();
  my $parent;
  $dep_unmapped_sth->bind_columns(\$xref_id, \$acc, \$version, \$label, \$desc, \$type, \$info, \$dbname, \$parent);

  $set_unmapped_sth  =  $self->core->dbc->prepare("insert into unmapped_object (type, analysis_id, external_db_id, identifier, unmapped_reason_id, parent ) values ('xref', ?, ?, ?, '".$reason_id{"MASTER_FAILED"}."', ?)");

  @xref_list = ();
603
  my $last_xref= 0;
604 605
  while($dep_unmapped_sth->fetch()){
    my $ex_id = $name_to_external_db_id{$dbname};
606 607 608 609
    if($last_xref != $xref_id){
      $add_xref_sth->execute(($xref_id+$xref_offset), $ex_id, $acc, $label||$acc, $version, $desc, $type, $info);   
    }
    $last_xref = $xref_id;
610 611 612 613 614 615 616
    $set_unmapped_sth->execute($analysis_id, $ex_id, $acc, $parent);
    push @xref_list, $xref_id;
  }
  $dep_unmapped_sth->finish;
  $set_unmapped_sth->finish;

  if(@xref_list){
Ian Longden's avatar
Ian Longden committed
617
    my $xref_dumped_sth = $self->xref->dbc->prepare("update xref set dumped = 6 where xref_id in (".join(", ",@xref_list).")");
618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637
    $xref_dumped_sth->execute(); 
    $xref_dumped_sth->finish;
  }

  ##################
  # SEQUENCE_MATCH #
  ##################

  $sql = (<<SEQ);
    SELECT  x.xref_id, x.accession, x.version, x.label, x.description, x.info_type, x.info_text, 
            s.name, px.sequence_type, 
            ox.ensembl_object_type, ox.ensembl_id,
            ix.query_identity, ix.target_identity
      FROM source s, primary_xref px, xref x
        LEFT JOIN object_xref ox ON ox.xref_id = x.xref_id
        LEFT JOIN identity_xref ix ON ix.object_xref_id = ox.object_xref_id
      WHERE x.source_id = s.source_id
	  AND px.xref_id = x.xref_id
          AND x.dumped is null 
          AND x.info_type = 'SEQUENCE_MATCH'
638
          AND ox.ox_status != 'FAILED_PRIORITY'
639 640
          ORDER  BY x.xref_id
          
641 642 643 644 645 646 647 648 649 650 651 652 653
SEQ


  my $seq_unmapped_sth = $self->xref->dbc->prepare($sql);
  $seq_unmapped_sth->execute();
  my ($ensembl_object_type, $ensembl_id, $q_id, $t_id, $seq_type) ;
  $seq_unmapped_sth->bind_columns(\$xref_id, \$acc, \$version, \$label, \$desc, \$type, \$info, \$dbname, \$seq_type, \$ensembl_object_type, \$ensembl_id, \$q_id, \$t_id);

  my $set_unmapped_no_sth     = $self->core->dbc->prepare("insert into unmapped_object (type, analysis_id, external_db_id, identifier, unmapped_reason_id, ensembl_object_type ) values ('xref', ?, ?, ?, '".$reason_id{"FAILED_MAP"}."', ?)");
  my $set_unmapped_failed_sth = $self->core->dbc->prepare("insert into unmapped_object (type, analysis_id, external_db_id, identifier, unmapped_reason_id, query_score, target_score, ensembl_id, ensembl_object_type ) values ('xref', ?, ?, ?, '".$reason_id{"FAILED_THRESHOLD"}."',?,?,?,?)");


  @xref_list = ();
654
  $last_xref = 0;
655 656
  while($seq_unmapped_sth->fetch()){
    my $ex_id = $name_to_external_db_id{$dbname};
657 658 659 660
    if($last_xref != $xref_id){
      $add_xref_sth->execute(($xref_id+$xref_offset), $ex_id, $acc, $label, $version, $desc, $type, $info);   
    }
    $last_xref = $xref_id;
661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682
    if(defined($ensembl_id)){
      $analysis_id= $analysis_ids{$ensembl_object_type};
      $set_unmapped_failed_sth->execute($analysis_id, $ex_id, $acc, $q_id, $t_id, $ensembl_id, $ensembl_object_type );
    }
    else{
      if($seq_type eq "dna"){
	$ensembl_object_type = "Transcript";
      }
      else{
	$ensembl_object_type = "Translation";
      }	
      $analysis_id = $analysis_ids{$ensembl_object_type};
      $set_unmapped_no_sth->execute($analysis_id, $ex_id, $acc, $ensembl_object_type);
    }
    push @xref_list, $xref_id;
  }
  $seq_unmapped_sth->finish;
  $set_unmapped_no_sth->finish;
  $set_unmapped_failed_sth->finish;


  if(@xref_list){
Ian Longden's avatar
Ian Longden committed
683
    my $xref_dumped_sth = $self->xref->dbc->prepare("update xref set dumped = 7 where xref_id in (".join(", ",@xref_list).")");
684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722
    $xref_dumped_sth->execute(); 
    $xref_dumped_sth->finish;
  }

  ###########################
  # WEL (What ever is left).#
  ###########################
  
  # These are those defined as dependent but the master never existed and the xref and their descriptions etc are loaded first
  # with the dependencys added later so did not know they had no masters at time of loading.
  # (e.g. EntrezGene, WikiGene, MIN_GENE, MIM_MORBID)

 $sql = (<<WEL);
    SELECT  x.xref_id, x.accession, x.version, x.label, x.description, x.info_type, x.info_text, s.name
      FROM source s, xref x 
        WHERE x.source_id = s.source_id 
          AND x.dumped is null 
          AND x.info_type = 'DEPENDENT'
WEL

  
  my $wel_unmapped_sth = $self->xref->dbc->prepare($sql);
  $wel_unmapped_sth->execute();
  $wel_unmapped_sth->bind_columns(\$xref_id, \$acc, \$version, \$label, \$desc, \$type, \$info, \$dbname);

  $set_unmapped_sth  =  $self->core->dbc->prepare("insert into unmapped_object (type, analysis_id, external_db_id, identifier, unmapped_reason_id) values ('xref', ?, ?, ?, '".$reason_id{"NO_MASTER"}."')");

  $analysis_id = $analysis_ids{'Transcript'};   # No real analysis here but in table it is set to not NULL
  @xref_list = ();
  while($wel_unmapped_sth->fetch()){
    my $ex_id = $name_to_external_db_id{$dbname};
    $add_xref_sth->execute(($xref_id+$xref_offset), $ex_id, $acc, $label, $version, $desc, $type, $info);   
    $set_unmapped_sth->execute($analysis_id, $ex_id, $acc);
    push @xref_list, $xref_id;
  }
  $wel_unmapped_sth->finish;
  $set_unmapped_sth->finish;

  if(@xref_list){
Ian Longden's avatar
Ian Longden committed
723
    my $xref_dumped_sth = $self->xref->dbc->prepare("update xref set dumped = 8 where xref_id in (".join(", ",@xref_list).")");
724 725 726 727 728
    $xref_dumped_sth->execute(); 
    $xref_dumped_sth->finish;
  }


729
  $transaction_end_sth->execute();
730

731 732 733
  my $sth_stat = $self->xref->dbc->prepare("insert into process_status (status, date) values('core_loaded',now())");
  $sth_stat->execute();
  $sth_stat->finish;
734

735

736 737 738 739 740 741 742 743

}


sub get_analysis{
  my $self = shift;
  

Ian Longden's avatar
Ian Longden committed
744 745
  my %typeToLogicName = ( 'Gene'        => 'XrefExonerateDNA',
                          'Transcript'  => 'XrefExonerateDNA',
746 747 748 749
                          'Translation' => 'XrefExonerateProtein' );

  my %analysis_id;

Ian Longden's avatar
Ian Longden committed
750
  foreach my $key (qw(Gene Transcript Translation)){
751 752 753 754 755 756 757 758 759 760 761 762 763 764 765
    
    my $logic_name = $typeToLogicName{$key};
    
    my $sth = $self->core->dbc->prepare("SELECT analysis_id FROM analysis WHERE logic_name='" . $logic_name ."'");
    
    $sth->execute();
    
    my $analysis_id;
    
    if (my @row = $sth->fetchrow_array()) {
      
      $analysis_id{$key} = $row[0];
      
    } else {
      
Ian Longden's avatar
Ian Longden committed
766
      print "No analysis with logic_name $logic_name found, creating ...\n" if ($self->verbose);
767 768 769 770 771 772 773 774 775 776 777 778
      $sth = $self->core->dbc->prepare("INSERT INTO analysis (logic_name, created) VALUES ('" . $logic_name. "',NOW())");
      # TODO - other fields in analysis table
      $sth->execute();
      $analysis_id{$key} = $sth->{'mysql_insertid'};
    }
    $sth->finish();
    
  }
  return %analysis_id;
  
}

779 780


781
1;