From b2cb619b53679786cb3acfe16a8a99da39788eb0 Mon Sep 17 00:00:00 2001 From: Magali Ruffier <mr6@ebi.ac.uk> Date: Mon, 8 Feb 2016 17:29:20 +0000 Subject: [PATCH] remove and add all ensembl id mappings in one go --- .../XrefMapper/ProcessPrioritys.pm | 108 ++++++------------ 1 file changed, 36 insertions(+), 72 deletions(-) diff --git a/misc-scripts/xref_mapping/XrefMapper/ProcessPrioritys.pm b/misc-scripts/xref_mapping/XrefMapper/ProcessPrioritys.pm index 91e9b64082..dac09b1667 100644 --- a/misc-scripts/xref_mapping/XrefMapper/ProcessPrioritys.pm +++ b/misc-scripts/xref_mapping/XrefMapper/ProcessPrioritys.pm @@ -200,7 +200,7 @@ SEQCP $update_x_sth->execute($xref_id); # Copy synonyms across if they are missing $syn_copy_sth->execute($best_xref_id, $xref_id); - $self->process_dependents($xref_id, $best_xref_id, \@best_ensembl_id); + $self->process_dependents($xref_id, $best_xref_id); } } else{ # not DUMP_OUT @@ -253,71 +253,62 @@ SEQCP } sub process_dependents{ - my ($self, $old_master_xref_id, $new_master_xref_id, $best_ensembl_ids) = @_; - -# $best_ensembl_ids = list of e! IDs from highest priority xref for this accession # master xref IDs are entries for the current accession via various methods. We take dependent xrefs from the old and add to the new + my ($self, $old_master_xref_id, $new_master_xref_id) = @_; + - my $dep_sth = $self->xref->dbc->prepare("select distinct dependent_xref_id, dx.linkage_annotation, dx.linkage_source_id from dependent_xref dx where dx.master_xref_id = ?"); + my $matching_ens_sth = $self->xref->dbc->prepare("select distinct ensembl_object_type, ensembl_id from object_xref where ox_status not in ('FAILED_CUTOFF') and xref_id = ? order by ensembl_object_type"); + my $dep_sth = $self->xref->dbc->prepare("select distinct dx.dependent_xref_id, dx.linkage_annotation, dx.linkage_source_id, ox.ensembl_object_type from dependent_xref dx, object_xref ox where ox.xref_id = dx.dependent_xref_id and ox.master_xref_id = dx.master_xref_id and dx.master_xref_id = ? order by ox.ensembl_object_type"); my $insert_dep_x_sth = $self->xref->dbc->prepare("insert into dependent_xref(master_xref_id, dependent_xref_id, linkage_annotation, linkage_source_id) values(?, ?, ?, ?)"); my $insert_dep_ox_sth = $self->xref->dbc->prepare("insert ignore into object_xref(master_xref_id, ensembl_object_type, ensembl_id, linkage_type, ox_status, xref_id) values(?, ?, ?, 'DEPENDENT', 'DUMP_OUT', ?)"); my $dep_ox_sth = $self->xref->dbc->prepare("select object_xref_id from object_xref where master_xref_id = ? and ensembl_object_type = ? and ensembl_id = ? and linkage_type = 'DEPENDENT' AND ox_status = 'DUMP_OUT' and xref_id = ?"); my $insert_dep_go_sth = $self->xref->dbc->prepare("insert ignore into go_xref values(?, ?, ?)"); my $insert_ix_sth = $self->xref->dbc->prepare("insert ignore into identity_xref(object_xref_id, query_identity, target_identity) values(?, 100, 100)"); - my $get_type_sth = $self->xref->dbc->prepare("SELECT ensembl_object_type FROM object_xref WHERE ensembl_id = ? AND xref_id = ?"); my @master_xrefs = ($old_master_xref_id); my $recursive = 0; - my @old_ensembl_ids = $self->_get_old_ensembl_ids_associated_with_xref($old_master_xref_id, $best_ensembl_ids); - - # determine object type of the ensembl_ids we are taking links from and connecting to. - my %splonk; - foreach my $ens_id (@$best_ensembl_ids) { - $get_type_sth->execute($ens_id,$new_master_xref_id); - my ($type) = $get_type_sth->fetchrow_array(); - $splonk{$ens_id} = $type; - } - foreach my $ens_id (@old_ensembl_ids) { - $get_type_sth->execute($ens_id,$old_master_xref_id); - my ($type) = $get_type_sth->fetchrow_array(); - $splonk{$ens_id} = $type; - } + my ($new_object_type, $new_ensembl_id); + my ($dep_xref_id, $linkage_annotation, $new_object_xref_id, $linkage_source_id, $object_type); + # Create a hash of all possible mappings for this accession + my %ensembl_ids; + $matching_ens_sth->execute($new_master_xref_id); + $matching_ens_sth->bind_columns(\$new_object_type, \$new_ensembl_id); + while ($matching_ens_sth->fetch()) { + push @{ $ensembl_ids{$new_object_type} }, $new_ensembl_id; + } + ## Loop through all dependent xrefs of old master xref, and recurse while(my $xref_id = pop(@master_xrefs)){ - my ($dep_xref_id, $linkage_annotation, $new_object_xref_id, $linkage_source_id); # Get dependent xrefs, be they gene, transcript or translation $dep_sth->execute($xref_id); - $dep_sth->bind_columns(\$dep_xref_id, \$linkage_annotation, \$linkage_source_id); + $dep_sth->bind_columns(\$dep_xref_id, \$linkage_annotation, \$linkage_source_id, \$object_type); while($dep_sth->fetch()){ - # Duplicate each dependent for the new master xref if it is the first in the chain, and detach from the original + + # Remove all mappings to low priority xrefs + # Then delete any leftover identity or go xrefs of it + $self->_detach_object_xref($xref_id, $dep_xref_id, $object_type); + + # Duplicate each dependent for the new master xref if it is the first in the chain unless ($recursive) { $insert_dep_x_sth->execute($new_master_xref_id, $dep_xref_id, $linkage_annotation, $linkage_source_id); - # then remove any reference to master xref from the dependent xref where the new ensembl IDs are involved - # The object type here should be decided by the dependent xref. - $self->_detach_object_xref($xref_id, $dep_xref_id, $best_ensembl_ids,\%splonk); } - # also set type of object_xref from old_master_xref to FAILED_PRIORITY - # Then delete any leftover identity or go xrefs of it - $self->_detach_object_xref($xref_id, $dep_xref_id, \@old_ensembl_ids,\%splonk); - # Loop through all chosen (best) ensembl ids mapped to priority xref, and connect them with object_xrefs - foreach my $best_ensembl_id (@$best_ensembl_ids) { - my $e_type = $splonk{$best_ensembl_id}; + foreach my $ensembl_id (@{ $ensembl_ids{$object_type} }) { # Add new object_xref for each best_ensembl_id. - $insert_dep_ox_sth->execute($new_master_xref_id, $e_type, $best_ensembl_id, $dep_xref_id); + $insert_dep_ox_sth->execute($new_master_xref_id, $object_type, $ensembl_id, $dep_xref_id); ## If there is a linkage_annotation, it is a go xref if ($linkage_annotation) { ## Fetch the newly created object_xref to add them to go_xref - $dep_ox_sth->execute($new_master_xref_id, $e_type, $best_ensembl_id, $dep_xref_id); + $dep_ox_sth->execute($new_master_xref_id, $object_type, $ensembl_id, $dep_xref_id); $dep_ox_sth->bind_columns(\$new_object_xref_id); while ($dep_ox_sth->fetch()) { - $insert_dep_go_sth->execute($new_object_xref_id, $linkage_annotation, $new_master_xref_id); + $insert_dep_go_sth->execute($new_object_xref_id, $linkage_annotation, $new_master_xref_id); $insert_ix_sth->execute($new_object_xref_id); } } @@ -328,71 +319,44 @@ sub process_dependents{ $new_master_xref_id = $dep_xref_id; } + $matching_ens_sth->finish(); $dep_sth->finish(); $insert_dep_x_sth->finish(); $insert_dep_ox_sth->finish(); $dep_ox_sth->finish(); $insert_dep_go_sth->finish(); $insert_ix_sth->finish(); - $get_type_sth->finish(); -} - -# broken out of process_dependents. -# Get a list of IDs that are not associated with the new xref -sub _get_old_ensembl_ids_associated_with_xref { - my $self = shift; - my $old_master_xref_id = shift; - my $new_ensembl_ids = shift; - my %new_id_lookup; - map { $new_id_lookup{$_} = 1 } @$new_ensembl_ids; - - my $old_ensembl_id; - my @old_ensembl_ids; - my $old_ens_id_sth = $self->xref->dbc->prepare("select distinct ensembl_id from object_xref where xref_id = ?"); - - $old_ens_id_sth->execute($old_master_xref_id); - $old_ens_id_sth->bind_columns(\$old_ensembl_id); - while ($old_ens_id_sth->fetch()) { - unless (exists $new_id_lookup{$old_ensembl_id}) { - push @old_ensembl_ids, $old_ensembl_id; - } - } - $old_ens_id_sth->finish(); - return @old_ensembl_ids; } # Delete identity xrefs, go_xrefs for a given object xref # Set unimportant object_xrefs to FAILED_PRIORITY, and delete all those that remain sub _detach_object_xref { my $self = shift; - my ($xref_id, $dep_xref_id, $ids, $splonk) = @_; - my %id_type_hash = %$splonk; + my ($xref_id, $dep_xref_id, $object_type) = @_; # Drop all the identity and go xrefs for the dependents of an xref my $remove_dep_ox_sth = $self->xref->dbc->prepare( "DELETE ix, g FROM object_xref ox \ LEFT JOIN identity_xref ix ON ix.object_xref_id = ox.object_xref_id \ LEFT JOIN go_xref g ON g.object_xref_id = ox.object_xref_id \ - WHERE master_xref_id = ? AND ensembl_object_type = ? AND xref_id = ? AND ensembl_id = ?" + WHERE master_xref_id = ? AND ensembl_object_type = ? AND xref_id = ?" ); # Fail the object_xrefs that did link to the deleted identity/go xrefs. # This only updates one of potentially many, due to table contraints. my $update_dep_ox_sth = $self->xref->dbc->prepare( "UPDATE IGNORE object_xref SET ox_status = 'FAILED_PRIORITY' \ - WHERE master_xref_id = ? AND ensembl_object_type = ? AND xref_id = ? AND ensembl_id = ? AND ox_status = 'DUMP_OUT'" + WHERE master_xref_id = ? AND ensembl_object_type = ? AND xref_id = ? AND ox_status = 'DUMP_OUT'" ); # This deletes everything left behind by the previous query. my $clean_dep_ox_sth = $self->xref->dbc->prepare( "DELETE FROM object_xref \ - WHERE master_xref_id = ? AND ensembl_object_type = ? AND xref_id = ? AND ensembl_id = ? AND ox_status = 'DUMP_OUT'" + WHERE master_xref_id = ? AND ensembl_object_type = ? AND xref_id = ? AND ox_status = 'DUMP_OUT'" ); - foreach my $id (@$ids) { - $remove_dep_ox_sth->execute($xref_id, $id_type_hash{$id}, $dep_xref_id, $id); - # change status of object_xref to FAILED_PRIORITY for record keeping - $update_dep_ox_sth->execute($xref_id, $id_type_hash{$id}, $dep_xref_id, $id); - # delete the duplicates. - $clean_dep_ox_sth->execute($xref_id, $id_type_hash{$id}, $dep_xref_id, $id); - } + $remove_dep_ox_sth->execute($xref_id, $object_type, $dep_xref_id); + # change status of object_xref to FAILED_PRIORITY for record keeping + $update_dep_ox_sth->execute($xref_id, $object_type, $dep_xref_id); + # delete the duplicates. + $clean_dep_ox_sth->execute($xref_id, $object_type, $dep_xref_id); $remove_dep_ox_sth->finish(); $update_dep_ox_sth->finish(); -- GitLab