From 6d4dbc50c53813c8327301829e65687d975119a7 Mon Sep 17 00:00:00 2001 From: Kieron Taylor <ktaylor@ebi.ac.uk> Date: Tue, 29 May 2012 14:01:48 +0000 Subject: [PATCH] Xref table overhaul. Uniqueness enforced, coupled with insert-then-query execution order in DBEntryAdaptor. Data patching is slow. --- modules/Bio/EnsEMBL/DBSQL/DBEntryAdaptor.pm | 453 ++++++++---------- .../homo_sapiens/core/xref.sql | 1 - .../homo_sapiens/core/xref.txt | 370 +++++++------- sql/patch_67_68_b.sql | 46 ++ 4 files changed, 439 insertions(+), 431 deletions(-) create mode 100644 sql/patch_67_68_b.sql diff --git a/modules/Bio/EnsEMBL/DBSQL/DBEntryAdaptor.pm b/modules/Bio/EnsEMBL/DBSQL/DBEntryAdaptor.pm index ce3778aa5f..e886ad6112 100644 --- a/modules/Bio/EnsEMBL/DBSQL/DBEntryAdaptor.pm +++ b/modules/Bio/EnsEMBL/DBSQL/DBEntryAdaptor.pm @@ -132,7 +132,7 @@ sub fetch_by_dbID { -type => $type, -secondary_db_name => $secondary_db_name, -secondary_db_table => $secondary_db_table, - -description => $description + -description => $description ); @@ -623,7 +623,7 @@ sub fetch_by_db_accession { =head2 store - Arg [1] : Bio::EnsEMBL::DBEntry $exObj + Arg [1] : Bio::EnsEMBL::DBEntry $dbEntry The DBEntry (xref) to be stored Arg [2] : Int $ensID The dbID of an EnsEMBL object to associate with this external @@ -648,7 +648,7 @@ sub fetch_by_db_accession { =cut sub store { - my ( $self, $exObj, $ensID, $ensType, $ignore_release ) = @_; + my ( $self, $dbEntry, $ensID, $ensType, $ignore_release ) = @_; my $dbJustInserted; @@ -658,7 +658,6 @@ sub store { # my $ensembl_id; - if ( defined($ensID) ) { if ( $ensID =~ /^\d+$/ ) { $ensembl_id = $ensID; @@ -683,222 +682,59 @@ sub store { throw("Invalid dbID passed to DBEntryAdaptor->store()"); } } - - # - # Check for the existance of the external_db, throw if it does not exist - # - - my ($dbRef, $release_clause); - - if ( !$ignore_release ) { - - if(defined $exObj->release()){ - $release_clause = " AND db_release = ?"; - } - else{ - $release_clause = " AND 
db_release is NULL"; - } - - - my $sth = $self->prepare( " - SELECT external_db_id - FROM external_db - WHERE db_name = ? - $release_clause" ); - - $sth->bind_param( 1, $exObj->dbname(), SQL_VARCHAR ); - $sth->bind_param( 2, $exObj->release(), SQL_VARCHAR ) if defined $exObj->release(); - $sth->execute(); - - ($dbRef) = $sth->fetchrow_array(); - $sth->finish(); - - if ( !$dbRef ) { - throw( - sprintf( "external_db [%s] release [%s] does not exist", - $exObj->dbname(), $exObj->release() ) ); - } - } else { - - my $sth = $self->prepare( " - SELECT external_db_id - FROM external_db - WHERE db_name = ? " ); - - $sth->bind_param( 1, $exObj->dbname(), SQL_VARCHAR ); - - $sth->execute(); - - ($dbRef) = $sth->fetchrow_array(); - - if ( !$dbRef ) { - throw( - sprintf( "external_db [%s] does not exist", $exObj->dbname() ) - ); - } - } - - # - # Check for the existance of the external reference, add it if not present - # - - my $sql = "SELECT xref_id FROM xref - WHERE external_db_id = ? - AND dbprimary_acc = ? 
- AND version = ?"; - - if(defined $exObj->info_type){ - $sql .= " AND info_type = ?"; - } - else{ - $sql .= " AND info_type is null"; - } - - if(defined $exObj->info_text){ - $sql .= " AND info_text = ?"; - } - else{ - $sql .= " AND info_text is null"; - } - - my $sth = $self->prepare($sql); - - $sth->bind_param(1,$dbRef,SQL_INTEGER); - $sth->bind_param(2,$exObj->primary_id,SQL_VARCHAR); - $sth->bind_param(3,$exObj->version,SQL_VARCHAR); - - my $i = 4; - if(defined $exObj->info_type){ - $sth->bind_param($i++,$exObj->info_type,SQL_VARCHAR); - } - if(defined $exObj->info_text){ - $sth->bind_param($i++,$exObj->info_text,SQL_VARCHAR); - } - $sth->execute(); - - my ($dbX) = $sth->fetchrow_array(); - - $sth->finish(); - if(!$dbX) { - if(!$exObj->primary_id()) { - throw("DBEntry cannot be stored without a primary_id attribute."); - } - - # - # store the new xref - # - $sth = $self->prepare( " - INSERT ignore INTO xref - SET dbprimary_acc = ?, - display_label = ?, - version = ?, - description = ?, - external_db_id = ?, - info_type = ?, - info_text = ?"); - $sth->bind_param(1, $exObj->primary_id,SQL_VARCHAR); - $sth->bind_param(2, $exObj->display_id,SQL_VARCHAR); - $sth->bind_param(3, $exObj->version,SQL_VARCHAR); - $sth->bind_param(4, $exObj->description,SQL_VARCHAR); - $sth->bind_param(5, $dbRef,SQL_INTEGER); - $sth->bind_param(6, $exObj->info_type, SQL_VARCHAR); - $sth->bind_param(7, $exObj->info_text, SQL_VARCHAR); - - $sth->execute(); - - $dbX = $sth->{'mysql_insertid'}; - $exObj->dbID($dbX); - $sth->finish(); - # - # store the synonyms for the new xref - # - my $synonym_check_sth = $self->prepare( - "SELECT xref_id, synonym - FROM external_synonym - WHERE xref_id = ? 
- AND synonym = ?"); - - my $synonym_store_sth = $self->prepare( - "INSERT ignore INTO external_synonym - SET xref_id = ?, synonym = ?"); - - my $synonyms = $exObj->get_all_synonyms(); - foreach my $syn ( @$synonyms ) { - $synonym_check_sth->bind_param(1,$dbX,SQL_INTEGER); - $synonym_check_sth->bind_param(2,$syn,SQL_VARCHAR); - $synonym_check_sth->execute(); - my ($dbSyn) = $synonym_check_sth->fetchrow_array(); - $synonym_store_sth->bind_param(1,$dbX,SQL_INTEGER); - $synonym_store_sth->bind_param(2,$syn,SQL_VARCHAR); - $synonym_store_sth->execute() if(!$dbSyn); + + + + # Ensure external_db contains a record of the intended xref source + my $dbRef; + $dbRef = $self->_check_external_db($dbEntry,$ignore_release); + + # Attempt to insert DBEntry + my $xref_id = $self->_store_or_fetch_xref($dbEntry,$dbRef); + $dbEntry->dbID($xref_id); #keeps DBEntry in sync with database + ### Attempt to create an object->xref mapping + if ($ensembl_id) {$self->_store_object_xref_mapping($ensembl_id,$dbEntry,$ensType)}; + + return $xref_id; +} + +sub _store_object_xref_mapping { + my $self = shift; + my $ensembl_id = shift; + my $dbEntry = shift; + my $ensembl_type = shift; + + if (not defined ($ensembl_type)) { warning("No Ensembl data type provided for new xref");} + + my $analysis_id; + if ( $dbEntry->analysis() ) { + $analysis_id = $self->db()->get_AnalysisAdaptor->store( $dbEntry->analysis() ); + } else { + $analysis_id = 0; ## This used to be undef, but uniqueness in mysql requires a value } - $synonym_check_sth->finish(); - $synonym_store_sth->finish(); - } elsif (! defined( $exObj->dbID() ) ) { - $exObj->dbID($dbX); - } - - if ( defined($ensembl_id) ) { - # - # check if the object mapping was already stored - # - $sth = $self->prepare( - qq( -SELECT xref_id -FROM object_xref -WHERE xref_id = ? - AND ensembl_object_type = ? - AND ensembl_id = ? - AND ( linkage_annotation = ? 
- OR linkage_annotation IS NULL )) ); - - $sth->bind_param( 1, $dbX, SQL_INTEGER ); - $sth->bind_param( 2, $ensType, SQL_VARCHAR ); - $sth->bind_param( 3, $ensembl_id, SQL_INTEGER ); - $sth->bind_param( 4, $exObj->linkage_annotation(), SQL_VARCHAR ); - + + my $sth = $self->prepare(qq( + INSERT IGNORE INTO object_xref + SET xref_id = ?, + ensembl_object_type = ?, + ensembl_id = ?, + linkage_annotation = ?, + analysis_id = ? ) + ); + $sth->bind_param( 1, $dbEntry->dbID(), SQL_INTEGER ); + $sth->bind_param( 2, $ensembl_type, SQL_VARCHAR ); + $sth->bind_param( 3, $ensembl_id, SQL_INTEGER ); + $sth->bind_param( 4, $dbEntry->linkage_annotation(),SQL_VARCHAR ); + $sth->bind_param( 5, $analysis_id, SQL_INTEGER ); $sth->execute(); - - my ($tst) = $sth->fetchrow_array(); - $sth->finish(); - - if ( !$tst ) { - # - # Store the reference to the internal ensembl object - # - my $analysis_id; - if ( $exObj->analysis() ) { - $analysis_id = - $self->db()->get_AnalysisAdaptor->store( $exObj->analysis() ); - } else { - $analysis_id = undef; - } - - $sth = $self->prepare( - qq( -INSERT INTO object_xref - SET xref_id = ?, - ensembl_object_type = ?, - ensembl_id = ?, - linkage_annotation = ?, - analysis_id = ? 
) ); - - $sth->bind_param( 1, $dbX, SQL_INTEGER ); - $sth->bind_param( 2, $ensType, SQL_VARCHAR ); - $sth->bind_param( 3, $ensembl_id, SQL_INTEGER ); - $sth->bind_param( 4, $exObj->linkage_annotation(), SQL_VARCHAR ); - $sth->bind_param( 5, $analysis_id, SQL_INTEGER ); - - $sth->execute(); - $exObj->adaptor($self); - my $Xidt = $sth->{'mysql_insertid'}; - - # - # If this is an IdentityXref need to store in that table too - # If its OntologyXref add the linkage type to ontology_xref table - # - if ( $exObj->isa('Bio::EnsEMBL::IdentityXref') ) { + my $object_xref_id = $self->last_insert_id(); + + $dbEntry->adaptor($self); # hand Adaptor to dbEntry for future use with OntologyXrefs + + if ($object_xref_id) { + #no existing object_xref, therefore + if ( $dbEntry->isa('Bio::EnsEMBL::IdentityXref') ) { $sth = $self->prepare( " INSERT ignore INTO identity_xref SET object_xref_id = ?, @@ -911,43 +747,170 @@ INSERT INTO object_xref cigar_line = ?, score = ?, evalue = ?" ); - $sth->bind_param( 1, $Xidt, SQL_INTEGER ); - $sth->bind_param( 2, $exObj->xref_identity, SQL_INTEGER ); - $sth->bind_param( 3, $exObj->ensembl_identity, SQL_INTEGER ); - $sth->bind_param( 4, $exObj->xref_start, SQL_INTEGER ); - $sth->bind_param( 5, $exObj->xref_end, SQL_INTEGER ); - $sth->bind_param( 6, $exObj->ensembl_start, SQL_INTEGER ); - $sth->bind_param( 7, $exObj->ensembl_end, SQL_INTEGER ); - $sth->bind_param( 8, $exObj->cigar_line, SQL_LONGVARCHAR ); - $sth->bind_param( 9, $exObj->score, SQL_DOUBLE ); - $sth->bind_param( 10, $exObj->evalue, SQL_DOUBLE ); + $sth->bind_param( 1, $object_xref_id, SQL_INTEGER ); + $sth->bind_param( 2, $dbEntry->xref_identity, SQL_INTEGER ); + $sth->bind_param( 3, $dbEntry->ensembl_identity, SQL_INTEGER ); + $sth->bind_param( 4, $dbEntry->xref_start, SQL_INTEGER ); + $sth->bind_param( 5, $dbEntry->xref_end, SQL_INTEGER ); + $sth->bind_param( 6, $dbEntry->ensembl_start, SQL_INTEGER ); + $sth->bind_param( 7, $dbEntry->ensembl_end, SQL_INTEGER ); + $sth->bind_param( 8, 
$dbEntry->cigar_line, SQL_LONGVARCHAR ); + $sth->bind_param( 9, $dbEntry->score, SQL_DOUBLE ); + $sth->bind_param( 10, $dbEntry->evalue, SQL_DOUBLE ); $sth->execute(); - } elsif ( $exObj->isa('Bio::EnsEMBL::OntologyXref') ) { + } elsif ( $dbEntry->isa('Bio::EnsEMBL::OntologyXref') ) { $sth = $self->prepare( " INSERT ignore INTO ontology_xref SET object_xref_id = ?, source_xref_id = ?, linkage_type = ? " ); - foreach my $info ( @{ $exObj->get_all_linkage_info() } ) { - my ( $lt, $sourceXref ) = @{$info}; - my $sourceXid = undef; - if ($sourceXref) { - $sourceXref->is_stored( $self->dbc ) - || $self->store($sourceXref); - $sourceXid = $sourceXref->dbID; - } - $sth->bind_param( 1, $Xidt, SQL_INTEGER ); - $sth->bind_param( 2, $sourceXid, SQL_INTEGER ); - $sth->bind_param( 3, $lt, SQL_VARCHAR ); - $sth->execute(); - } - } - } ## end if ( !$tst ) - } ## end if ( defined($ensembl_id...)) + foreach my $info ( @{ $dbEntry->get_all_linkage_info() } ) { + my ( $linkage_type, $sourceXref ) = @{$info}; + my $sourceXid = undef; + if ($sourceXref) { + $sourceXref->is_stored( $self->dbc ) || $self->store($sourceXref); + $sourceXid = $sourceXref->dbID; + } + $sth->bind_param( 1, $object_xref_id, SQL_INTEGER ); + $sth->bind_param( 2, $sourceXid, SQL_INTEGER ); + $sth->bind_param( 3, $linkage_type, SQL_VARCHAR ); + $sth->execute(); + } #end foreach + } #end elsif + } # end if ($object_xref_id) + return $object_xref_id; +} + +=head2 _check_external_db + + Arg [1] : DBEntry object + Arg [2] : Boolean - when true, the DBEntry release is ignored in the lookup + Description: Looks for a record of the given external database + Exceptions : Throws on missing external database entry + Returntype : Int - the external_db_id of the matching record - return $dbX; +=cut + +sub _check_external_db { + my ($self,$db_entry,$ignore) = @_; + my ($sql,@bound_params,$sql_helper,$db_name,$db_release); + + $db_name = $db_entry->dbname(); + $db_release = $db_entry->release(); + $sql_helper = $self->dbc->sql_helper; + + $sql = 'SELECT external_db_id FROM external_db WHERE db_name = ?'; + push 
@bound_params,$db_name; + unless ($ignore) { + if ($db_release) { + $sql .= ' AND db_release = ?'; + push @bound_params,$db_release; + } else { + $sql .= ' AND db_release is NULL'; + } + } + + my ($db_id) = @{ $sql_helper->execute_simple(-SQL => $sql, -PARAMS => \@bound_params) }; + + if ($db_id) { + return $db_id; + } + else { + throw( sprintf( "external_db [%s] release [%s] does not exist", + $db_name, $db_release) + ); + } } +=head2 _store_or_fetch_xref + + Arg [1] : DBEntry object + Arg [2] : Int - the external_db_id of the external database the xref belongs to + Description: Thread-safe method for adding xrefs, or otherwise returning + an xref ID for the inserted or retrieved xref. Also inserts + synonyms for that xref when it is entirely new. + Returns : Int - the DB ID of the xref after insertion or retrieval +=cut +sub _store_or_fetch_xref { + my $self = shift; + my $dbEntry = shift; + my $dbRef = shift; + my $xref_id; + + my $sth = $self->prepare( " + INSERT IGNORE INTO xref + SET dbprimary_acc = ?, + display_label = ?, + version = ?, + description = ?, + external_db_id = ?, + info_type = ?, + info_text = ?"); + $sth->bind_param(1, $dbEntry->primary_id,SQL_VARCHAR); + $sth->bind_param(2, $dbEntry->display_id,SQL_VARCHAR); + $sth->bind_param(3, $dbEntry->version,SQL_VARCHAR); + $sth->bind_param(4, $dbEntry->description,SQL_VARCHAR); + $sth->bind_param(5, $dbRef,SQL_INTEGER); + $sth->bind_param(6, ($dbEntry->info_type || 'NONE'), SQL_VARCHAR); + $sth->bind_param(7, ($dbEntry->info_text || ''), SQL_VARCHAR); + + $sth->execute(); + $xref_id = $self->last_insert_id('xref_id',undef,'xref'); + $sth->finish(); + + if ($xref_id) { #insert was successful, store supplementary synonyms + # thread safety no longer an issue. + my $synonym_check_sth = $self->prepare( + "SELECT xref_id, synonym + FROM external_synonym + WHERE xref_id = ? 
+ AND synonym = ?"); + + my $synonym_store_sth = $self->prepare( + "INSERT ignore INTO external_synonym + SET xref_id = ?, synonym = ?"); + + my $synonyms = $dbEntry->get_all_synonyms(); + foreach my $syn ( @$synonyms ) { + $synonym_check_sth->bind_param(1,$xref_id,SQL_INTEGER); + $synonym_check_sth->bind_param(2,$syn,SQL_VARCHAR); + $synonym_check_sth->execute(); + my ($dbSyn) = $synonym_check_sth->fetchrow_array(); + $synonym_store_sth->bind_param(1,$xref_id,SQL_INTEGER); + $synonym_store_sth->bind_param(2,$syn,SQL_VARCHAR); + $synonym_store_sth->execute() if(!$dbSyn); + } + $synonym_check_sth->finish(); + $synonym_store_sth->finish(); + + } else { # xref_id already exists, retrieve it + my $sql = 'SELECT xref_id FROM xref + WHERE + dbprimary_acc = ? + AND display_label = ? + AND version = ? + AND external_db_id = ? + AND info_type = ? + AND info_text = ? + AND description'; + if ($dbEntry->description) {$sql .= ' = ?'} + else {$sql .= ' is NULL'} + + $sth = $self->prepare( $sql ); + $sth->bind_param(1, $dbEntry->primary_id,SQL_VARCHAR); + $sth->bind_param(2, $dbEntry->display_id,SQL_VARCHAR); + $sth->bind_param(3, $dbEntry->version,SQL_VARCHAR); + $sth->bind_param(4, $dbRef,SQL_INTEGER); + $sth->bind_param(5, ($dbEntry->info_type || 'NONE'), SQL_VARCHAR); + $sth->bind_param(6, ($dbEntry->info_text || ''), SQL_VARCHAR); + if ($dbEntry->description) {$sth->bind_param(7, $dbEntry->description,SQL_VARCHAR);} + $sth->execute(); + ($xref_id) = $sth->fetchrow_array(); + $sth->finish; + } + + return $xref_id; +} =head2 exists diff --git a/modules/t/test-genome-DBs/homo_sapiens/core/xref.sql b/modules/t/test-genome-DBs/homo_sapiens/core/xref.sql index 0b6d717a32..131bd3830c 100644 --- a/modules/t/test-genome-DBs/homo_sapiens/core/xref.sql +++ b/modules/t/test-genome-DBs/homo_sapiens/core/xref.sql @@ -7,7 +7,6 @@ CREATE TABLE `xref` ( `description` text collate latin1_bin, `info_type` 
enum('NONE','PROJECTION','MISC','DEPENDENT','DIRECT','SEQUENCE_MATCH','INFERRED_PAIR','PROBE','UNMAPPED','CHECKSUM') collate latin1_bin NOT NULL default 'NONE', `info_text` varchar(255) collate latin1_bin NOT NULL default '', - `priority` int(11) default NULL, PRIMARY KEY (`xref_id`), UNIQUE KEY `id_index` (`dbprimary_acc`,`external_db_id`,`info_type`,`info_text`), KEY `display_index` (`display_label`) diff --git a/modules/t/test-genome-DBs/homo_sapiens/core/xref.txt b/modules/t/test-genome-DBs/homo_sapiens/core/xref.txt index d0ed823482..aef5bd6065 100644 --- a/modules/t/test-genome-DBs/homo_sapiens/core/xref.txt +++ b/modules/t/test-genome-DBs/homo_sapiens/core/xref.txt @@ -1,185 +1,185 @@ -614 6 GO:0006464 GO:0006464 1 description \D \D 1 -6990 6 GO:0004835 GO:0004835 1 description \D \D 1 -102409 2 AL031658 AL031658 1 description \D \D 1 -152200 7 Q9BR19 Q9BR19 1 description \D \D 1 -152201 3 CAC33959 CAC33959 1 description \D \D 1 -152202 5 16118 C20orf125 1 description \D \D 1 -152191 7 Q9BR18 Q9BR18 1 description \D \D 1 -152192 3 CAC33960 CAC33960 1 description \D \D 1 -40088 9 NM_030815 NM_030815 1 description \D \D 1 -40089 10 81572 81572 1 description \D \D 1 -40090 5 16119 C20orf126 1 description \D \D 1 -102414 7 Q9NUG6 Q9NUG6 1 description \D \D 1 -102415 3 CAB88101 CAB88101 1 description \D \D 1 -102411 7 Q9NUG5 Q9NUG5 1 description \D \D 1 -102412 3 CAB88102 CAB88102 1 description \D \D 1 -102413 5 16152 C20orf159 1 description \D \D 1 -34984 9 NM_080625 NM_080625 1 description \D \D 1 -34985 10 140706 140706 1 description \D \D 1 -34986 5 16153 C20orf160 1 description \D \D 1 -51533 7 Q96MD5 Q96MD5 1 description \D \D 1 -51534 2 AK057090 AK057090 1 description \D \D 1 -51535 3 BAB71363 BAB71363 1 description \D \D 1 -994 6 GO:0007242 GO:0007242 1 description \D \D 1 -102408 7 Q9NUG4 Q9NUG4 1 description \D \D 1 -102410 3 CAB88103 CAB88103 1 description \D \D 1 -257 6 GO:0004674 GO:0004674 1 description \D \D 1 -258 6 GO:0004713 GO:0004713 1 
description \D \D 1 -259 6 GO:0005524 GO:0005524 1 description \D \D 1 -260 6 GO:0006468 GO:0006468 1 description \D \D 1 -612 6 GO:0016740 GO:0016740 1 description \D \D 1 -5371 6 GO:0007498 GO:0007498 1 description \D \D 1 -120579 2 AL049539 AL049539 1 description \D \D 1 -158504 1 P08631 HCK_HUMAN 1 description \D \D 1 -158505 2 M16591 M16591 1 description \D \D 1 -158506 3 AAA52643 AAA52643 1 description \D \D 1 -158507 2 M16592 M16592 1 description \D \D 1 -158508 3 AAA52644 AAA52644 1 description \D \D 1 -158509 2 BC014435 BC014435 1 description \D \D 1 -158510 3 AAH14435 AAH14435 1 description \D \D 1 -158511 2 AK026432 AK026432 1 description \D \D 1 -158512 3 BAB15482 BAB15482 1 description \D \D 1 -158513 3 CAB75606 CAB75606 1 description \D \D 1 -158514 2 X58741 X58741 1 description \D \D 1 -158515 3 CAA41565 CAA41565 1 description \D \D 1 -158516 2 X58742 X58742 1 description \D \D 1 -158517 2 X58743 X58743 1 description \D \D 1 -158518 8 2HCK 2HCK 1 description \D \D 1 -158519 8 3HCK 3HCK 1 description \D \D 1 -158520 8 4HCK 4HCK 1 description \D \D 1 -158521 8 5HCK 5HCK 1 description \D \D 1 -158522 8 1AD5 1AD5 1 description \D \D 1 -158523 8 1BU1 1BU1 1 description \D \D 1 -158524 4 142370 142370 1 description \D \D 1 -158525 5 4840 HCK 1 description \D \D 1 -166799 9 NM_002110 NM_002110 1 description \D \D 1 -166800 10 3055 3055 1 description \D \D 1 -438 6 GO:0016021 GO:0016021 1 description \D \D 1 -793 6 GO:0006810 GO:0006810 1 description \D \D 1 -1166 6 GO:0005215 GO:0005215 1 description \D \D 1 -37039 9 NM_014742 NM_014742 1 description \D \D 1 -37040 10 9777 9777 1 description \D \D 1 -120576 1 Q92544 T9S4_HUMAN 1 description \D \D 1 -120577 2 D87444 D87444 1 description \D \D 1 -120578 3 BAA13385 BAA13385 1 description \D \D 1 -120580 3 CAB75607 CAB75607 1 description \D \D 1 -120581 2 BC021107 BC021107 1 description \D \D 1 -120582 3 AAH21107 AAH21107 1 description \D \D 1 -120583 2 BC022850 BC022850 1 description \D \D 1 -120584 3 AAH22850 
AAH22850 1 description \D \D 1 -143 6 GO:0005634 GO:0005634 1 description \D \D 1 -315 6 GO:0003677 GO:0003677 1 description \D \D 1 -3908 6 GO:0006334 GO:0006334 1 description \D \D 1 -28816 2 AL121897 AL121897 1 description \D \D 1 -128605 7 Q9H489 Q9H489 1 description \D \D 1 -128606 3 CAC16422 CAC16422 1 description \D \D 1 -128607 5 16256 TSPYL3 1 description \D \D 1 -139 6 GO:0003700 GO:0003700 1 description \D \D 1 -141 6 GO:0006355 GO:0006355 1 description \D \D 1 -17031 9 NM_002657 NM_002657 1 description \D \D 1 -17032 4 604866 604866 1 description \D \D 1 -17033 10 5326 5326 1 description \D \D 1 -17034 5 9047 PLAGL2 1 description \D \D 1 -58594 1 Q9UPG8 PAL2_HUMAN 1 description \D \D 1 -58595 2 AF006005 AF006005 1 description \D \D 1 -58596 3 AAC34252 AAC34252 1 description \D \D 1 -58597 2 D83784 D83784 1 description \D \D 1 -58598 3 BAA12113 BAA12113 1 description \D \D 1 -58599 3 CAC16423 CAC16423 1 description \D \D 1 -168444 7 Q9BW76 Q9BW76 1 description \D \D 1 -168445 2 BC000582 BC000582 1 description \D \D 1 -168446 3 AAH00582 AAH00582 1 description \D \D 1 -673 6 GO:0006493 GO:0006493 1 description \D \D 1 -1284 6 GO:0016932 GO:0016932 1 description \D \D 1 -19243 6 GO:0030173 GO:0030173 1 description \D \D 1 -54393 6 GO:0008417 GO:0008417 1 description \D \D 1 -128347 6 GO:0006004 GO:0006004 1 description \D \D 1 -128591 7 Q9H488 Q9H488 1 description \D \D 1 -128592 2 AF375884 AF375884 1 description \D \D 1 -128593 3 AAL09576 AAL09576 1 description \D \D 1 -128594 3 CAC16424 CAC16424 1 description \D \D 1 -128595 2 D80002 D80002 1 description \D \D 1 -128596 3 BAA11497 BAA11497 1 description \D \D 1 -128597 5 14988 POFUT1 1 description \D \D 1 -20 6 GO:0005875 GO:0005875 1 description \D \D 1 -891 6 GO:0007017 GO:0007017 1 description \D \D 1 -1444 6 GO:0003777 GO:0003777 1 description \D \D 1 -28813 1 O15066 KF3B_HUMAN 1 description \D \D 1 -28814 2 AB002357 AB002357 1 description \D \D 1 -28815 3 BAA20815 BAA20815 1 description \D \D 1 
-28817 3 CAC16425 CAC16425 1 description \D \D 1 -28818 4 603754 603754 1 description \D \D 1 -28819 5 6320 KIF3B 1 description \D \D 1 -28820 6 GO:0008574 GO:0008574 1 description \D \D 1 -28821 6 GO:0007368 GO:0007368 1 description \D \D 1 -28822 6 GO:0008089 GO:0008089 1 description \D \D 1 -28823 6 GO:0005873 GO:0005873 1 description \D \D 1 -38683 9 NM_004798 NM_004798 1 description \D \D 1 -38684 10 9371 9371 1 description \D \D 1 -29946 7 Q9UFP8 Q9UFP8 1 description \D \D 1 -29947 2 AL117518 AL117518 1 description \D \D 1 -29948 3 CAB55975 CAB55975 1 description \D \D 1 -128324 7 Q9H466 Q9H466 1 description \D \D 1 -128325 2 AL121583 AL121583 1 description \D \D 1 -128326 3 CAC00581 CAC00581 1 description \D \D 1 -34890 9 NM_080616 NM_080616 1 description \D \D 1 -34891 10 140688 140688 1 description \D \D 1 -34892 5 16106 C20orf112 1 description \D \D 1 -53919 7 Q96MY1 Q96MY1 1 description \D \D 1 -53920 2 AK056286 AK056286 1 description \D \D 1 -53921 3 BAB71138 BAB71138 1 description \D \D 1 -89324 2 AL034550 AL034550 1 description \D \D 1 -152329 7 Q9BR34 Q9BR34 1 description \D \D 1 -152330 3 CAC33994 CAC33994 1 description \D \D 1 -89326 7 Q9NQF6 Q9NQF6 1 description \D \D 1 -89327 3 CAC00609 CAC00609 1 description \D \D 1 -89323 7 Q9NQF5 Q9NQF5 1 description \D \D 1 -89325 3 CAC00610 CAC00610 1 description \D \D 1 -332 7 Q9BXE6 Q9BXE6 1 description \D \D 1 -333 2 AF336876 AF336876 1 description \D \D 1 -334 3 AAK21302 AAK21302 1 description \D \D 1 -168321 7 Q9BW53 Q9BW53 1 description \D \D 1 -168322 2 BC000628 BC000628 1 description \D \D 1 -168323 3 AAH00628 AAH00628 1 description \D \D 1 -168324 5 16223 C20orf92 1 description \D \D 1 -1043 6 GO:0008372 GO:0008372 1 description \D \D 1 -11120 6 GO:0006306 GO:0006306 1 description \D \D 1 -15196 6 GO:0003886 GO:0003886 1 description \D \D 1 -15947 1 Q9UBC3 DM3B_HUMAN 1 description \D \D 1 -15948 2 AF156487 AF156487 1 description DEPENDENT test dependent 1 -15949 3 AAD53062 AAD53062 1 description 
DEPENDENT test dependent 2 1 -15950 2 AF156488 AF156488 1 description SEQUENCE_MATCH test seq match 1 -15951 3 AAD53063 AAD53063 1 description INFERRED_PAIR test infereed pair 1 -15952 2 AF176228 AF176228 1 description \D \D 1 -15953 3 AAF04015 AAF04015 1 description \D \D 1 -15954 2 AL035071 AL035071 1 description \D \D 1 -15955 3 CAB53069 CAB53069 1 description \D \D 1 -15956 3 CAB53070 CAB53070 1 description \D \D 1 -15957 3 CAB53071 CAB53071 1 description \D \D 1 -15958 2 AF129267 AF129267 1 description \D \D 1 -15959 3 AAD31432 AAD31432 1 description \D \D 1 -15960 2 AF129268 AF129268 1 description \D \D 1 -15961 3 AAD31433 AAD31433 1 description \D \D 1 -15962 2 AF129269 AF129269 1 description \D \D 1 -15963 3 AAD31434 AAD31434 1 description \D \D 1 -15964 4 242860 242860 1 description \D \D 1 -15965 4 602900 602900 1 description \D \D 1 -15966 5 2979 DNMT3B 1 description \D \D 1 -15967 6 GO:0008326 GO:0008326 1 description \D \D 1 -72100 9 NM_006892 NM_006892 1 description \D \D 1 -72101 10 1789 1789 1 description \D \D 1 -50 6 GO:0000074 GO:0000074 1 description \D \D 1 -51 6 GO:0007048 GO:0007048 1 description \D \D 1 -269 6 GO:0008283 GO:0008283 1 description \D \D 1 -2617 6 GO:0008017 GO:0008017 1 description \D \D 1 -18952 6 GO:0008022 GO:0008022 1 description \D \D 1 -97756 9 NM_012325 NM_012325 1 description \D \D 1 -97757 4 603108 603108 1 description \D \D 1 -97758 10 22919 22919 1 description \D \D 1 -97759 5 6890 MAPRE1 1 description \D \D 1 -103295 1 Q15691 MAE1_HUMAN 1 description \D \D 1 -103296 2 U24166 U24166 1 description \D \D 1 -103297 3 AAC09471 AAC09471 1 description \D \D 1 -103298 3 CAB53072 CAB53072 1 description \D \D 1 -999998 11 IPR000001 IPR000001 1 Test interpro desc1 \D \D 1 -999999 11 IPR000010 IPR000010 1 Test interpro desc2 \D \D 1 +614 6 GO:0006464 GO:0006464 1 description \D \D +6990 6 GO:0004835 GO:0004835 1 description \D \D +102409 2 AL031658 AL031658 1 description \D \D +152200 7 Q9BR19 Q9BR19 1 description \D \D 
+152201 3 CAC33959 CAC33959 1 description \D \D +152202 5 16118 C20orf125 1 description \D \D +152191 7 Q9BR18 Q9BR18 1 description \D \D +152192 3 CAC33960 CAC33960 1 description \D \D +40088 9 NM_030815 NM_030815 1 description \D \D +40089 10 81572 81572 1 description \D \D +40090 5 16119 C20orf126 1 description \D \D +102414 7 Q9NUG6 Q9NUG6 1 description \D \D +102415 3 CAB88101 CAB88101 1 description \D \D +102411 7 Q9NUG5 Q9NUG5 1 description \D \D +102412 3 CAB88102 CAB88102 1 description \D \D +102413 5 16152 C20orf159 1 description \D \D +34984 9 NM_080625 NM_080625 1 description \D \D +34985 10 140706 140706 1 description \D \D +34986 5 16153 C20orf160 1 description \D \D +51533 7 Q96MD5 Q96MD5 1 description \D \D +51534 2 AK057090 AK057090 1 description \D \D +51535 3 BAB71363 BAB71363 1 description \D \D +994 6 GO:0007242 GO:0007242 1 description \D \D +102408 7 Q9NUG4 Q9NUG4 1 description \D \D +102410 3 CAB88103 CAB88103 1 description \D \D +257 6 GO:0004674 GO:0004674 1 description \D \D +258 6 GO:0004713 GO:0004713 1 description \D \D +259 6 GO:0005524 GO:0005524 1 description \D \D +260 6 GO:0006468 GO:0006468 1 description \D \D +612 6 GO:0016740 GO:0016740 1 description \D \D +5371 6 GO:0007498 GO:0007498 1 description \D \D +120579 2 AL049539 AL049539 1 description \D \D +158504 1 P08631 HCK_HUMAN 1 description \D \D +158505 2 M16591 M16591 1 description \D \D +158506 3 AAA52643 AAA52643 1 description \D \D +158507 2 M16592 M16592 1 description \D \D +158508 3 AAA52644 AAA52644 1 description \D \D +158509 2 BC014435 BC014435 1 description \D \D +158510 3 AAH14435 AAH14435 1 description \D \D +158511 2 AK026432 AK026432 1 description \D \D +158512 3 BAB15482 BAB15482 1 description \D \D +158513 3 CAB75606 CAB75606 1 description \D \D +158514 2 X58741 X58741 1 description \D \D +158515 3 CAA41565 CAA41565 1 description \D \D +158516 2 X58742 X58742 1 description \D \D +158517 2 X58743 X58743 1 description \D \D +158518 8 2HCK 2HCK 1 description \D 
\D +158519 8 3HCK 3HCK 1 description \D \D +158520 8 4HCK 4HCK 1 description \D \D +158521 8 5HCK 5HCK 1 description \D \D +158522 8 1AD5 1AD5 1 description \D \D +158523 8 1BU1 1BU1 1 description \D \D +158524 4 142370 142370 1 description \D \D +158525 5 4840 HCK 1 description \D \D +166799 9 NM_002110 NM_002110 1 description \D \D +166800 10 3055 3055 1 description \D \D +438 6 GO:0016021 GO:0016021 1 description \D \D +793 6 GO:0006810 GO:0006810 1 description \D \D +1166 6 GO:0005215 GO:0005215 1 description \D \D +37039 9 NM_014742 NM_014742 1 description \D \D +37040 10 9777 9777 1 description \D \D +120576 1 Q92544 T9S4_HUMAN 1 description \D \D +120577 2 D87444 D87444 1 description \D \D +120578 3 BAA13385 BAA13385 1 description \D \D +120580 3 CAB75607 CAB75607 1 description \D \D +120581 2 BC021107 BC021107 1 description \D \D +120582 3 AAH21107 AAH21107 1 description \D \D +120583 2 BC022850 BC022850 1 description \D \D +120584 3 AAH22850 AAH22850 1 description \D \D +143 6 GO:0005634 GO:0005634 1 description \D \D +315 6 GO:0003677 GO:0003677 1 description \D \D +3908 6 GO:0006334 GO:0006334 1 description \D \D +28816 2 AL121897 AL121897 1 description \D \D +128605 7 Q9H489 Q9H489 1 description \D \D +128606 3 CAC16422 CAC16422 1 description \D \D +128607 5 16256 TSPYL3 1 description \D \D +139 6 GO:0003700 GO:0003700 1 description \D \D +141 6 GO:0006355 GO:0006355 1 description \D \D +17031 9 NM_002657 NM_002657 1 description \D \D +17032 4 604866 604866 1 description \D \D +17033 10 5326 5326 1 description \D \D +17034 5 9047 PLAGL2 1 description \D \D +58594 1 Q9UPG8 PAL2_HUMAN 1 description \D \D +58595 2 AF006005 AF006005 1 description \D \D +58596 3 AAC34252 AAC34252 1 description \D \D +58597 2 D83784 D83784 1 description \D \D +58598 3 BAA12113 BAA12113 1 description \D \D +58599 3 CAC16423 CAC16423 1 description \D \D +168444 7 Q9BW76 Q9BW76 1 description \D \D +168445 2 BC000582 BC000582 1 description \D \D +168446 3 AAH00582 AAH00582 1 
description \D \D +673 6 GO:0006493 GO:0006493 1 description \D \D +1284 6 GO:0016932 GO:0016932 1 description \D \D +19243 6 GO:0030173 GO:0030173 1 description \D \D +54393 6 GO:0008417 GO:0008417 1 description \D \D +128347 6 GO:0006004 GO:0006004 1 description \D \D +128591 7 Q9H488 Q9H488 1 description \D \D +128592 2 AF375884 AF375884 1 description \D \D +128593 3 AAL09576 AAL09576 1 description \D \D +128594 3 CAC16424 CAC16424 1 description \D \D +128595 2 D80002 D80002 1 description \D \D +128596 3 BAA11497 BAA11497 1 description \D \D +128597 5 14988 POFUT1 1 description \D \D +20 6 GO:0005875 GO:0005875 1 description \D \D +891 6 GO:0007017 GO:0007017 1 description \D \D +1444 6 GO:0003777 GO:0003777 1 description \D \D +28813 1 O15066 KF3B_HUMAN 1 description \D \D +28814 2 AB002357 AB002357 1 description \D \D +28815 3 BAA20815 BAA20815 1 description \D \D +28817 3 CAC16425 CAC16425 1 description \D \D +28818 4 603754 603754 1 description \D \D +28819 5 6320 KIF3B 1 description \D \D +28820 6 GO:0008574 GO:0008574 1 description \D \D +28821 6 GO:0007368 GO:0007368 1 description \D \D +28822 6 GO:0008089 GO:0008089 1 description \D \D +28823 6 GO:0005873 GO:0005873 1 description \D \D +38683 9 NM_004798 NM_004798 1 description \D \D +38684 10 9371 9371 1 description \D \D +29946 7 Q9UFP8 Q9UFP8 1 description \D \D +29947 2 AL117518 AL117518 1 description \D \D +29948 3 CAB55975 CAB55975 1 description \D \D +128324 7 Q9H466 Q9H466 1 description \D \D +128325 2 AL121583 AL121583 1 description \D \D +128326 3 CAC00581 CAC00581 1 description \D \D +34890 9 NM_080616 NM_080616 1 description \D \D +34891 10 140688 140688 1 description \D \D +34892 5 16106 C20orf112 1 description \D \D +53919 7 Q96MY1 Q96MY1 1 description \D \D +53920 2 AK056286 AK056286 1 description \D \D +53921 3 BAB71138 BAB71138 1 description \D \D +89324 2 AL034550 AL034550 1 description \D \D +152329 7 Q9BR34 Q9BR34 1 description \D \D +152330 3 CAC33994 CAC33994 1 description \D \D 
+89326 7 Q9NQF6 Q9NQF6 1 description \D \D +89327 3 CAC00609 CAC00609 1 description \D \D +89323 7 Q9NQF5 Q9NQF5 1 description \D \D +89325 3 CAC00610 CAC00610 1 description \D \D +332 7 Q9BXE6 Q9BXE6 1 description \D \D +333 2 AF336876 AF336876 1 description \D \D +334 3 AAK21302 AAK21302 1 description \D \D +168321 7 Q9BW53 Q9BW53 1 description \D \D +168322 2 BC000628 BC000628 1 description \D \D +168323 3 AAH00628 AAH00628 1 description \D \D +168324 5 16223 C20orf92 1 description \D \D +1043 6 GO:0008372 GO:0008372 1 description \D \D +11120 6 GO:0006306 GO:0006306 1 description \D \D +15196 6 GO:0003886 GO:0003886 1 description \D \D +15947 1 Q9UBC3 DM3B_HUMAN 1 description \D \D +15948 2 AF156487 AF156487 1 description DEPENDENT test dependent +15949 3 AAD53062 AAD53062 1 description DEPENDENT test dependent 2 +15950 2 AF156488 AF156488 1 description SEQUENCE_MATCH test seq match +15951 3 AAD53063 AAD53063 1 description INFERRED_PAIR test inferred pair +15952 2 AF176228 AF176228 1 description \D \D +15953 3 AAF04015 AAF04015 1 description \D \D +15954 2 AL035071 AL035071 1 description \D \D +15955 3 CAB53069 CAB53069 1 description \D \D +15956 3 CAB53070 CAB53070 1 description \D \D +15957 3 CAB53071 CAB53071 1 description \D \D +15958 2 AF129267 AF129267 1 description \D \D +15959 3 AAD31432 AAD31432 1 description \D \D +15960 2 AF129268 AF129268 1 description \D \D +15961 3 AAD31433 AAD31433 1 description \D \D +15962 2 AF129269 AF129269 1 description \D \D +15963 3 AAD31434 AAD31434 1 description \D \D +15964 4 242860 242860 1 description \D \D +15965 4 602900 602900 1 description \D \D +15966 5 2979 DNMT3B 1 description \D \D +15967 6 GO:0008326 GO:0008326 1 description \D \D +72100 9 NM_006892 NM_006892 1 description \D \D +72101 10 1789 1789 1 description \D \D +50 6 GO:0000074 GO:0000074 1 description \D \D +51 6 GO:0007048 GO:0007048 1 description \D \D +269 6 GO:0008283 GO:0008283 1 description \D \D +2617 6 GO:0008017 GO:0008017 1 description \D \D 
+18952 6 GO:0008022 GO:0008022 1 description \D \D +97756 9 NM_012325 NM_012325 1 description \D \D +97757 4 603108 603108 1 description \D \D +97758 10 22919 22919 1 description \D \D +97759 5 6890 MAPRE1 1 description \D \D +103295 1 Q15691 MAE1_HUMAN 1 description \D \D +103296 2 U24166 U24166 1 description \D \D +103297 3 AAC09471 AAC09471 1 description \D \D +103298 3 CAB53072 CAB53072 1 description \D \D +999998 11 IPR000001 IPR000001 1 Test interpro desc1 \D \D +999999 11 IPR000010 IPR000010 1 Test interpro desc2 \D \D
diff --git a/sql/patch_67_68_b.sql b/sql/patch_67_68_b.sql
new file mode 100644
index 0000000000..b0853eb844
--- /dev/null
+++ b/sql/patch_67_68_b.sql
@@ -0,0 +1,68 @@
+# patch_67_68_b.sql
+#
+# Title: Remove NULLs and duplicate rows from xref and object_xref
+#
+# Description:
+# Remove null values from xref and object_xref tables. See also DBEntryAdaptor thread safety changes
+#
+# xref.info_type and xref.info_text become NOT NULL (defaults 'NONE' and '').
+# object_xref rows that repeat another row's (ensembl_id, ensembl_object_type,
+# xref_id, linkage_annotation, analysis_id) are deleted, keeping the row with
+# the lowest object_xref_id; object_xref.analysis_id then becomes NOT NULL
+# with default 0.
+
+
+# Declare the enum with 'NONE' as its default while the column is still
+# nullable, so the UPDATE below can rewrite existing NULLs. NOTE(review):
+# presumably 'NONE' is absent from the pre-patch enum - confirm against the
+# release 67 schema.
+ALTER TABLE xref MODIFY info_type enum('NONE','PROJECTION','MISC','DEPENDENT','DIRECT','SEQUENCE_MATCH','INFERRED_PAIR','PROBE','UNMAPPED','COORDINATE_OVERLAP','CHECKSUM') DEFAULT 'NONE';
+
+UPDATE xref SET info_type='NONE' WHERE info_type is NULL;
+
+# With the NULLs rewritten, the same definition can be made NOT NULL.
+ALTER TABLE xref MODIFY info_type enum('NONE','PROJECTION','MISC','DEPENDENT','DIRECT','SEQUENCE_MATCH','INFERRED_PAIR','PROBE','UNMAPPED','COORDINATE_OVERLAP','CHECKSUM') DEFAULT 'NONE' NOT NULL;
+
+UPDATE xref SET info_text='' WHERE info_text is NULL;
+ALTER TABLE xref MODIFY info_text varchar(255) DEFAULT '' NOT NULL;
+
+# Remove duplicate nulls in object_xref table
+
+# One row per duplicated key; object_xref_id holds the id of the copy to keep.
+create temporary table object_xref_dups
+select `ensembl_id`, `ensembl_object_type`, `xref_id`, `linkage_annotation`, `analysis_id`, min(object_xref_id) as object_xref_id, count(*) as c
+from object_xref
+group by `ensembl_id`, `ensembl_object_type`, `xref_id`, `linkage_annotation`, `analysis_id`
+having c > 1;
+
+# MFD: every row of a duplicated group except the one being kept.
+# "<=>" is MySQL's NULL-safe equality: it is true when both operands are NULL,
+# which is how GROUP BY paired the rows above. Spelling the test as
+# "a = b || (a IS NULL and b IS NULL)" would depend on sql_mode, because "||"
+# means string concatenation under PIPES_AS_CONCAT (part of ANSI mode).
+create temporary table object_xref_MFD
+select ox.object_xref_id
+from object_xref ox join `object_xref_dups` oxd on (
+    ox.`ensembl_id` = oxd.`ensembl_id`
+    and ox.ensembl_object_type = oxd.ensembl_object_type
+    and ox.xref_id = oxd.xref_id
+    and ox.`linkage_annotation` <=> oxd.`linkage_annotation`
+    and ox.`analysis_id` <=> oxd.`analysis_id`
+    and oxd.`object_xref_id` <> ox.`object_xref_id`
+);
+# Index the join column used by the DELETE below.
+ALTER TABLE object_xref_MFD ADD INDEX object_xref_id_idx(object_xref_id);
+
+# Multi-table DELETE; the subquery form below is kept for reference only.
+-- DELETE FROM object_xref WHERE object_xref_id = ANY (SELECT object_xref_id FROM object_xref_MFD);
+DELETE FROM object_xref USING object_xref JOIN object_xref_MFD WHERE object_xref.object_xref_id = object_xref_MFD.object_xref_id;
+
+# Rewrite the remaining NULL analysis_ids to 0, then forbid NULL.
+# NOTE(review): presumably kept after the de-duplication on purpose (a unique
+# key covering analysis_id would reject the UPDATE otherwise) - confirm.
+UPDATE object_xref SET analysis_id = 0 WHERE analysis_id is NULL;
+ALTER TABLE object_xref MODIFY analysis_id smallint(5) unsigned DEFAULT 0 NOT NULL;
+
+# Patch identifier
+INSERT INTO meta (species_id, meta_key, meta_value)
+  VALUES (NULL, 'patch', 'patch_67_68_b.sql|xref_uniqueness');
-- 
GitLab