Commit e3b39cb6 authored by Andy Yates's avatar Andy Yates
Browse files

Additional patch to bring in version into the unique key for xref. Also apply...

Additional patch to bring in version into the unique key for xref. Also apply changes to gene and transcript
parent 8ad112e4
......@@ -884,17 +884,20 @@ sub _store_or_fetch_xref {
$synonym_check_sth->finish();
$synonym_store_sth->finish();
} else { # xref_id already exists, retrieve it
} else { # xref_id already exists, retrieve it according to fields in the unique key
my $sql = 'SELECT xref_id FROM xref
WHERE dbprimary_acc = ?
AND version =?
AND external_db_id = ?
AND info_type = ?
AND info_text = ?';
my $info_type = $dbEntry->info_type() || 'NONE';
my $info_text = $dbEntry->info_text() || q{};
my $version = $dbEntry->version() || q{0};
$sth = $self->prepare( $sql );
$sth->bind_param(1, $dbEntry->primary_id,SQL_VARCHAR);
$sth->bind_param(2, $version, SQL_VARCHAR);
$sth->bind_param(2, $dbRef, SQL_INTEGER);
$sth->bind_param(3, $info_type, SQL_VARCHAR);
$sth->bind_param(4, $info_text, SQL_VARCHAR);
......@@ -904,7 +907,7 @@ sub _store_or_fetch_xref {
if(!$xref_id) {
my $msg = 'Cannot find an xref id for %s (version=%d) with external db id %d.';
throw(sprintf($msg, $dbEntry->primary_id(), $dbEntry->version(), $dbRef))
throw(sprintf($msg, $dbEntry->primary_id(), $version, $dbRef))
}
}
......
......@@ -8,7 +8,7 @@ CREATE TABLE `xref` (
`info_type` enum('NONE','PROJECTION','MISC','DEPENDENT','DIRECT','SEQUENCE_MATCH','INFERRED_PAIR','PROBE','UNMAPPED','CHECKSUM') collate latin1_bin NOT NULL default 'NONE',
`info_text` varchar(255) collate latin1_bin NOT NULL default '',
PRIMARY KEY (`xref_id`),
UNIQUE KEY `id_index` (`dbprimary_acc`,`external_db_id`,`info_type`,`info_text`),
UNIQUE KEY `id_index` (`dbprimary_acc`,`external_db_id`,`info_type`,`info_text`,`version`),
KEY `display_index` (`display_label`),
KEY info_type_idx (info_type)
) ENGINE=MyISAM AUTO_INCREMENT=1000006 DEFAULT CHARSET=latin1 COLLATE=latin1_bin;
......@@ -884,7 +884,7 @@ CREATE TABLE `xref` (
`info_type` enum('NONE','PROJECTION','MISC','DEPENDENT','DIRECT','SEQUENCE_MATCH','INFERRED_PAIR','PROBE','UNMAPPED','CHECKSUM') COLLATE latin1_bin NOT NULL DEFAULT 'NONE',
`info_text` varchar(255) COLLATE latin1_bin NOT NULL DEFAULT '',
PRIMARY KEY (`xref_id`),
UNIQUE KEY `id_index` (`dbprimary_acc`,`external_db_id`,`info_type`,`info_text`),
UNIQUE KEY `id_index` (`dbprimary_acc`,`external_db_id`,`info_type`,`info_text`,`version`),
KEY `display_index` (`display_label`)
) ENGINE=MyISAM;
# patch_67_68_b.sql
#
# Title:
# Title: Xref unique constraint enforcement
#
# Description:
# Remove null values from xref and object_xref tables. See also DBEntryAdaptor thread safety changes
# Remove null values from xref and object_xref tables enforcing the unique
# index. Fields are unique on:
#
# XREF - primary accession, version, DB, info type and info text
# OBJECT XREF - xref id, ensembl id, ensembl object type and analysis id
#
# This means we now force info type to be NONE, info text to be '' and
# analysis id to be 0 IF THEY WOULD HAVE BEEN NULL.
#
# See also DBEntryAdaptor thread safety changes
## NB A lot of the functions here use IFNULL(). We could have used the NULL safe comparison operator <=> but were unaware at the time of its existence
# Remove duplicate nulls in xref table
# Need to find the duplicates first and select the lowest xref_id as our "canonical" xref_id
create temporary table xref_dups
select `dbprimary_acc`,`external_db_id`,IFNULL(`info_type`, 'NONE') as info_type, IFNULL(`info_text`, '') as info_text, min(xref_id) as xref_id, count(*) as c
select `dbprimary_acc`,`version`,`external_db_id`,IFNULL(`info_type`, 'NONE') as info_type, IFNULL(`info_text`, '') as info_text, min(xref_id) as xref_id, count(*) as c
from xref
group by `dbprimary_acc`,`external_db_id`,IFNULL(`info_type`, 'NONE'),IFNULL(`info_text`, '')
group by `dbprimary_acc`,`version`,`external_db_id`,IFNULL(`info_type`, 'NONE'),IFNULL(`info_text`, '')
having c > 1;
# Mark all other duplicate xrefs and flag their new canonical ID
......@@ -20,21 +31,34 @@ having c > 1;
create temporary table xref_MFD
select x.xref_id, xd.xref_id AS canonical_xref_id
from xref x join `xref_dups` xd on (
x.`dbprimary_acc` = xd.`dbprimary_acc`
x.`dbprimary_acc` = xd.`dbprimary_acc`
and x.`version` = xd.`version`
and x.`external_db_id` = xd.`external_db_id`
and IFNULL(x.`info_type`, 'NONE') = xd.`info_type`
and IFNULL(x.`info_text`, '') = xd.info_text
and xd.`xref_id` <> x.`xref_id`
);
# Remove the unique constraint
ALTER TABLE xref DROP KEY id_index;
# Delete the duplicates
DELETE FROM xref USING xref JOIN xref_MFD WHERE xref.xref_id = xref_MFD.xref_id;
# Update object_xref xref_ids to the canonical ones
# Update object_xref, dependent_xref, ontology_xref xref_ids to the canonical ones
UPDATE object_xref ox join xref_MFD xmfd using (xref_id) set ox.xref_id = xmfd.canonical_xref_id;
UPDATE dependent_xref dx join xref_MFD xmfd on (dx.master_xref_id = xmfd.xref_id) set dx.master_xref_id = xmfd.canonical_xref_id;
UPDATE dependent_xref dx join xref_MFD xmfd on (dx.dependent_xref_id = xmfd.xref_id) set dx.dependent_xref_id = xmfd.canonical_xref_id;
UPDATE ontology_xref ox join xref_MFD xmfd on (ox.source_xref_id = xmfd.xref_id) set ox.source_xref_id = xmfd.canonical_xref_id;
UPDATE gene g join xref_MFD xmfd on (g.display_xref_id = xmfd.xref_id) set g.display_xref_id = xmfd.canonical_xref_id;
UPDATE transcript t join xref_MFD xmfd on (t.display_xref_id = xmfd.xref_id) set t.display_xref_id = xmfd.canonical_xref_id;
# Apply info_text update and set not null constraint
UPDATE xref SET info_text='' WHERE info_text is NULL;
......@@ -48,6 +72,9 @@ UPDATE xref SET info_type='NONE' WHERE info_type is NULL;
ALTER TABLE xref MODIFY info_type enum('NONE','PROJECTION','MISC','DEPENDENT','DIRECT','SEQUENCE_MATCH','INFERRED_PAIR','PROBE','UNMAPPED','COORDINATE_OVERLAP','CHECKSUM') DEFAULT 'NONE' NOT NULL;
# Add the constraint back
ALTER TABLE xref ADD UNIQUE KEY id_index (dbprimary_acc, external_db_id, info_type, info_text, version);
# Remove duplicate nulls in object_xref table
create temporary table object_xref_dups
......
# patch_67_68_e.sql
#
# Title: Xref unique index fix
#
# Description: Alters the table xref to turn on the unique index. The uniquness
# required has already been applied by patch b. This is ensuring
# users are synchronized with the 67 schema and we do so with
# a stored procedure which can optionally add the index if it was
# missing
#
DELIMITER $$
DROP PROCEDURE IF EXISTS `create_index_if_not_exists`$$
CREATE PROCEDURE `create_index_if_not_exists`(table_schema_vc varchar(64))
SQL SECURITY INVOKER
BEGIN
set @Index_version_count = (
select count(1)
FROM INFORMATION_SCHEMA.STATISTICS
WHERE table_name = 'xref'
and index_name = 'id_index'
and TABLE_SCHEMA = table_schema_vc
and COLUMN_NAME = 'version'
);
IF @Index_version_count <=> 0 THEN
PREPARE idx_remove from 'alter table xref drop index id_index;';
EXECUTE idx_remove;
DEALLOCATE PREPARE idx_remove;
PREPARE stmt FROM 'Alter table xref ADD UNIQUE INDEX id_index (dbprimary_acc,external_db_id,info_type,info_text,version);';
EXECUTE stmt;
DEALLOCATE PREPARE stmt;
END IF;
END$$
DELIMITER ;
call create_index_if_not_exists(database());
DROP PROCEDURE `create_index_if_not_exists`;
# Allows us to do the following without creating new indexes without good reason
# ALTER TABLE xref DROP KEY id_index;
# ALTER TABLE xref ADD UNIQUE KEY id_index (dbprimary_acc, version, external_db_id, info_type, info_text);
# Patch identifier:
INSERT INTO meta (species_id, meta_key, meta_value)
VALUES (NULL, 'patch', 'patch_67_68_e.sql|fix_67_68_e_xref_index');
\ No newline at end of file
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment