Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
E
ensembl
Manage
Activity
Members
Labels
Plan
Issues
0
Issue boards
Milestones
Iterations
Wiki
Requirements
Jira
Code
Merge requests
1
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Locked files
Build
Pipelines
Jobs
Pipeline schedules
Test cases
Artifacts
Deploy
Releases
Package Registry
Container Registry
Operate
Environments
Terraform modules
Monitor
Incidents
Service Desk
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Code review analytics
Issue analytics
Insights
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Terms and privacy
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
ensembl-gh-mirror
ensembl
Commits
22ee1374
Commit
22ee1374
authored
13 years ago
by
Andreas Kusalananda Kähäri
Browse files
Options
Downloads
Patches
Plain Diff
Moved to Monika's private repository.
parent
4bccb1da
No related branches found
Branches containing commit
No related tags found
Tags containing commit
No related merge requests found
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
sql/delete_duplicates.sql
+0
-40
0 additions, 40 deletions
sql/delete_duplicates.sql
sql/link_xrefs_to_genes.sql
+0
-59
0 additions, 59 deletions
sql/link_xrefs_to_genes.sql
with
0 additions
and
99 deletions
sql/delete_duplicates.sql
deleted
100644 → 0
+
0
−
40
View file @
4bccb1da
CREATE
PROCEDURE
delete_duplicates
()
BEGIN
DECLARE
v_identifier
varChar
(
255
);
DECLARE
v_unmapped_object_id
INT
;
DECLARE
existing_object_id
INT
DEFAULT
0
;
DECLARE
v_parent
varChar
(
255
);
DECLARE
v_ensembl_id
INT
;
DECLARE
v_unmapped_reason_id
INT
;
DECLARE
v_ensembl_object_type
varChar
(
15
);
DECLARE
v_external_db_id
INT
;
DECLARE
done
INT
DEFAULT
0
;
DECLARE
row_count
INT
DEFAULT
0
;
DECLARE
cur1
CURSOR
FOR
SELECT
unmapped_object_id
,
identifier
,
ensembl_id
,
parent
,
unmapped_reason_id
,
ensembl_object_type
,
external_db_id
from
unmapped_object
where
ensembl_id
is
not
null
and
parent
is
not
null
and
ensembl_object_type
is
not
null
and
external_db_id
is
not
null
order
by
unmapped_object_id
;
DECLARE
CONTINUE
HANDLER
FOR
NOT
FOUND
SET
done
=
TRUE
;
OPEN
cur1
;
main_loop
:
LOOP
SET
done
=
FALSE
;
FETCH
cur1
INTO
v_unmapped_object_id
,
v_identifier
,
v_ensembl_id
,
v_parent
,
v_unmapped_reason_id
,
v_ensembl_object_type
,
v_external_db_id
;
IF
done
THEN
LEAVE
main_loop
;
END
IF
;
SET
row_count
=
0
;
SELECT
COUNT
(
1
)
INTO
row_count
FROM
unmapped_object
WHERE
identifier
=
v_identifier
and
ensembl_id
=
v_ensembl_id
and
parent
=
v_parent
and
unmapped_reason_id
=
v_unmapped_reason_id
and
ensembl_object_type
=
v_ensembl_object_type
and
external_db_id
=
v_external_db_id
;
IF
row_count
>
1
THEN
DELETE
FROM
unmapped_object
where
unmapped_object_id
<>
v_unmapped_object_id
and
identifier
=
v_identifier
and
ensembl_id
=
v_ensembl_id
and
parent
=
v_parent
and
unmapped_reason_id
=
v_unmapped_reason_id
and
ensembl_object_type
=
v_ensembl_object_type
and
external_db_id
=
v_external_db_id
;
END
IF
;
END
LOOP
;
CLOSE
cur1
;
END
This diff is collapsed.
Click to expand it.
sql/link_xrefs_to_genes.sql
deleted
100644 → 0
+
0
−
59
View file @
4bccb1da
CREATE
PROCEDURE
link_xrefs_to_genes
()
BEGIN
DECLARE
count_deleted
,
count_updated
INT
DEFAULT
0
;
DECLARE
v_object_xref_id
INT
;
DECLARE
existing_object_xref
INT
DEFAULT
0
;
DECLARE
v_ensembl_id
INT
;
DECLARE
v_ensembl_object_type
varChar
(
15
);
DECLARE
v_xref_id
INT
;
DECLARE
v_gene_id
INT
DEFAULT
0
;
DECLARE
done
INT
DEFAULT
0
;
DECLARE
cur1
CURSOR
FOR
SELECT
object_xref_id
,
ensembl_id
,
ensembl_object_type
,
o
.
xref_id
FROM
object_xref
o
INNER
JOIN
xref
x
on
o
.
xref_id
=
x
.
xref_id
INNER
JOIN
external_db
e
on
e
.
external_db_id
=
x
.
external_db_id
WHERE
db_name
in
(
'UniGene'
,
'Uniprot_genename'
,
'DBASS3'
,
'DBASS5'
)
AND
ensembl_object_type
<>
'Gene'
;
DECLARE
CONTINUE
HANDLER
FOR
NOT
FOUND
SET
done
=
TRUE
;
OPEN
cur1
;
main_loop
:
LOOP
SET
done
=
FALSE
;
FETCH
cur1
INTO
v_object_xref_id
,
v_ensembl_id
,
v_ensembl_object_type
,
v_xref_id
;
IF
done
THEN
LEAVE
main_loop
;
END
IF
;
-- find the corresponding gene_id
SET
v_gene_id
=
0
;
CASE
WHEN
v_ensembl_object_type
=
'Transcript'
THEN
SELECT
COALESCE
(
gene_id
,
0
)
INTO
v_gene_id
FROM
transcript
where
transcript_id
=
v_ensembl_id
;
WHEN
v_ensembl_object_type
=
'Translation'
THEN
SELECT
COALESCE
(
gene_id
,
0
)
INTO
v_gene_id
FROM
transcript
tp
inner
join
translation
tl
where
tp
.
transcript_id
=
tl
.
transcript_id
and
translation_id
=
v_ensembl_id
;
END
CASE
;
SET
existing_object_xref
=
0
;
IF
v_gene_id
>
0
THEN
SELECT
COALESCE
(
object_xref_id
,
0
)
INTO
existing_object_xref
FROM
object_xref
where
ensembl_id
=
v_gene_id
AND
xref_id
=
v_xref_id
AND
ensembl_object_type
=
'Gene'
;
IF
existing_object_xref
>
0
THEN
-- if gene_id already linked to this xref_id delete the object_xref
DELETE
FROM
object_xref
WHERE
object_xref_id
=
v_object_xref_id
;
SET
count_deleted
=
count_deleted
+
1
;
DELETE
FROM
identity_xref
WHERE
object_xref_id
=
v_object_xref_id
;
DELETE
FROM
dependent_xref
WHERE
object_xref_id
=
v_object_xref_id
;
ELSE
-- if not linked update the current object_xref
UPDATE
object_xref
SET
ensembl_id
=
v_gene_id
,
ensembl_object_type
=
'Gene'
WHERE
object_xref_id
=
v_object_xref_id
;
SET
count_updated
=
count_updated
+
1
;
END
IF
;
END
IF
;
END
LOOP
;
CLOSE
cur1
;
-- populate the meta table with patch id and counts for updated rows
INSERT
INTO
meta
(
species_id
,
meta_key
,
meta_value
)
VALUES
(
NULL
,
'data_changes'
,
CONCAT
(
'link_xrefs_to_genes.sql|updated '
,
count_updated
,
' xrefs, deleted '
,
count_deleted
,
' xrefs'
));
END
This diff is collapsed.
Click to expand it.
Preview
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment