diff --git a/macros/dumps_data_condition.sql b/macros/dumps_data_condition.sql index dc54245ee46cf4fad584e4b1fa2d6859b1c7aaae..42aa62b90b05725ba02898e3952d64127c60f14b 100644 --- a/macros/dumps_data_condition.sql +++ b/macros/dumps_data_condition.sql @@ -1,4 +1,4 @@ --- This macro filters data showed in SQL dumps +-- This macro filters data showed in the ChEBI Data products: SQL DUMPS, Ontology, TSV Files -- It receives two table aliases: compound_table alias which as an alias for the compounds table -- And table_name alias which is an alias for a table you want to filter (structure, chemical_data, etc) {% macro dumps_data_condition(table_name_alias, compounds_table_alias='c') -%} diff --git a/macros/ontology_data_condition.sql b/macros/ontology_data_condition.sql deleted file mode 100644 index e8425c989da378888e0680e632d59bc0b3fa76e8..0000000000000000000000000000000000000000 --- a/macros/ontology_data_condition.sql +++ /dev/null @@ -1,4 +0,0 @@ --- This macro filters data showed in the ontology. Ontology includes compound with 1,2 and 3 stars. -{% macro ontology_data_condition(table_name_alias, compounds_table_alias='c') -%} - ({{- compounds_table_alias -}}.stars IN (1, 2, 3) AND {{ table_name_alias -}}.status_id IN (1, 3, 9, 7)) -{%- endmacro %} \ No newline at end of file diff --git a/models/chebi_allstar/properties.yml b/models/chebi_allstar/properties.yml index dbefa999f9ee8ebbcacfac019fbf170bf20257c8..6268090a41f6545dc714cec47d9322213a88825b 100644 --- a/models/chebi_allstar/properties.yml +++ b/models/chebi_allstar/properties.yml @@ -293,7 +293,31 @@ models: data_type: text constraints: - type: not_null - + + # relation_type table + - name: relation_type + description: Relation types can have an entity + config: + contract: + enforced: true + columns: + - name: id + data_type: bigint + constraints: + - type: not_null + - type: primary_key + - type: check + expression: "id > 0" + - name: code + data_type: varchar + constraints: + - type: not_null + - name: allow_cycles + data_type: bool + constraints: + - type: not_null + - name: description + data_type: varchar # relation table - name: relation @@ -313,6 +337,8 @@ models: data_type: bigint constraints: - type: not_null + - type: foreign_key + expression: "chebi_allstar.relation_type(id)" - name: init_id data_type: bigint constraints: diff --git a/models/chebi_allstar/relation.sql b/models/chebi_allstar/relation.sql index d332172ea4166961c0e4e2600caecf081b3d6fd9..a8f4174d29d8c6d81a535fdbcd14e2218a7a0c24 100644 --- a/models/chebi_allstar/relation.sql +++ b/models/chebi_allstar/relation.sql @@ -7,11 +7,11 @@ WITH relation AS ( r.status_id, r.evidence_dbaccid FROM {{ source('chebi_production', 'relation') }} AS r + INNER JOIN {{ source('chebi_production', 'relation_type') }} AS rt ON rt.id = r.relation_type_id INNER JOIN (SELECT id, stars FROM {{ ref('compounds') }}) AS c ON r.init_id = c.id INNER JOIN (SELECT id, stars FROM {{ ref('compounds') }}) AS c2 ON r.final_id = c2.id WHERE - {{ dumps_data_condition('r') }} - OR {{ dumps_data_condition('r', 'c2') }} + r.status_id IN (SELECT s.id FROM {{ ref('status') }} AS s) ) SELECT * FROM relation diff --git a/models/chebi_allstar/relation_type.sql b/models/chebi_allstar/relation_type.sql new file mode 100644 index 0000000000000000000000000000000000000000..cb01c45f662502825542e2dc5a078daf66b8a21d --- /dev/null +++ b/models/chebi_allstar/relation_type.sql @@ -0,0 +1,10 @@ +WITH cte_relation_type AS ( + SELECT + rt.id, + rt.code, + rt.allow_cycles, + rt.description + FROM {{ source('chebi_production', 'relation_type') }} AS rt +) + +SELECT * FROM cte_relation_type diff --git a/models/chebi_ontology/axioms_exact_synonyms.sql b/models/chebi_ontology/axioms_exact_synonyms.sql index 62482746be14bb012768c0f4ed7c6837f2a6e726..5783e2a9f12ea7f70c76ccebc08f4a0fec4a5df6 100644 --- a/models/chebi_ontology/axioms_exact_synonyms.sql +++ b/models/chebi_ontology/axioms_exact_synonyms.sql @@ -18,12 +18,11 @@ WITH cte_exact_synonym_disperse AS ( n.name, s.name AS axiom, CONCAT('chebi', ':', REPLACE(n."type", ' ', '_')) AS axiom_type -- convert IUPAC NAME TO IUPAC_NAME - FROM {{ source('chebi_production', 'compounds') }} AS c - INNER JOIN {{ source('chebi_production', 'names') }} AS n ON c.id = n.compound_id - INNER JOIN {{ source('chebi_production', 'source') }} AS s ON n.source_id = s.id + FROM {{ ref('compounds') }} AS c + INNER JOIN {{ ref('names') }} AS n ON c.id = n.compound_id + INNER JOIN {{ ref('source') }} AS s ON n.source_id = s.id WHERE n."type" = 'IUPAC NAME' - AND ({{ ontology_data_condition('n') }}) AND c.parent_id IS NULL UNION SELECT @@ -35,13 +34,12 @@ WITH cte_exact_synonym_disperse AS ( CASE WHEN n."type" = 'IUPAC NAME' THEN CONCAT('chebi', ':', REPLACE(n."type", ' ', '_')) END AS axiom_type -- convert IUPAC NAME TO IUPAC_NAME - FROM {{ source('chebi_production', 'compounds') }} AS c - LEFT JOIN {{ source('chebi_production', 'compounds') }} AS c2 ON c.id = c2.parent_id - INNER JOIN {{ source('chebi_production', 'names') }} AS n ON c2.id = n.compound_id - INNER JOIN {{ source('chebi_production', 'source') }} AS s ON n.source_id = s.id + FROM {{ ref('compounds') }} AS c + LEFT JOIN {{ ref('compounds') }} AS c2 ON c.id = c2.parent_id + INNER JOIN {{ ref('names') }} AS n ON c2.id = n.compound_id + INNER JOIN {{ ref('source') }} AS s ON n.source_id = s.id WHERE - LOWER(n.ascii_name) IN (SELECT LOWER(n.ascii_name) FROM {{ source('chebi_production', 'names') }} AS n WHERE n."type" = 'IUPAC NAME') - AND ({{ ontology_data_condition('n') }}) + LOWER(n.ascii_name) IN (SELECT LOWER(n.ascii_name) FROM {{ ref('names') }} AS n WHERE n."type" = 'IUPAC NAME') AND c.parent_id IS NULL ), diff --git a/models/chebi_ontology/axioms_related_synonyms.sql b/models/chebi_ontology/axioms_related_synonyms.sql index 7a673aa3a380d8e1245bbe1429c7630bd4683dba..99be361e9cf98f2fd5ca88d2cf500798002816cd 100644 --- a/models/chebi_ontology/axioms_related_synonyms.sql +++ b/models/chebi_ontology/axioms_related_synonyms.sql @@ -20,13 +20,12 @@ WITH cte_related_synonym_disperse AS ( WHEN n."type" = 'BRAND NAME' THEN CONCAT('chebi', ':', REPLACE(n."type", ' ', '_')) WHEN n."type" = 'INN' THEN CONCAT('chebi', ':', n."type") END AS axiom_type - FROM {{ source('chebi_production', 'compounds') }} AS c - INNER JOIN {{ source('chebi_production', 'names') }} AS n ON c.id = n.compound_id - INNER JOIN {{ source('chebi_production', 'source') }} AS s ON n.source_id = s.id + FROM {{ ref('compounds') }} AS c + INNER JOIN {{ ref('names') }} AS n ON c.id = n.compound_id + INNER JOIN {{ ref('source') }} AS s ON n.source_id = s.id LEFT JOIN {{ ref('axioms_exact_synonyms') }} AS es ON n.ascii_name = es.ascii_name WHERE - ({{ ontology_data_condition('n') }}) - AND es.ascii_name IS NULL + es.ascii_name IS NULL AND c.parent_id IS NULL UNION SELECT @@ -40,14 +39,13 @@ WITH cte_related_synonym_disperse AS ( WHEN n."type" = 'BRAND NAME' THEN CONCAT('chebi', ':', REPLACE(n."type", ' ', '_')) WHEN n."type" = 'INN' THEN CONCAT('chebi', ':', n."type") END AS axiom_type - FROM {{ source('chebi_production', 'compounds') }} AS c - LEFT JOIN {{ source('chebi_production', 'compounds') }} AS c2 ON c.id = c2.parent_id - INNER JOIN {{ source('chebi_production', 'names') }} AS n ON c2.id = n.compound_id - INNER JOIN {{ source('chebi_production', 'source') }} AS s ON n.source_id = s.id + FROM {{ ref('compounds') }} AS c + LEFT JOIN {{ ref('compounds') }} AS c2 ON c.id = c2.parent_id + INNER JOIN {{ ref('names') }} AS n ON c2.id = n.compound_id + INNER JOIN {{ ref('source') }} AS s ON n.source_id = s.id LEFT JOIN {{ ref('axioms_exact_synonyms') }} AS es ON n.ascii_name = es.ascii_name WHERE - ({{ ontology_data_condition('n') }}) - AND es.ascii_name IS NULL + es.ascii_name IS NULL AND c.parent_id IS NULL ), diff --git a/models/chebi_ontology/compound_base_information.sql b/models/chebi_ontology/compound_base_information.sql index 50ad421e939796c92746976325f9f35d653198e1..e3f70256ef830ad0f4d0e0cff80a9adfb29001a7 100644 --- a/models/chebi_ontology/compound_base_information.sql +++ b/models/chebi_ontology/compound_base_information.sql @@ -3,8 +3,8 @@ WITH cte_alternative_ids AS ( SELECT c.id, --parent_id STRING_AGG(c2.chebi_accession, '|') AS alternative_ids --children ids - FROM {{ source('chebi_production', 'compounds') }} AS c - LEFT JOIN {{ source('chebi_production', 'compounds') }} AS c2 ON c.id = c2.parent_id + FROM {{ ref('compounds') }} AS c + LEFT JOIN {{ ref('compounds') }} AS c2 ON c.id = c2.parent_id GROUP BY c.id ), @@ -73,7 +73,7 @@ SELECT DISTINCT rs.related_synonym, c.stars AS star_id, 'chebi:' || c.stars || '_STAR' AS stars -FROM {{ source('chebi_production', 'compounds') }} AS c +FROM {{ ref('compounds') }} AS c INNER JOIN {{ ref('valid_relations') }} AS cr ON c.id = cr.compound_id LEFT JOIN {{ ref('valid_chemical_data') }} AS cd ON c.id = cd.compound_id LEFT JOIN {{ ref('valid_structures') }} AS s ON c.id = s.compound_id diff --git a/models/chebi_ontology/deprecated_compounds.sql b/models/chebi_ontology/deprecated_compounds.sql index 677fe9cd766781709ecbc82e0eaeae1e2c83bbb8..adf9e4911ec80ff70a3b103a4bcbe6bd41b25be0 100644 --- a/models/chebi_ontology/deprecated_compounds.sql +++ b/models/chebi_ontology/deprecated_compounds.sql @@ -1,49 +1,35 @@ --- There are two kind of deprecated compounds: -- 1. Deprecated because of a merging process. Basically, when a compound c0 is merged with a compound C1, the field parent_id -- in the c0 compound is populated with the id of C1. --- 2. Deprecated because of a deleted compound - WITH cte_deprecated_for_merging AS ( SELECT c.id AS compound_id, c.chebi_accession AS curie, + c.stars, + c.status_id, 'owl:Class' AS property_type, 1 AS is_deprecated, - CONCAT('CHEBI:', c.parent_id) AS terms_merged_with, - 'obo:IAO_0000227' AS terms_merged_definition, - -- This is a curie to describe the merged process: https://ontobee.org/ontology/IAO?iri=http://purl.obolibrary.org/obo/IAO_0000227 - NULL AS namespace, - NULL AS object_property - FROM {{ source('chebi_production', 'compounds') }} AS c + CASE + WHEN c.id NOT IN (27189) THEN CONCAT('CHEBI:', c.parent_id) + END AS terms_merged_with, + CASE + -- This is a curie to describe the merged process: https://ontobee.org/ontology/IAO?iri=http://purl.obolibrary.org/obo/IAO_0000227 + WHEN c.id NOT IN (27189) THEN 'obo:IAO_0000227' + END AS terms_merged_definition, + CASE + WHEN c.id NOT IN (27189) THEN NULL + ELSE 'chebi_ontology' + END AS namespace, + CASE + WHEN c.id NOT IN (27189) THEN NULL + ELSE c.chebi_accession + END AS object_property + FROM {{ ref('compounds') }} AS c WHERE c.parent_id IS NOT NULL - AND c.stars > 0 -), - -cte_deprecated_for_deleted AS ( - SELECT - c.id AS compound_id, - c.chebi_accession AS curie, - 'owl:Class' AS property_type, - 1 AS is_deprecated, - NULL AS terms_merged_with, - NULL AS terms_merged_definition, - 'chebi_ontology' AS namespace, - c.chebi_accession AS object_property - FROM {{ source('chebi_production', 'compounds') }} AS c - WHERE ( - (c.status_id = 2 AND c.stars IN (1, 2, 3)) -- Status 2 = Deleted - OR c.id = 27189 - ) -- We must include the 'unclassified' definition, CHEBI:27189 -), - -cte_deprecated_compounds AS ( - SELECT * FROM cte_deprecated_for_merging - UNION - SELECT * FROM cte_deprecated_for_deleted + OR c.id = 27189 -- We must include the 'unclassified' definition, CHEBI:27189 ) SELECT ROW_NUMBER() OVER (ORDER BY cdc.compound_id) AS id, cdc.* -FROM cte_deprecated_compounds AS cdc +FROM cte_deprecated_for_merging AS cdc; diff --git a/models/chebi_ontology/valid_chemical_data.sql b/models/chebi_ontology/valid_chemical_data.sql index 468417b1221345e015c73897d4c37af9faa762e5..a0ed9c47a0d1ecb58260143109c8e1560f4f32e4 100644 --- a/models/chebi_ontology/valid_chemical_data.sql +++ b/models/chebi_ontology/valid_chemical_data.sql @@ -9,11 +9,10 @@ WITH cte_chemical_data_parents AS ( cd.charge::text, cd.mass::text, cd.monoisotopic_mass::text - FROM {{ source('chebi_production', 'compounds') }} AS c - LEFT JOIN {{ source('chebi_production', 'chemical_data') }} AS cd ON c.id = cd.compound_id + FROM {{ ref('compounds') }} AS c + LEFT JOIN {{ ref('chemical_data') }} AS cd ON c.id = cd.compound_id WHERE - {{ ontology_data_condition('cd') }} - AND c.parent_id IS NULL + c.parent_id IS NULL --AND c.id IN (41981, 27427) ), @@ -25,13 +24,10 @@ cte_chemical_data_children AS ( STRING_AGG(DISTINCT cd.charge::text, ';') AS charge, STRING_AGG(DISTINCT cd.mass::text, ';') AS mass, STRING_AGG(DISTINCT cd.monoisotopic_mass::text, ';') AS monoisotopic_mass - FROM {{ source('chebi_production', 'compounds') }} AS c - LEFT JOIN {{ source('chebi_production', 'compounds') }} AS c2 ON c.id = c2.parent_id - LEFT JOIN {{ source('chebi_production', 'chemical_data') }} AS cd + FROM {{ ref('compounds') }} AS c + LEFT JOIN {{ ref('compounds') }} AS c2 ON c.id = c2.parent_id + LEFT JOIN {{ ref('chemical_data') }} AS cd ON c2.id = cd.compound_id - WHERE - {{ ontology_data_condition('cd') }} - --AND c.id IN (41981, 27427) GROUP BY c.id ), diff --git a/models/chebi_ontology/valid_database_references.sql b/models/chebi_ontology/valid_database_references.sql index 6ce885307450e5ac2791056ab77ddbdc46cce335..f66fc4ccd8751cb4a2ebc077672f4e4f516a099f 100644 --- a/models/chebi_ontology/valid_database_references.sql +++ b/models/chebi_ontology/valid_database_references.sql @@ -14,12 +14,11 @@ WITH cte_database_references_disperse AS ( END AS prefix, sr.name AS source_name, da.source_id - FROM {{ source('chebi_production', 'compounds') }} AS c - INNER JOIN {{ source('chebi_production', 'database_accession') }} AS da ON c.id = da.compound_id - INNER JOIN {{ source('chebi_production', 'source') }} AS sr ON da.source_id = sr.id + FROM {{ ref('compounds') }} AS c + INNER JOIN {{ ref('database_accession') }} AS da ON c.id = da.compound_id + INNER JOIN {{ ref('source') }} AS sr ON da.source_id = sr.id WHERE - ({{ ontology_data_condition('da') }}) - AND c.parent_id IS NULL + c.parent_id IS NULL UNION SELECT c.id AS compound_id, @@ -32,13 +31,12 @@ WITH cte_database_references_disperse AS ( END AS prefix, sr.name AS source_name, da.source_id - FROM {{ source('chebi_production', 'compounds') }} AS c - LEFT JOIN {{ source('chebi_production', 'compounds') }} AS c2 ON c.id = c2.parent_id - INNER JOIN {{ source('chebi_production', 'database_accession') }} AS da ON c2.id = da.compound_id - INNER JOIN {{ source('chebi_production', 'source') }} AS sr ON da.source_id = sr.id + FROM {{ ref('compounds') }} AS c + LEFT JOIN {{ ref('compounds') }} AS c2 ON c.id = c2.parent_id + INNER JOIN {{ ref('database_accession') }} AS da ON c2.id = da.compound_id + INNER JOIN {{ ref('source') }} AS sr ON da.source_id = sr.id WHERE - ({{ ontology_data_condition('da') }}) - AND c.parent_id IS NULL + c.parent_id IS NULL ) SELECT diff --git a/models/chebi_ontology/valid_relations.sql b/models/chebi_ontology/valid_relations.sql index 338c1e0224c51966a369afae38fcbc0a70cfc454..fb05c676cdb96b0bad4e4607ef7d9150ddaaa37c 100644 --- a/models/chebi_ontology/valid_relations.sql +++ b/models/chebi_ontology/valid_relations.sql @@ -10,30 +10,28 @@ WITH cte_compounds_relations AS ( c.id AS compound_id, r.init_id, r.relation_type_id - FROM {{ source('chebi_production', 'compounds') }} AS c - INNER JOIN {{ source('chebi_production', 'relation') }} AS r ON c.id = r.final_id - INNER JOIN {{ source('chebi_production', 'relation_type') }} AS rt ON r.relation_type_id = rt.id + FROM {{ ref('compounds') }} AS c + INNER JOIN {{ ref('relation') }} AS r ON c.id = r.final_id + INNER JOIN {{ ref('relation_type') }} AS rt ON r.relation_type_id = rt.id WHERE - {{ ontology_data_condition('r') }} - AND r.init_id <> 27189 -- We're NOT showing compounds that have 'unclassified' AS a parent, the chebi_id FOR unclassified IS 27189 + r.init_id <> 27189 -- We're NOT showing compounds that have 'unclassified' AS a parent, the chebi_id FOR unclassified IS 27189 UNION SELECT c.id AS compound_id, --c2.id AS alternative_id, r.init_id, r.relation_type_id - FROM {{ source('chebi_production', 'compounds') }} AS c - LEFT JOIN {{ source('chebi_production', 'compounds') }} AS c2 ON c.id = c2.parent_id - INNER JOIN {{ source('chebi_production', 'relation') }} AS r ON c2.id = r.final_id + FROM {{ ref('compounds') }} AS c + LEFT JOIN {{ ref('compounds') }} AS c2 ON c.id = c2.parent_id + INNER JOIN {{ ref('relation') }} AS r ON c2.id = r.final_id WHERE - {{ ontology_data_condition('r') }} - AND r.init_id <> 27189 + r.init_id <> 27189 UNION SELECT c.id AS compound_id, NULL AS init_id, NULL AS relation_type_id - FROM {{ source('chebi_production', 'compounds') }} AS c + FROM {{ ref('compounds') }} AS c -- There ARE 3 chebi ids that are special, they don't have any parents because they are the upper definitions: -- chemical entity, role and subatomic particle. Anyway, they must appear in the chebi ontology! WHERE c.id IN (24431, 50906, 36342) @@ -48,7 +46,7 @@ cte_compound_relations_parents AS ( cr.init_id, cr.relation_type_id FROM cte_compounds_relations AS cr -- the previous cte_compounds_relations - LEFT JOIN {{ source('chebi_production', 'compounds') }} AS c ON cr.init_id = c.id + LEFT JOIN {{ ref('compounds') }} AS c ON cr.init_id = c.id WHERE c.parent_id IS NULL UNION SELECT @@ -56,7 +54,7 @@ cte_compound_relations_parents AS ( c.parent_id AS init_id, cr.relation_type_id FROM cte_compounds_relations AS cr - LEFT JOIN {{ source('chebi_production', 'compounds') }} AS c ON cr.init_id = c.id + LEFT JOIN {{ ref('compounds') }} AS c ON cr.init_id = c.id WHERE c.parent_id IS NOT NULL ), @@ -99,7 +97,7 @@ cte_compounds_by_relations_disperse AS ( END AS compounds_is_tautomer_of FROM cte_compound_relations_parents AS r -- left join to take into account the special ones chebi ids: 24431, 50906, 36342, - LEFT JOIN {{ source('chebi_production', 'relation_type') }} AS rt ON r.relation_type_id = rt.id + LEFT JOIN {{ ref('relation_type') }} AS rt ON r.relation_type_id = rt.id GROUP BY r.compound_id, r.relation_type_id diff --git a/models/chebi_ontology/valid_structures.sql b/models/chebi_ontology/valid_structures.sql index 87d41cbfddf9b23cdead949f0f540522e76270c7..385b23076ab53da9ccb2222a9ad7e90b92324dc5 100644 --- a/models/chebi_ontology/valid_structures.sql +++ b/models/chebi_ontology/valid_structures.sql @@ -7,12 +7,11 @@ WITH cte_structures_parents AS ( s.standard_inchi_key, s.standard_inchi, s.smiles - FROM {{ source('chebi_production', 'compounds') }} AS c - LEFT JOIN {{ source('chebi_production', 'structures') }} AS s ON c.id = s.compound_id + FROM {{ ref('compounds') }} AS c + LEFT JOIN {{ ref('structures') }} AS s ON c.id = s.compound_id WHERE s.molfile IS NOT NULL AND s.default_structure - AND ({{ ontology_data_condition('s') }}) AND c.parent_id IS NULL ), @@ -23,13 +22,12 @@ cte_structures_children AS ( s.standard_inchi_key, s.standard_inchi, s.smiles - FROM {{ source('chebi_production', 'compounds') }} AS c - LEFT JOIN {{ source('chebi_production', 'compounds') }} AS c2 ON c.id = c2.parent_id - LEFT JOIN {{ source('chebi_production', 'structures') }} AS s ON c2.id = s.compound_id + FROM {{ ref('compounds') }} AS c + LEFT JOIN {{ ref('compounds') }} AS c2 ON c.id = c2.parent_id + LEFT JOIN {{ ref('structures') }} AS s ON c2.id = s.compound_id WHERE s.molfile IS NOT NULL AND s.default_structure - AND ({{ ontology_data_condition('s') }}) ), cte_structure AS (