Commit c6370ec1 authored by David Mendez
Browse files

Entities Join: add more cases for entities join

parent 65196a1c
......@@ -111,7 +111,7 @@ def get_cache_key(destination_entity_browser_state_template,
:return: a cache key corresponding to the parameters given
"""
query_identifier = f'{destination_entity_browser_state_template}-{entity_from}-{entity_to}-{raw_es_query}-' \
f'{raw_selection_description}-{previous_hash}'
f'{raw_selection_description}-{previous_hash}-{RUN_CONFIG.get("cache_key_suffix")}'
query_identifier_digest = hashlib.sha256(query_identifier.encode('utf-8')).digest()
base64_identifier_hash = base64.b64encode(query_identifier_digest).decode('utf-8')
......
"""
Module with functions that help to load the ids for the entities joiner
"""
import json
from app.entities_joiner import standardisation
from app.es_data import es_data
from utils import dict_property_access
......@@ -21,7 +23,6 @@ def load_ids_for_query(es_query, selection_description, from_property, index_nam
doc_source = doc_i['_source']
from_property_value = dict_property_access.get_property_value(doc_source, from_property)
ids.append(from_property_value)
return ids
......
......@@ -16,6 +16,9 @@ class PossibleOriginEntities(Enum):
CHEMBL_CELL_LINES = 'CHEMBL_CELL_LINES'
CHEMBL_TISSUES = 'CHEMBL_TISSUES'
CHEMBL_DRUG_WARNINGS = 'CHEMBL_DRUG_WARNINGS'
CHEMBL_DRUGS = 'CHEMBL_DRUGS'
CHEMBL_DRUG_MECHANISMS = 'CHEMBL_DRUG_MECHANISMS'
CHEMBL_DRUG_INDICATIONS = 'CHEMBL_DRUG_INDICATIONS'
class PossibleDestinationEntities(Enum):
......@@ -46,6 +49,8 @@ class SelectionModes(Enum):
INDEX_NAMES_FOR_FROM_ENTITIES = {
PossibleOriginEntities.CHEMBL_ACTIVITIES: 'chembl_activity',
PossibleOriginEntities.CHEMBL_COMPOUNDS: 'chembl_molecule',
PossibleOriginEntities.CHEMBL_DRUGS: 'chembl_molecule',
PossibleOriginEntities.CHEMBL_DRUG_MECHANISMS: 'chembl_mechanism_by_parent_target',
PossibleOriginEntities.CHEMBL_TARGETS: 'chembl_target',
PossibleOriginEntities.CHEMBL_ASSAYS: 'chembl_assay',
PossibleOriginEntities.CHEMBL_DOCUMENTS: 'chembl_document',
......@@ -65,9 +70,9 @@ def get_index_name_for_origin_entity(parsed_origin_entity):
return index_name
def create_simple_query_generator(destination_property):
def create_simple_query_generator(destination_properties):
"""
:param destination_property: property to use to build the query
:param destination_properties: list of properties to use to build the query
:return: a function to be used to build the join query
"""
......@@ -76,19 +81,70 @@ def create_simple_query_generator(destination_property):
:param ids: ids of the items matches
:return: query to use for the join
"""
ids_clauses = " OR ".join([f'"{item_id}"' for item_id in ids])
return f'{destination_property}({ids_clauses})'
# make it a set to avoid duplicates
ids_set = set(ids)
clauses_groups = []
for destination_property in destination_properties:
ids_clauses = " OR ".join([f'"{item_id}"' for item_id in ids_set])
clauses_group = f'{destination_property}:({ids_clauses})'
clauses_groups.append(clauses_group)
return ' OR '.join(clauses_groups)
return join_function
JOIN_PROPERTIES = {
'from': {
PossibleDestinationEntities.CHEMBL_COMPOUNDS: {
PossibleOriginEntities.CHEMBL_COMPOUNDS: {
'to': {
PossibleDestinationEntities.CHEMBL_ACTIVITIES: {
'origin_property': 'molecule_chembl_id',
'destination_query_generator': create_simple_query_generator(['molecule_chembl_id'])
},
PossibleDestinationEntities.CHEMBL_DRUGS: {
'origin_property': 'molecule_chembl_id',
'destination_query_generator': create_simple_query_generator(['molecule_chembl_id'])
},
PossibleDestinationEntities.CHEMBL_DRUG_MECHANISMS: {
'origin_property': 'molecule_chembl_id',
'destination_query_generator': create_simple_query_generator(
['mechanism_of_action.molecule_chembl_id', 'mechanism_of_action.parent_molecule_chembl_id']
)
},
PossibleDestinationEntities.CHEMBL_DRUG_INDICATIONS: {
'origin_property': 'molecule_chembl_id',
'destination_query_generator': create_simple_query_generator(
['drug_indication.molecule_chembl_id', 'drug_indication.parent_molecule_chembl_id']
)
}
}
},
PossibleOriginEntities.CHEMBL_DRUGS: {
'to': {
PossibleDestinationEntities.CHEMBL_ACTIVITIES: {
'origin_property': 'molecule_chembl_id',
'destination_query_generator': create_simple_query_generator('molecule_chembl_id')
'destination_query_generator': create_simple_query_generator(['molecule_chembl_id'])
},
PossibleDestinationEntities.CHEMBL_DRUG_MECHANISMS: {
'origin_property': 'molecule_chembl_id',
'destination_query_generator': create_simple_query_generator(
['mechanism_of_action.molecule_chembl_id', 'mechanism_of_action.parent_molecule_chembl_id']
)
},
PossibleDestinationEntities.CHEMBL_DRUG_INDICATIONS: {
'origin_property': 'molecule_chembl_id',
'destination_query_generator': create_simple_query_generator(
['drug_indication.molecule_chembl_id', 'drug_indication.parent_molecule_chembl_id']
)
}
}
},
PossibleOriginEntities.CHEMBL_DRUG_MECHANISMS: {
'to': {
PossibleDestinationEntities.CHEMBL_DRUGS: {
'origin_property': 'parent_molecule.molecule_chembl_id',
'destination_query_generator': create_simple_query_generator(['molecule_chembl_id'])
}
}
},
......@@ -96,23 +152,23 @@ JOIN_PROPERTIES = {
'to': {
PossibleDestinationEntities.CHEMBL_ACTIVITIES: {
'origin_property': 'drug_warning.molecule_chembl_id',
'destination_query_generator': create_simple_query_generator('molecule_chembl_id')
'destination_query_generator': create_simple_query_generator(['molecule_chembl_id'])
},
PossibleDestinationEntities.CHEMBL_COMPOUNDS: {
'origin_property': 'drug_warning.molecule_chembl_id',
'destination_query_generator': create_simple_query_generator('molecule_chembl_id')
'destination_query_generator': create_simple_query_generator(['molecule_chembl_id'])
},
PossibleDestinationEntities.CHEMBL_DRUGS: {
'origin_property': 'drug_warning.parent_molecule_chembl_id',
'destination_query_generator': create_simple_query_generator('molecule_chembl_id')
'destination_query_generator': create_simple_query_generator(['molecule_chembl_id'])
},
PossibleDestinationEntities.CHEMBL_DRUG_MECHANISMS: {
'origin_property': 'drug_warning.parent_molecule_chembl_id',
'destination_query_generator': create_simple_query_generator('molecule_chembl_id')
'destination_query_generator': create_simple_query_generator(['molecule_chembl_id'])
},
PossibleDestinationEntities.CHEMBL_DRUG_INDICATIONS: {
'origin_property': 'drug_warning.parent_molecule_chembl_id',
'destination_query_generator': create_simple_query_generator('molecule_chembl_id')
'destination_query_generator': create_simple_query_generator(['molecule_chembl_id'])
}
}
}
......
{
"query": {
"bool": {
"must": [
{
"query_string": {
"analyze_wildcard": true,
"query": "*"
}
},
{
"term": {
"_metadata.drug.is_drug": true
}
}
],
"filter": [
[
{
"bool": {
"should": [
{
"term": {
"molecule_type": "Oligonucleotide"
}
}
]
}
}
]
]
}
}
}
\ No newline at end of file
{
"query": {
"bool": {
"must": [
{
"query_string": {
"analyze_wildcard": true,
"query": "*"
}
}
],
"filter": [
[
{
"bool": {
"should": [
{
"term": {
"mechanism_of_action.action_type": "MODULATOR"
}
}
]
}
}
]
]
}
}
}
\ No newline at end of file
"""
Module that tests the endpoints to do joins among entities selecting all ids
"""
from specific_tests import utils
from specific_tests.entities_join import utils as entities_join_utils
def run_test(server_base_url, delayed_jobs_server_base_path):
    """
    Runs an entities join from drug warnings to activities selecting all the ids of the dataset
    :param server_base_url: base url of the running server. E.g. http://127.0.0.1:5000
    :param delayed_jobs_server_base_path: base path for the delayed_jobs (not used by this test)
    """
    separator = '-------------------------------------------'
    print(separator, 'Testing joins among entities', separator, sep='\n')

    query = utils.load_json_data('functional_tests/specific_tests/data/entities_join_query_0.json')
    # "allItemsExcept" with an empty exceptions list: no id is left out of the selection
    select_everything = {"selectionMode": "allItemsExcept", "exceptions": []}
    entities_join_utils.test_entities_join(query, select_everything, server_base_url,
                                           entity_from='CHEMBL_DRUG_WARNINGS', entity_to='CHEMBL_ACTIVITIES')
# pylint: disable=import-error
"""
Module that tests the endpoints to do joins among entities selecting all ids except some
"""
from specific_tests import utils
from specific_tests.entities_join import utils as entities_join_utils
def run_test(server_base_url, delayed_jobs_server_base_path):
    """
    Runs an entities join from drug warnings to activities selecting all the ids except some
    :param server_base_url: base url of the running server. E.g. http://127.0.0.1:5000
    :param delayed_jobs_server_base_path: base path for the delayed_jobs (not used by this test)
    """
    banner = (
        '-------------------------------------------',
        'Testing joins among entities selecting all except some',
        '-------------------------------------------',
    )
    for banner_line in banner:
        print(banner_line)

    # ids listed as exceptions of the "allItemsExcept" selection
    excluded_ids = ['CHEMBL2107495', 'CHEMBL340978']
    query = utils.load_json_data('functional_tests/specific_tests/data/entities_join_query_0.json')
    selection = {"selectionMode": "allItemsExcept", "exceptions": excluded_ids}
    entities_join_utils.test_entities_join(query, selection, server_base_url,
                                           entity_from='CHEMBL_DRUG_WARNINGS',
                                           entity_to='CHEMBL_ACTIVITIES')
# pylint: disable=import-error
"""
Module that tests the endpoints to do joins among entities selecting no ids except some
"""
from specific_tests import utils
from specific_tests.entities_join import utils as entities_join_utils
def run_test(server_base_url, delayed_jobs_server_base_path):
    """
    Runs an entities join from drug warnings to activities selecting no ids except some
    :param server_base_url: base url of the running server. E.g. http://127.0.0.1:5000
    :param delayed_jobs_server_base_path: base path for the delayed_jobs (not used by this test)
    """
    rule = '-------------------------------------------'
    print(rule)
    print('Testing joins among entities selecting all none except')
    print(rule)

    # ids listed as exceptions of the "noItemsExcept" selection
    only_these_ids = ['CHEMBL2107495', 'CHEMBL340978']
    entities_join_utils.test_entities_join(
        utils.load_json_data('functional_tests/specific_tests/data/entities_join_query_0.json'),
        {"selectionMode": "noItemsExcept", "exceptions": only_these_ids},
        server_base_url,
        entity_from='CHEMBL_DRUG_WARNINGS', entity_to='CHEMBL_ACTIVITIES')
# pylint: disable=import-error
"""
Module that tests the endpoints to do joins among entities from drug warnings to compounds
"""
from specific_tests import utils
from specific_tests.entities_join import utils as entities_join_utils
def run_test(server_base_url, delayed_jobs_server_base_path):
    """
    Tests doing a join among different entities from drug warnings to compounds
    :param server_base_url: base url of the running server. E.g. http://127.0.0.1:5000
    :param delayed_jobs_server_base_path: base path for the delayed_jobs (not used by this test)
    """
    print('-------------------------------------------')
    # The banner names the join actually exercised so the test output can be told apart
    print('Testing joins among entities from drug warnings to compounds')
    print('-------------------------------------------')

    dataset_query = utils.load_json_data('functional_tests/specific_tests/data/entities_join_query_0.json')
    # "allItemsExcept" with an empty exceptions list: no id is left out of the selection
    selection_description = {"selectionMode": "allItemsExcept", "exceptions": []}
    entities_join_utils.test_entities_join(dataset_query, selection_description, server_base_url,
                                           entity_from='CHEMBL_DRUG_WARNINGS', entity_to='CHEMBL_COMPOUNDS')
# pylint: disable=import-error
"""
Module that tests the endpoints to do joins among entities from drug warnings to drugs
"""
from specific_tests import utils
from specific_tests.entities_join import utils as entities_join_utils
def run_test(server_base_url, delayed_jobs_server_base_path):
    """
    Tests doing a join among different entities from drug warnings to drugs
    :param server_base_url: base url of the running server. E.g. http://127.0.0.1:5000
    :param delayed_jobs_server_base_path: base path for the delayed_jobs (not used by this test)
    """
    print('-------------------------------------------')
    # The banner names the join actually exercised so the test output can be told apart
    print('Testing joins among entities from drug warnings to drugs')
    print('-------------------------------------------')

    dataset_query = utils.load_json_data('functional_tests/specific_tests/data/entities_join_query_0.json')
    # "allItemsExcept" with an empty exceptions list: no id is left out of the selection
    selection_description = {"selectionMode": "allItemsExcept", "exceptions": []}
    entities_join_utils.test_entities_join(dataset_query, selection_description, server_base_url,
                                           entity_from='CHEMBL_DRUG_WARNINGS', entity_to='CHEMBL_DRUGS')
# pylint: disable=import-error
"""
Module that tests the endpoints to do joins among entities from drug warnings to drug indications
"""
from specific_tests import utils
from specific_tests.entities_join import utils as entities_join_utils
def run_test(server_base_url, delayed_jobs_server_base_path):
    """
    Tests doing a join among different entities from drug warnings to drug indications
    :param server_base_url: base url of the running server. E.g. http://127.0.0.1:5000
    :param delayed_jobs_server_base_path: base path for the delayed_jobs (not used by this test)
    """
    print('-------------------------------------------')
    # The banner names the join actually exercised so the test output can be told apart
    print('Testing joins among entities from drug warnings to drug indications')
    print('-------------------------------------------')

    dataset_query = utils.load_json_data('functional_tests/specific_tests/data/entities_join_query_0.json')
    # "allItemsExcept" with an empty exceptions list: no id is left out of the selection
    selection_description = {"selectionMode": "allItemsExcept", "exceptions": []}
    entities_join_utils.test_entities_join(dataset_query, selection_description, server_base_url,
                                           entity_from='CHEMBL_DRUG_WARNINGS', entity_to='CHEMBL_DRUG_INDICATIONS')
# pylint: disable=import-error
"""
Module that tests the endpoints to do joins among entities from drug warnings to drug mechanisms
"""
from specific_tests import utils
from specific_tests.entities_join import utils as entities_join_utils
def run_test(server_base_url, delayed_jobs_server_base_path):
    """
    Tests doing a join among different entities from drug warnings to drug mechanisms
    :param server_base_url: base url of the running server. E.g. http://127.0.0.1:5000
    :param delayed_jobs_server_base_path: base path for the delayed_jobs (not used by this test)
    """
    print('-------------------------------------------')
    # The banner names the join actually exercised so the test output can be told apart
    print('Testing joins among entities from drug warnings to drug mechanisms')
    print('-------------------------------------------')

    dataset_query = utils.load_json_data('functional_tests/specific_tests/data/entities_join_query_0.json')
    # "allItemsExcept" with an empty exceptions list: no id is left out of the selection
    selection_description = {"selectionMode": "allItemsExcept", "exceptions": []}
    entities_join_utils.test_entities_join(dataset_query, selection_description, server_base_url,
                                           entity_from='CHEMBL_DRUG_WARNINGS', entity_to='CHEMBL_DRUG_MECHANISMS')
# pylint: disable=import-error
"""
Module that tests the endpoints to do joins among entities from compounds to activities
"""
from specific_tests import utils
from specific_tests.entities_join import utils as entities_join_utils
def run_test(server_base_url, delayed_jobs_server_base_path):
    """
    Tests doing a join among different entities from compounds to activities
    :param server_base_url: base url of the running server. E.g. http://127.0.0.1:5000
    :param delayed_jobs_server_base_path: base path for the delayed_jobs (not used by this test)
    """
    print('-------------------------------------------')
    # The banner names the join actually exercised so the test output can be told apart
    print('Testing joins among entities from compounds to activities')
    print('-------------------------------------------')

    # This test reads entities_join_query_1.json, the dataset paired with CHEMBL_COMPOUNDS as origin
    dataset_query = utils.load_json_data('functional_tests/specific_tests/data/entities_join_query_1.json')
    # "allItemsExcept" with an empty exceptions list: no id is left out of the selection
    selection_description = {"selectionMode": "allItemsExcept", "exceptions": []}
    entities_join_utils.test_entities_join(dataset_query, selection_description, server_base_url,
                                           entity_from='CHEMBL_COMPOUNDS', entity_to='CHEMBL_ACTIVITIES')
"""
Module that runs all the tests related to the entities join
"""
from specific_tests.entities_join import fun_test_entities_join_0, fun_test_entities_join_1, fun_test_entities_join_2, \
fun_test_entities_join_3, fun_test_entities_join_4, fun_test_entities_join_5, fun_test_entities_join_6, \
fun_test_entities_join_7
from specific_tests import utils
from specific_tests.entities_join import utils as entities_join_utils
def _build_tests_to_run():
    """
    Expands a compact table of join cases into the list of test descriptions
    :return: a list of dicts with the keys entity_from, entity_to, dataset_query_path and selection_description
    """
    data_dir = 'functional_tests/specific_tests/data'
    some_ids = ['CHEMBL2107495', 'CHEMBL340978']

    # (entity_from, entity_to, number of the dataset query file, selection mode, exceptions)
    cases = [
        ('CHEMBL_DRUG_WARNINGS', 'CHEMBL_ACTIVITIES', 0, 'allItemsExcept', []),
        ('CHEMBL_DRUG_WARNINGS', 'CHEMBL_ACTIVITIES', 0, 'allItemsExcept', some_ids),
        ('CHEMBL_DRUG_WARNINGS', 'CHEMBL_ACTIVITIES', 0, 'noItemsExcept', some_ids),
        ('CHEMBL_DRUG_WARNINGS', 'CHEMBL_COMPOUNDS', 0, 'allItemsExcept', []),
        ('CHEMBL_DRUG_WARNINGS', 'CHEMBL_DRUGS', 0, 'allItemsExcept', []),
        ('CHEMBL_DRUG_WARNINGS', 'CHEMBL_DRUG_INDICATIONS', 0, 'allItemsExcept', []),
        ('CHEMBL_DRUG_WARNINGS', 'CHEMBL_DRUG_MECHANISMS', 0, 'allItemsExcept', []),
        ('CHEMBL_COMPOUNDS', 'CHEMBL_ACTIVITIES', 1, 'allItemsExcept', []),
        ('CHEMBL_COMPOUNDS', 'CHEMBL_DRUGS', 1, 'allItemsExcept', []),
        ('CHEMBL_COMPOUNDS', 'CHEMBL_DRUG_MECHANISMS', 1, 'allItemsExcept', []),
        ('CHEMBL_COMPOUNDS', 'CHEMBL_DRUG_INDICATIONS', 1, 'allItemsExcept', []),
        ('CHEMBL_DRUGS', 'CHEMBL_ACTIVITIES', 2, 'allItemsExcept', []),
        ('CHEMBL_DRUGS', 'CHEMBL_DRUG_MECHANISMS', 2, 'allItemsExcept', []),
        ('CHEMBL_DRUGS', 'CHEMBL_DRUG_INDICATIONS', 2, 'allItemsExcept', []),
        ('CHEMBL_DRUG_MECHANISMS', 'CHEMBL_DRUGS', 3, 'allItemsExcept', []),
    ]

    return [
        {
            'entity_from': origin,
            'entity_to': destination,
            'dataset_query_path': f'{data_dir}/entities_join_query_{query_number}.json',
            # list(...) gives every description its own exceptions list, never a shared one
            'selection_description': {"selectionMode": mode, "exceptions": list(exceptions)},
        }
        for origin, destination, query_number, mode, exceptions in cases
    ]


# Every join case covered by the functional test: origin entity, destination entity,
# path of the dataset query file to load and description of the selected ids.
TESTS_TO_RUN = _build_tests_to_run()
def run_test(server_base_url, delayed_jobs_server_base_path):
......@@ -12,7 +104,14 @@ def run_test(server_base_url, delayed_jobs_server_base_path):
:param server_base_url: base url of the running server. E.g. http://127.0.0.1:5000
:param delayed_jobs_server_base_path: base path for the delayed_jobs
"""
for test_module in [fun_test_entities_join_0, fun_test_entities_join_1, fun_test_entities_join_2,
fun_test_entities_join_3, fun_test_entities_join_4, fun_test_entities_join_5,
fun_test_entities_join_6, fun_test_entities_join_7]:
test_module.run_test(server_base_url, delayed_jobs_server_base_path)
for test_description in TESTS_TO_RUN:
dataset_query = utils.load_json_data(test_description['dataset_query_path'])
selection_description = test_description['selection_description']
entity_from = test_description['entity_from']
entity_to = test_description['entity_to']
entities_join_utils.test_entities_join(dataset_query, selection_description, server_base_url,
entity_from=entity_from, entity_to=entity_to)
print(f'Ran {len(TESTS_TO_RUN)} tests of entities join successfully')
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment