Commit 0f026994 authored by David Mendez's avatar David Mendez
Browse files

Entities Join: start to implement join query generation

parent a8b612ac
......@@ -4,6 +4,7 @@ Module with functions to make joins of entities
import json
from app.entities_joiner import standardisation
from app.entities_joiner import ids_loader
class EntitiesJoinerError(Exception):
......@@ -11,31 +12,31 @@ class EntitiesJoinerError(Exception):
def get_tiny_hash_to_related_items(destination_entity_browser_state_template,
entity_from, entity_to, es_query,
selection_description):
entity_from, entity_to, raw_es_query,
raw_selection_description):
"""
:param destination_entity_browser_state_template: template for building the resulting browser url
:param entity_from: source entity of the items
:param entity_to: destination entity of the join
:param es_query: query in elasticsearch for the dataset
:param selection_description: stringifyed javascript object describing de selection of items in the dataset
:param raw_es_query: stringifyed query in elasticsearch for the dataset
:param raw_selection_description: stringifyed javascript object describing de selection of items in the dataset
:return: the hash to the link with the generated state
"""
parsed_from = None
parsed_from_entity = None
try:
parsed_from = standardisation.PossibleFroms(entity_from)
parsed_from_entity = standardisation.PossibleEntitiesFrom(entity_from)
except ValueError as error:
raise EntitiesJoinerError(
f'entity_from: {str(error)}. Possible values are {[item.value for item in standardisation.PossibleFroms]}')
f'entity_from: {str(error)}. Possible values are {[item.value for item in standardisation.PossibleEntitiesFrom]}')
parsed_to = None
parsed_to_entity = None
try:
parsed_to = standardisation.PossibleTos(entity_to)
parsed_to_entity = standardisation.PossibleEntitiesTo(entity_to)
except ValueError as error:
raise EntitiesJoinerError(
f'entity_to: {str(error)}. Possible values are {[item.value for item in standardisation.PossibleTos]}')
f'entity_to: {str(error)}. Possible values are {[item.value for item in standardisation.PossibleEntitiesTo]}')
selection_description_dict = json.loads(selection_description)
selection_description_dict = json.loads(raw_selection_description)
try:
parsed_selection_mode = standardisation.SelectionModes(selection_description_dict['selectionMode'])
if parsed_selection_mode == standardisation.SelectionModes.NO_ITEMS_EXCEPT:
......@@ -49,5 +50,49 @@ def get_tiny_hash_to_related_items(destination_entity_browser_state_template,
if entity_to == entity_from:
raise EntitiesJoinerError(f'entity_to ({entity_to}) and entity_from ({entity_from}) cannot be the same!')
print('get_tiny_hash_to_related_items')
print('load ids:')
index_name = standardisation.get_index_name_for_from_entity(parsed_from_entity)
if index_name is None:
raise EntitiesJoinerError(f'There is no index name configured for queries from {parsed_from_entity.value}')
print('index_name: ', index_name)
from_property = standardisation.get_from_property(parsed_from_entity, parsed_to_entity)
if from_property is None:
raise EntitiesJoinerError(f'There is no from property configured for queries from {parsed_from_entity.value}')
print('from_property: ', from_property)
selection_description = json.loads(raw_selection_description)
es_query = json.loads(raw_es_query)
ids = ids_loader.load_ids_for_query(es_query, selection_description, from_property, index_name)
to_property = standardisation.get_to_property(parsed_from_entity, parsed_to_entity)
join_query = get_join_query(ids, to_property)
print('join_query: ')
print(join_query)
return 'holaaaa'
def get_join_query(ids, to_property):
    """
    Builds the elasticsearch query that selects the related items of a join.

    :param ids: list of ids for the join
    :param to_property: property of the destination entity that must match one of the ids
    :return: a dict with a 'terms' query on to_property restricted to the given ids
    """
    print('get_join_query')
    print('ids: ')
    print(ids)
    print('to_property: ')
    print(to_property)

    # The dict must start on the same line as `return`: a bare `return` followed by a
    # literal on the next line returns None and leaves the literal unreachable.
    return {
        "query": {
            "terms": {
                to_property: ids
            }
        }
    }
......@@ -2,13 +2,34 @@
Module with functions that help to load the ids for the entities joiner
"""
from app.entities_joiner import standardisation
from app.es_data import es_data
from utils import dict_property_access
def load_ids_for_query(es_query, selection_description, from_property, index_name):
    """
    Collects the join ids of every document matched by the dataset query and selection.

    :param es_query: query for the dataset
    :param selection_description: dict describing the selection
    :param from_property: property to read from each document to do the join
    :param index_name: name of the index to query
    :return: a list with the value of from_property for each document returned by the scanner
    """
    query_for_ids = get_ids_query(es_query, selection_description, from_property)
    scanner = es_data.get_es_scanner(index_name, query_for_ids)

    # Each hit carries the document under '_source'; the join value is extracted from it.
    # NOTE(review): get_property_value presumably resolves dotted paths such as
    # 'drug_warning.molecule_chembl_id' - confirm against utils.dict_property_access.
    return [
        dict_property_access.get_property_value(hit['_source'], from_property)
        for hit in scanner
    ]
def get_ids_query(es_query, selection_description, from_property):
"""
:param es_query: query for the dataset
:param selection_description: dict describing the selection
:param from_property: property to get to to the join
:param from_property: property to get to do the join
:return: the query to use to get the ids depending on the selection description
"""
selection_mode = selection_description['selectionMode']
......
......@@ -4,7 +4,7 @@ Module that helps with the standardisation of the package
from enum import Enum
class PossibleFroms(Enum):
class PossibleEntitiesFrom(Enum):
"""
Enumeration with the possible froms allowed
"""
......@@ -18,7 +18,7 @@ class PossibleFroms(Enum):
CHEMBL_DRUG_WARNINGS = 'CHEMBL_DRUG_WARNINGS'
class PossibleTos(Enum):
class PossibleEntitiesTo(Enum):
"""
Enumeration with the possible to allowed
"""
......@@ -38,3 +38,66 @@ class SelectionModes(Enum):
"""
ALL_ITEMS_EXCEPT = 'allItemsExcept'
NO_ITEMS_EXCEPT = 'noItemsExcept'
# Maps each PossibleEntitiesFrom member to the name of the index that is queried when that
# entity is the source of a join. It is read by get_index_name_for_from_entity, which
# returns None for any entity that has no entry here.
INDEX_NAMES_FOR_FROM_ENTITIES = {
PossibleEntitiesFrom.CHEMBL_ACTIVITIES: 'chembl_activity',
PossibleEntitiesFrom.CHEMBL_COMPOUNDS: 'chembl_molecule',
PossibleEntitiesFrom.CHEMBL_TARGETS: 'chembl_target',
PossibleEntitiesFrom.CHEMBL_ASSAYS: 'chembl_assay',
PossibleEntitiesFrom.CHEMBL_DOCUMENTS: 'chembl_document',
PossibleEntitiesFrom.CHEMBL_CELL_LINES: 'chembl_cell_line',
PossibleEntitiesFrom.CHEMBL_TISSUES: 'chembl_tissue',
PossibleEntitiesFrom.CHEMBL_DRUG_WARNINGS: 'chembl_drug_warning_by_parent',
}
def get_index_name_for_from_entity(parsed_from_entity):
    """
    Looks up the index to query for a given source entity.

    :param parsed_from_entity: entity 'from' parsed by the PossibleEntitiesFrom enum
    :return: the index name corresponding to the from entity, or None when no index is configured for it
    """
    return INDEX_NAMES_FOR_FROM_ENTITIES.get(parsed_from_entity)
# Join configuration, nested as: 'from' -> source entity -> 'to' -> destination entity ->
# {'from_property', 'to_property'}.
# 'from_property' is the property returned by get_from_property and 'to_property' the one
# returned by get_to_property. Both helpers return None for any pair of entities that is
# not configured here.
JOIN_PROPERTIES = {
'from': {
PossibleEntitiesFrom.CHEMBL_DRUG_WARNINGS: {
'to': {
PossibleEntitiesTo.CHEMBL_ACTIVITIES: {
'from_property': 'drug_warning.molecule_chembl_id',
'to_property': 'molecule_chembl_id'
}
}
}
}
}
def get_from_property(parsed_from_entity, parsed_to_entity):
    """
    Looks up which property of the source entity provides the ids for a join.

    :param parsed_from_entity: entity 'from' parsed by the PossibleEntitiesFrom enum
    :param parsed_to_entity: entity 'to' parsed by the PossibleEntitiesTo enum
    :return: the property used in the from entity of the join, or None if the join is not configured
    """
    # Every level falls back to an empty dict so an unconfigured pair yields None
    # instead of raising a KeyError.
    joins_from_entity = JOIN_PROPERTIES.get('from', {}).get(parsed_from_entity, {})
    join_description = joins_from_entity.get('to', {}).get(parsed_to_entity, {})
    return join_description.get('from_property')
def get_to_property(parsed_from_entity, parsed_to_entity):
    """
    Looks up which property of the destination entity must match the ids of a join.

    :param parsed_from_entity: entity 'from' parsed by the PossibleEntitiesFrom enum
    :param parsed_to_entity: entity 'to' parsed by the PossibleEntitiesTo enum
    :return: the property used in the to entity of the join, or None if the join is not configured
    """
    # Every level falls back to an empty dict so an unconfigured pair yields None
    # instead of raising a KeyError.
    joins_from_entity = JOIN_PROPERTIES.get('from', {}).get(parsed_from_entity, {})
    join_description = joins_from_entity.get('to', {}).get(parsed_to_entity, {})
    return join_description.get('to_property')
......@@ -7,6 +7,7 @@ import base64
import time
import elasticsearch
from elasticsearch.helpers import scan
from app.es_connection import ES
from app import cache
......@@ -229,3 +230,22 @@ def record_that_response_not_cached(index_name, es_query, time_taken):
es_request_digest = get_es_request_digest(es_query)
is_cached = False
statistics_saver.save_index_usage_record(index_name, es_query, es_request_digest, is_cached, time_taken)
def get_es_scanner(index_name, ids_query):
    """
    Creates a scanner that iterates over the documents matching a query.

    :param index_name: name of the index to query
    :param ids_query: query to get the ids
    :return: the object returned by elasticsearch.helpers.scan for the index and query given
    """
    print('get_es_scanner')
    print('index_name: ', index_name)

    # Options handed to the scan helper, gathered in one place.
    scan_options = {
        'index': index_name,
        'scroll': u'1m',
        'size': 1000,
        'request_timeout': 60,
        'query': ids_query,
    }
    return scan(ES, **scan_options)
......@@ -3,9 +3,10 @@
Module that tests the endpoints to do joins among entities selecting all ids
"""
import json
import requests
from specific_tests import utils
def run_test(server_base_url, delayed_jobs_server_base_path):
"""
......@@ -78,6 +79,12 @@ def run_test(server_base_url, delayed_jobs_server_base_path):
}
url = f'{server_base_url}/entities_join/get_link_to_related_items'
print('doing post')
print('doing post: ', url)
request = requests.post(url, data=join_params)
print('post done!')
status_code = request.status_code
print(f'status_code: {status_code}')
response_text = request.text
utils.print_es_response(response_text)
assert status_code == 200, 'The request failed!'
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment