Commit 86363e7d authored by David Mendez's avatar David Mendez
Browse files

Entities join: Fix bug when documents of an index are not identified by chembl id.

parent 027f44db
The location of the config file can be set with the variable CONFIG_FILE_PATH; if it is not set, the default value is 'config.yml'.
# Environment cheats
```shell
python3 -m venv .venv
source .venv/bin/activate
pip3 install -r requirements.txt
```
# To run unit tests
```shell
python -m unittest
```
# Run development server
```shell
FLASK_APP=app flask run --host=0.0.0.0
```
......@@ -28,7 +28,6 @@ def get_link_to_related_items():
es_query = request_parameters.sanitise_parameter(form_data.get('es_query'))
selection_description = request_parameters.sanitise_parameter(form_data.get('selection_description'))
previous_hash = request_parameters.sanitise_parameter(form_data.get('previous_hash'))
origin_id_property = request_parameters.sanitise_parameter(form_data.get('origin_id_property'))
is_test = schema_utils.parse_boolean_param(form_data, 'is_test')
app_logging.debug(f'destination_entity_browser_state_template: {destination_entity_browser_state_template}')
......@@ -42,7 +41,7 @@ def get_link_to_related_items():
json_response = entities_join_service.get_tiny_hash_to_related_items(destination_entity_browser_state_template,
entity_from, entity_to, es_query,
selection_description, previous_hash,
origin_id_property, is_test)
is_test)
return jsonify(json_response)
except entities_join_service.EntitiesJoinServiceError as error:
app_logging.debug(str(error))
......
......@@ -10,7 +10,7 @@ class EntitiesJoinServiceError(Exception):
def get_tiny_hash_to_related_items(destination_entity_browser_state_template,
entity_from, entity_to, es_query,
selection_description, previous_hash, origin_id_property=None, is_test=False):
selection_description, previous_hash, is_test=False):
"""
:param destination_entity_browser_state_template: template for building the resulting browser url
:param entity_from: source entity of the items
......@@ -18,16 +18,13 @@ def get_tiny_hash_to_related_items(destination_entity_browser_state_template,
:param es_query: query in elasticsearch for the dataset
:param selection_description: stringified JavaScript object describing the selection of items in the dataset
:param previous_hash: hash of the state that originated this join of entities
:param origin_id_property: id property to identify the origin items. If not provided, it will be equal as
from_property later
:param is_test: tells if the request is marked as coming from a test
:return: a dict with the tiny url to the link with the generated state
"""
try:
tiny_hash = entities_joiner.get_tiny_hash_to_related_items(destination_entity_browser_state_template,
entity_from, entity_to, es_query,
selection_description, previous_hash,
origin_id_property, is_test)
selection_description, previous_hash, is_test)
return {
'tiny_hash': tiny_hash
......
......@@ -22,7 +22,7 @@ class EntitiesJoinerError(Exception):
def get_tiny_hash_to_related_items(destination_entity_browser_state_template,
raw_origin_entity, raw_destination_entity, raw_es_query,
raw_selection_description, previous_hash, origin_id_property=None, is_test=False):
raw_selection_description, previous_hash, is_test=False):
"""
:param destination_entity_browser_state_template: template for building the resulting browser url
:param raw_origin_entity: text with the origin entity of the items
......@@ -30,8 +30,6 @@ def get_tiny_hash_to_related_items(destination_entity_browser_state_template,
:param raw_es_query: stringified query in elasticsearch for the dataset
:param raw_selection_description: stringified JavaScript object describing the selection of items in the dataset
:param previous_hash: hash of the state that originated this join of entities
:param origin_id_property: id property to identify the origin items. If not provided, it will be equal as
from_property later
:param is_test: tells if the request is marked as coming from a test
:return: the hash to the link with the generated state
"""
......@@ -52,8 +50,7 @@ def get_tiny_hash_to_related_items(destination_entity_browser_state_template,
es_query = json.loads(raw_es_query)
ids = ids_loader.load_ids_for_query(es_query, join_parameters['selection_description'],
join_parameters['origin_property'], join_parameters['index_name'],
origin_id_property)
join_parameters['origin_property'], join_parameters['index_name'])
query_parameters = parse_query_parameters(origin_destination['parsed_origin_entity'],
origin_destination['parsed_destination_entity'])
......
......@@ -6,38 +6,36 @@ from app.es_data import es_data
from utils import dict_property_access
def load_ids_for_query(es_query, selection_description, from_property, index_name, origin_id_property=None):
def load_ids_for_query(es_query, selection_description, from_property, index_name):
"""
:param es_query: query for the dataset
:param selection_description: dict describing the selection
:param from_property: property to get to do the join
:param index_name: name of the index to query
:param origin_id_property: id property to identify the origin items. If not provided, it will be equal as
from_property
:return: a list of ids for the query and the selection description indicated.
"""
if origin_id_property is None:
origin_id_property = from_property
ids = []
ids_query = get_ids_query(es_query, selection_description, from_property, origin_id_property)
ids = [] # use a list to keep the same order as the query returns
already_added_ids = set()
ids_query = get_ids_query(es_query, selection_description, from_property)
ids_scanner = es_data.get_es_scanner(index_name, ids_query)
for doc_i in ids_scanner:
doc_source = doc_i['_source']
from_property_value = dict_property_access.get_property_value(doc_source, from_property)
if from_property_value in already_added_ids:
continue
already_added_ids.add(from_property_value)
ids.append(from_property_value)
return ids
def get_ids_query(es_query, selection_description, from_property, id_property=None):
def get_ids_query(es_query, selection_description, from_property):
"""
:param es_query: query for the dataset
:param selection_description: dict describing the selection
:param from_property: property to get to do the join
:param id_property: id property to identify the items. If not provided, it will be equal as from_property
:return: the query to use to get the ids depending on the selection description
"""
if id_property is None:
id_property = from_property
selection_mode = selection_description['selectionMode']
parsed_selection_mode = standardisation.SelectionModes(selection_mode)
......@@ -46,10 +44,10 @@ def get_ids_query(es_query, selection_description, from_property, id_property=No
if parsed_selection_mode == standardisation.SelectionModes.ALL_ITEMS_EXCEPT:
if len(exceptions) == 0:
return get_ids_query_for_all_items(es_query, from_property)
return get_ids_query_for_all_items_except_some(es_query, from_property, exceptions, id_property)
return get_ids_query_for_all_items_except_some(es_query, from_property, exceptions)
# Selecting none except some
return get_ids_query_for_no_items_except_some(es_query, from_property, exceptions, id_property)
return get_ids_query_for_no_items_except_some(es_query, from_property, exceptions)
def get_ids_query_for_all_items(es_query, from_property):
......@@ -64,12 +62,11 @@ def get_ids_query_for_all_items(es_query, from_property):
}
def get_ids_query_for_all_items_except_some(es_query, from_property, exceptions, id_property):
def get_ids_query_for_all_items_except_some(es_query, from_property, exceptions):
"""
:param es_query: query for the dataset
:param from_property: property to get to do the join
:param exceptions: selection exceptions
:param id_property: id property to identify the items.
:return: the ids query for all items except some
"""
dataset_query = es_query.get('query')
......@@ -79,7 +76,7 @@ def get_ids_query_for_all_items_except_some(es_query, from_property, exceptions,
dataset_query['bool']['must_not'] = []
dataset_query['bool']['must_not'].append({
'terms': {
id_property: exceptions
'_id': exceptions
}
})
return {
......@@ -88,19 +85,18 @@ def get_ids_query_for_all_items_except_some(es_query, from_property, exceptions,
}
def get_ids_query_for_no_items_except_some(es_query, from_property, exceptions, id_property):
def get_ids_query_for_no_items_except_some(es_query, from_property, exceptions):
"""
:param es_query: query for the dataset
:param from_property: property to get to do the join
:param exceptions: selection exceptions
:param id_property: id property to identify the items.
:return: the ids query for no items except some
"""
dataset_query = es_query.get('query')
dataset_query['bool'] = {
'filter': [{
'terms': {
id_property: exceptions
'_id': exceptions
}
}]
}
......
......@@ -562,10 +562,6 @@ paths:
description: 'hash of the state that is making the call'
type: 'string'
default: 'sQseUMn43BEG1hQ-doPggw=='
- name: 'origin_id_property'
in: 'formData'
description: 'Property that identifies the origin ids, if different from the configured from property'
type: 'string'
responses:
'200':
description: "success"
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment