Commit a0861f64 authored by David Mendez's avatar David Mendez
Browse files

Entities Join: add parameter to include a specific id property for the origin items

parent 323f60c1
......@@ -27,6 +27,7 @@ def get_link_to_related_items():
es_query = request_parameters.sanitise_parameter(form_data.get('es_query'))
selection_description = request_parameters.sanitise_parameter(form_data.get('selection_description'))
previous_hash = request_parameters.sanitise_parameter(form_data.get('previous_hash'))
origin_id_property = request_parameters.sanitise_parameter(form_data.get('origin_id_property'))
app_logging.debug(f'destination_entity_browser_state_template: {destination_entity_browser_state_template}')
app_logging.debug(f'entity_from: {entity_from}')
......@@ -37,7 +38,8 @@ def get_link_to_related_items():
try:
json_response = entities_join_service.get_tiny_hash_to_related_items(destination_entity_browser_state_template,
entity_from, entity_to, es_query,
selection_description, previous_hash)
selection_description, previous_hash,
origin_id_property)
return jsonify(json_response)
except entities_join_service.EntitiesJoinServiceError as error:
app_logging.debug(str(error))
......
......@@ -14,3 +14,4 @@ class EntitiesJoinQuery(Schema):
es_query = fields.String(required=True)
selection_description = fields.String(required=True)
previous_hash = fields.String()
origin_id_property = fields.String()
......@@ -10,7 +10,7 @@ class EntitiesJoinServiceError(Exception):
def get_tiny_hash_to_related_items(destination_entity_browser_state_template,
entity_from, entity_to, es_query,
selection_description, previous_hash):
selection_description, previous_hash, origin_id_property=None):
"""
:param destination_entity_browser_state_template: template for building the resulting browser url
:param entity_from: source entity of the items
......@@ -18,13 +18,16 @@ def get_tiny_hash_to_related_items(destination_entity_browser_state_template,
:param es_query: query in elasticsearch for the dataset
:param selection_description: stringified JavaScript object describing the selection of items in the dataset
:param previous_hash: hash of the state that originated this join of entities
:param origin_id_property: id property to identify the origin items. If not provided, it will default to
from_property later
:return: a dict with the tiny url to the link with the generated state
"""
try:
tiny_hash = entities_joiner.get_tiny_hash_to_related_items(destination_entity_browser_state_template,
entity_from, entity_to, es_query,
selection_description, previous_hash)
selection_description, previous_hash,
origin_id_property)
return {
'tiny_hash': tiny_hash
......
......@@ -21,7 +21,7 @@ class EntitiesJoinerError(Exception):
def get_tiny_hash_to_related_items(destination_entity_browser_state_template,
raw_origin_entity, raw_destination_entity, raw_es_query,
raw_selection_description, previous_hash):
raw_selection_description, previous_hash, origin_id_property=None):
"""
:param destination_entity_browser_state_template: template for building the resulting browser url
:param raw_origin_entity: text with the origin entity of the items
......@@ -29,6 +29,8 @@ def get_tiny_hash_to_related_items(destination_entity_browser_state_template,
:param raw_es_query: stringified query in elasticsearch for the dataset
:param raw_selection_description: stringified JavaScript object describing the selection of items in the dataset
:param previous_hash: hash of the state that originated this join of entities
:param origin_id_property: id property to identify the origin items. If not provided, it will default to
from_property later
:return: the hash to the link with the generated state
"""
cache_key = get_cache_key(destination_entity_browser_state_template, raw_origin_entity, raw_destination_entity,
......@@ -48,7 +50,8 @@ def get_tiny_hash_to_related_items(destination_entity_browser_state_template,
es_query = json.loads(raw_es_query)
ids = ids_loader.load_ids_for_query(es_query, join_parameters['selection_description'],
join_parameters['origin_property'], join_parameters['index_name'])
join_parameters['origin_property'], join_parameters['index_name'],
origin_id_property)
query_parameters = parse_query_parameters(origin_destination['parsed_origin_entity'],
origin_destination['parsed_destination_entity'])
......
......@@ -6,16 +6,20 @@ from app.es_data import es_data
from utils import dict_property_access
def load_ids_for_query(es_query, selection_description, from_property, index_name):
def load_ids_for_query(es_query, selection_description, from_property, index_name, origin_id_property=None):
"""
:param es_query: query for the dataset
:param selection_description: dict describing the selection
:param from_property: property to get to do the join
:param index_name: name of the index to query
:param origin_id_property: id property to identify the origin items. If not provided, it defaults to
from_property
:return: a list of ids for the query and the selection description indicated.
"""
if origin_id_property is None:
origin_id_property = from_property
ids = []
ids_query = get_ids_query(es_query, selection_description, from_property)
ids_query = get_ids_query(es_query, selection_description, from_property, origin_id_property)
ids_scanner = es_data.get_es_scanner(index_name, ids_query)
for doc_i in ids_scanner:
doc_source = doc_i['_source']
......@@ -24,13 +28,17 @@ def load_ids_for_query(es_query, selection_description, from_property, index_nam
return ids
def get_ids_query(es_query, selection_description, from_property):
def get_ids_query(es_query, selection_description, from_property, id_property=None):
"""
:param es_query: query for the dataset
:param selection_description: dict describing the selection
:param from_property: property to get to do the join
:param id_property: id property to identify the items. If not provided, it defaults to from_property
:return: the query to use to get the ids depending on the selection description
"""
if id_property is None:
id_property = from_property
selection_mode = selection_description['selectionMode']
parsed_selection_mode = standardisation.SelectionModes(selection_mode)
exceptions = selection_description.get('exceptions', [])
......@@ -38,10 +46,10 @@ def get_ids_query(es_query, selection_description, from_property):
if parsed_selection_mode == standardisation.SelectionModes.ALL_ITEMS_EXCEPT:
if len(exceptions) == 0:
return get_ids_query_for_all_items(es_query, from_property)
return get_ids_query_for_all_items_except_some(es_query, from_property, exceptions)
return get_ids_query_for_all_items_except_some(es_query, from_property, exceptions, id_property)
# Selecting none except some
return get_ids_query_for_no_items_except_some(es_query, from_property, exceptions)
return get_ids_query_for_no_items_except_some(es_query, from_property, exceptions, id_property)
def get_ids_query_for_all_items(es_query, from_property):
......@@ -56,11 +64,12 @@ def get_ids_query_for_all_items(es_query, from_property):
}
def get_ids_query_for_all_items_except_some(es_query, from_property, exceptions):
def get_ids_query_for_all_items_except_some(es_query, from_property, exceptions, id_property):
"""
:param es_query: query for the dataset
:param from_property: property to get to do the join
:param exceptions: selection exceptions
:param id_property: id property to identify the items.
:return: the ids query for all items except some
"""
dataset_query = es_query.get('query')
......@@ -70,7 +79,7 @@ def get_ids_query_for_all_items_except_some(es_query, from_property, exceptions)
dataset_query['bool']['must_not'] = []
dataset_query['bool']['must_not'].append({
'terms': {
from_property: exceptions
id_property: exceptions
}
})
return {
......@@ -79,18 +88,19 @@ def get_ids_query_for_all_items_except_some(es_query, from_property, exceptions)
}
def get_ids_query_for_no_items_except_some(es_query, from_property, exceptions):
def get_ids_query_for_no_items_except_some(es_query, from_property, exceptions, id_property):
"""
:param es_query: query for the dataset
:param from_property: property to get to do the join
:param exceptions: selection exceptions
:param id_property: id property to identify the items.
:return: the ids query for no items except some
"""
dataset_query = es_query.get('query')
dataset_query['bool'] = {
'filter': [{
'terms': {
from_property: exceptions
id_property: exceptions
}
}]
}
......
......@@ -115,3 +115,71 @@ class TestIDsLoader(unittest.TestCase):
ids_query_got = ids_loader.get_ids_query(es_query, selection_description, from_property)
self.assertDictEqual(ids_query_must_be, ids_query_got,
msg='The query was not generated correctly when selecting no items except some!')
def test_generates_query_to_get_ids_when_selecting_all_except_some_selecting_by_id(self):
    """
    Checks the ids query produced for the 'allItemsExcept' selection mode when the excluded items are
    identified by _id while the property fetched for the join is a different one.
    """
    sample_query = load_sample_query('sample_query_0.json')

    join_property = 'drug_warning.molecule_chembl_id'
    identifier_property = '_id'
    selection = {
        "selectionMode": "allItemsExcept",
        "exceptions": ['CHEMBL64___999', 'CHEMBL1366___152']
    }

    # The exceptions must be excluded through the id property, not through the join property.
    exclusion_clause = {
        "terms": {
            identifier_property: selection['exceptions']
        }
    }
    expected_query = {
        "query": {
            "bool": {
                "filter": [],
                "should": [],
                "must_not": [exclusion_clause]
            }
        },
        # Only the join property is requested back from the index.
        "_source": [join_property],
    }

    obtained_query = ids_loader.get_ids_query(sample_query, selection, join_property, identifier_property)

    failure_message = ('The query was not generated correctly when selecting all items except some using'
                       ' a different id property!')
    self.assertDictEqual(expected_query, obtained_query, msg=failure_message)
def test_generates_query_to_get_ids_when_selecting_none_except_some_selecting_by_id(self):
    """
    Checks the ids query produced for the 'noItemsExcept' selection mode when the selected items are
    identified by _id while the property fetched for the join is a different one.
    """
    sample_query = load_sample_query('sample_query_0.json')

    join_property = 'drug_warning.molecule_chembl_id'
    identifier_property = '_id'
    selection = {
        "selectionMode": "noItemsExcept",
        "exceptions": ['CHEMBL64___999', 'CHEMBL1366___152']
    }

    # The selected items must be filtered through the id property, not through the join property.
    inclusion_clause = {
        "terms": {
            identifier_property: selection['exceptions']
        }
    }
    expected_query = {
        "query": {
            "bool": {
                "filter": [inclusion_clause],
            }
        },
        # Only the join property is requested back from the index.
        "_source": [join_property],
    }

    obtained_query = ids_loader.get_ids_query(sample_query, selection, join_property, identifier_property)

    failure_message = 'The query was not generated correctly when selecting no items except some!'
    self.assertDictEqual(expected_query, obtained_query, msg=failure_message)
......@@ -540,6 +540,10 @@ paths:
description: 'hash of the state that is making the call'
type: 'string'
default: 'sQseUMn43BEG1hQ-doPggw=='
- name: 'origin_id_property'
in: 'formData'
description: 'Property that identifies the origin ids, if different from the configured from property'
type: 'string'
responses:
'200':
description: "success"
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment