Commit 16cb8e25 authored by David Mendez
Browse files

Add functionality of getting results with context

parent 94844fc1
import traceback
import json
import hashlib
import base64
from django.http import JsonResponse, HttpResponse
from glados.usage_statistics import glados_server_statistics
from django.views.decorators.csrf import csrf_exempt
from django.views.decorators.http import require_POST
from django.core.cache import cache
from django.conf import settings
from glados.api.chembl.es_proxy.services import es_proxy_service
@csrf_exempt
@require_POST
def get_es_data(request):
    """
    Proxies an Elasticsearch query received via POST, optionally attaching a context to the results.

    Reads 'index_name', 'search_data', 'context_obj', 'id_property' and 'contextual_sort_data' from the POST body.
    Successful responses are cached for settings.ES_PROXY_CACHE_SECONDS under a key derived from those parameters.
    :param request: the Django request (POST only, CSRF exempt)
    :return: a JsonResponse with the (possibly cached) response, or a plain HttpResponse with status 500 when the
    query fails or produces no response
    """
    index_name = request.POST.get('index_name', '')
    raw_search_data = request.POST.get('search_data', '')
    raw_context = request.POST.get('context_obj')
    id_property = request.POST.get('id_property')
    raw_contextual_sort_data = request.POST.get('contextual_sort_data')

    # The key builder parses its arguments as JSON. A request without context (None) or with a non-JSON value must
    # still be served, so a failure to build the key only disables caching for this request.
    try:
        cache_key = get_request_cache_key(index_name, raw_search_data, raw_context, id_property,
                                          raw_contextual_sort_data)
    except (TypeError, ValueError):
        cache_key = None

    if cache_key is not None:
        cache_response = cache.get(cache_key)
        if cache_response is not None:
            return JsonResponse(cache_response)

    # JavaScript clients may send the literal strings 'undefined' or 'null' when there is no context
    has_context = raw_context is not None and raw_context not in ('undefined', 'null')

    try:
        if has_context:
            response = es_proxy_service.get_items_with_context(index_name, raw_search_data, raw_context, id_property,
                                                               raw_contextual_sort_data)
        else:
            response = glados_server_statistics.get_and_record_es_cached_response(index_name, raw_search_data)
    except Exception:
        # Deliberately broad: this is the request boundary, any failure becomes a 500 after being logged
        traceback.print_exc()
        return HttpResponse('Internal Server Error', status=500)

    if response is None:
        return HttpResponse('ELASTIC SEARCH RESPONSE IS EMPTY!', status=500)

    if cache_key is not None:
        cache.set(cache_key, response, settings.ES_PROXY_CACHE_SECONDS)

    return JsonResponse(response)
def _stable_json(raw_value):
    """
    Returns a canonical (sorted keys) serialisation of a stringified JSON value. Values that are not parseable JSON
    (None, '', 'undefined', ...) are returned unchanged so that they can still take part in a cache key.
    """
    try:
        return json.dumps(json.loads(raw_value), sort_keys=True)
    except (TypeError, ValueError):
        # TypeError: not a string (e.g. None); ValueError: json.JSONDecodeError for malformed text
        return raw_value


def get_request_cache_key(index_name, raw_search_data, raw_context, id_property, raw_contextual_sort_data):
    """
    Returns a cache key from the request parameters. The JSON parameters are normalised first, so two requests that
    differ only in the order of their keys share the same cache key. Parameters that are missing or are not valid
    JSON are used as they are instead of raising.
    :param index_name: name of the index for which the request is made
    :param raw_search_data: stringified dict with the query to send to ES
    :param raw_context: stringified dict describing the context of the request, may be None
    :param id_property: property used to identify the items
    :param raw_contextual_sort_data: stringified dict describing the sorting by the contextual properties, may be None
    :return: a string of the form 'es_proxy-<base64 of the sha256 digest of the merged parameters>'
    """
    merged_params = '{index_name}-{raw_search_data}-{raw_context}-{id_property}-{raw_contextual_sort_data}'.format(
        index_name=index_name,
        raw_search_data=_stable_json(raw_search_data),
        raw_context=_stable_json(raw_context),
        id_property=id_property,
        raw_contextual_sort_data=_stable_json(raw_contextual_sort_data)
    )

    # Hash the merged parameters so the key has a fixed length regardless of the size of the query
    merged_params_digest = hashlib.sha256(merged_params.encode('utf-8')).digest()
    base64_search_data_hash = base64.b64encode(merged_params_digest).decode('utf-8')

    return 'es_proxy-{}'.format(base64_search_data_hash)
......@@ -5,6 +5,11 @@ import json
from app import app_logging
from app.es_data import es_data
from app.blueprints.es_proxy.services.helpers import context_loader
from app.config import RUN_CONFIG
# Key under which the context data of an item is attached to the '_source' of each hit. It is also the prefix
# ('_context.') that is stripped from the property names used for contextual sorting.
CONTEXT_PREFIX = '_context'
class ESProxyServiceError(Exception):
    """Common base for the exceptions defined in this module."""
......@@ -31,7 +36,124 @@ def get_es_data(index_name, raw_es_query, raw_context, id_property, raw_contextu
else:
return {'msg': 'hola'}
app_logging.debug(f'Using context: {raw_context}')
es_response, metadata = get_items_with_context(index_name, raw_es_query, raw_context, id_property,
raw_contextual_sort_data)
response = {
'es_response': es_response,
'metadata': metadata
}
return response
def _load_contextual_sort_data(raw_contextual_sort_data):
    """
    Parses the stringified description of the contextual sorting. Absent values (None, blank, or the literal
    'undefined' / 'null' that JavaScript clients may send) mean that no contextual sorting was requested.
    """
    if raw_contextual_sort_data is None:
        return {}
    if isinstance(raw_contextual_sort_data, str) and raw_contextual_sort_data.strip() in ('', 'undefined', 'null'):
        return {}
    return json.loads(raw_contextual_sort_data)


def _append_bool_clause(bool_query, occurrence, clause):
    """
    Adds a clause to bool_query[occurrence] ('must', 'filter', ...). Elasticsearch accepts that entry as absent,
    as a single clause or as a list of clauses, so all three shapes are handled.
    """
    current = bool_query.get(occurrence)
    if current is None:
        bool_query[occurrence] = [clause]
    elif isinstance(current, list):
        current.append(clause)
    else:
        bool_query[occurrence] = [current, clause]


def get_items_with_context(index_name, raw_es_query, raw_context, id_property, raw_contextual_sort_data='{}'):
    """
    Runs the query restricted to the items of a context and attaches to each hit its entry of the context.
    :param index_name: name of the index to query
    :param raw_es_query: stringified es_query, it must contain a bool query under ['query']['bool']
    :param raw_context: stringified context dict, it must contain the key 'context_id'
    :param id_property: property used to identify the items
    :param raw_contextual_sort_data: stringified dict describing the sorting by contextual properties; None, blank,
    'undefined' and 'null' are all treated as no contextual sorting
    :return: a tuple (es_response, metadata): the ES response with the context entry of each hit stored under
    hit['_source'][CONTEXT_PREFIX], and a dict with 'total_results' and 'max_results_injected'
    """
    context_dict = json.loads(raw_context)
    context, total_results = context_loader.get_context(context_dict)

    # create a context index so access is faster
    context_id = context_dict['context_id']
    context_index = context_loader.load_context_index(context_id, id_property, context)

    parsed_search_data = json.loads(raw_es_query)
    contextual_sort_data = _load_contextual_sort_data(raw_contextual_sort_data)
    bool_query = parsed_search_data['query']['bool']

    # scoring clause: makes ES order the hits following the context
    scores_query = get_scores_query(contextual_sort_data, id_property, total_results, context_index)
    _append_bool_clause(bool_query, 'must', scores_query)

    # filtering clause: only the items present in the context can be returned
    ids_query = get_request_for_chembl_ids(id_property, list(context_index.keys()))
    _append_bool_clause(bool_query, 'filter', ids_query)

    # The query is serialised and parsed again before being sent, as the original implementation did
    # NOTE(review): presumably to hand over a plain JSON copy of the injected context index - confirm
    raw_search_data_with_injections = json.dumps(parsed_search_data)
    es_response = es_data.get_es_response(index_name, json.loads(raw_search_data_with_injections))

    for hit in es_response['hits']['hits']:
        # the context index is looked up with the '_id' of the hit
        hit['_source'][CONTEXT_PREFIX] = context_index[hit['_id']]

    metadata = {
        'total_results': len(context_index),
        'max_results_injected': RUN_CONFIG.get('filter_query_max_clauses')
    }

    return es_response, metadata
def get_scores_query(contextual_sort_data, id_property, total_results, context_index):
    """
    Returns the query with the scores for the data to sort it with the contextual properties.

    Every request-dependent value (id property, score property, total of results, context index) is handed to the
    script through 'params' instead of being concatenated into its source. This keeps request supplied text out of
    the script code and lets Elasticsearch reuse the compiled script between requests.
    :param contextual_sort_data: dict describing the sorting by contextual properties, in the form
    {'<CONTEXT_PREFIX>.<property>': 'asc' | 'desc'}. Only its first entry is used; any order other than 'desc' is
    handled as ascending. When empty, items are scored by their original position in the results.
    :param id_property: property used to identify each item
    :param total_results: total number of results
    :param context_index: index with the context, it is used as the 'scores' parameter of the script
    :return: a dict with a function_score query that uses a painless script_score
    """
    script_params = {
        'scores': context_index,
        'id_property': id_property,
    }

    if not contextual_sort_data:
        # if nothing is specified use the default scoring script, which is to score them according to their original
        # position in the results
        script_params['score_property'] = 'index'
        script_params['total_results'] = total_results
        score_expression = "params.total_results - params.scores[id][params.score_property]"
    else:
        raw_score_property = next(iter(contextual_sort_data))
        script_params['score_property'] = raw_score_property.replace('{}.'.format(CONTEXT_PREFIX), '')

        if contextual_sort_data[raw_score_property] == 'desc':
            score_expression = "params.scores[id][params.score_property]"
        else:
            # 1.0 forces a floating point division: with integer values '1 / x' is an integer division in
            # painless, which would give 0 to every item with a value greater than 1
            score_expression = "1.0 / params.scores[id][params.score_property]"

    score_script = "String id=doc[params.id_property].value; return " + score_expression + ";"

    scores_query = {
        'function_score': {
            'functions': [{
                'script_score': {
                    'script': {
                        'lang': "painless",
                        'params': script_params,
                        'source': score_script
                    }
                }
            }]
        }
    }

    return scores_query
def get_request_for_chembl_ids(id_property, ids_list):
    """
    Builds the terms query that matches the items whose id_property has one of the given ids
    :param id_property: property that identifies the items
    :param ids_list: list of ids to match
    :return: a dict of the form {'terms': {id_property: ids_list}}
    """
    terms_clause = {id_property: ids_list}
    return {'terms': terms_clause}
......@@ -3,7 +3,7 @@ Module to test the context loader
"""
import unittest
from app.blueprints.es_proxy.controllers.helpers import context_loader
from app.blueprints.es_proxy.services.helpers import context_loader
class TestContextLoader(unittest.TestCase):
......
......@@ -68,6 +68,11 @@ def run_test(server_base_url, delayed_jobs_server_base_path):
assert len(hits) > 0, 'I should have gotten hits!'
metadata = response_json['metadata']
print('metadata: ', metadata)
assert metadata['total_results'] > 0, 'There should be more than 0 results!'
def submit_similarity_search_job(delayed_jobs_server_base_path):
"""
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment