Commit 4693bc5e authored by David Mendez
Browse files

Merge branch 'staging' into 'master'

EUbOPEN: Implement first version of autocomplete and search

See merge request !85
parents a8883d70 9d06bb87
......@@ -6,7 +6,7 @@ from flask import Flask
from flask_cors import CORS
from app.config import RUN_CONFIG
from app.cache import CACHE
from app.cache.cache import CACHE
from app.blueprints.swagger_description.swagger_description_blueprint import SWAGGER_BLUEPRINT
from app.blueprints.es_proxy.controllers.es_proxy_controller import ES_PROXY_BLUEPRINT
from app.blueprints.properties_config.controllers.properties_config_controller import PROPERTIES_CONFIG_BLUEPRINT
......
"""
Controller for handling the requests for the eubopen search
"""
from flask import Blueprint, jsonify
from flask import Blueprint, request
from app.request_validation.decorators import validate_url_params_with
from app.request_validation.decorators import validate_form_with
from app.blueprints.eubopen.search import marshmallow_schemas
from app.blueprints.eubopen.search import services
from app.http_cache import http_cache_utils
EUBOPEN_SEARCH_BLUEPRINT = Blueprint('eubopen_search', __name__)
@EUBOPEN_SEARCH_BLUEPRINT.route('/autocomplete', methods=['POST'])
@validate_form_with(marshmallow_schemas.EubopenAutocomplete)
def get_autocomplete_results():
    """
    Handles POST requests to /autocomplete. The term is read from the 'term' field of the submitted form,
    not from the url; a missing or empty term is replaced by '*'.

    :return: the http response that http_cache_utils builds from the results for the autocomplete query
    """
    form_data = request.form
    raw_term = form_data.get('term', '*')
    # an empty string is handled the same way as a missing term
    term = '*' if raw_term == '' else raw_term

    json_data = services.get_autocomplete_results(term)
    http_response = http_cache_utils.get_json_response_with_http_cache_headers(json_data)
    return http_response
@EUBOPEN_SEARCH_BLUEPRINT.route('/free_text', methods=['POST'])
@validate_form_with(marshmallow_schemas.EubopenSearch)
def get_free_text_search_queries():
    """
    Handles POST requests to /free_text. The search term is read from the 'term' field of the submitted form;
    a missing or empty term is replaced by '*'.

    :return: the queries corresponding to the search term
    """
    submitted_term = request.form.get('term', '*')
    if submitted_term == '':
        # an empty string is handled the same way as a missing term
        submitted_term = '*'

    return services.get_search_results(submitted_term)
......@@ -11,3 +11,10 @@ class EubopenAutocomplete(CommonSchema):
Class that the defines the schema for the autocomplete in eubopen
"""
term = fields.String(required=True)
class EubopenSearch(CommonSchema):
    """
    Schema that validates the parameters of the free text search in eubopen
    """
    # search term, it is mandatory
    term = fields.String(required=True)
"""
Services for the eubopen search
"""
import random
from app.eubopen.search.suggestions import suggestions
from app.eubopen.search.free_text import free_text
def get_autocomplete_results(term):
    """
    Returns the autocomplete results for the term, the work is delegated to the suggestions module.

    :param term: term for which to do the autocomplete
    :return: the results for the autocomplete query
    """
    return suggestions.get_suggestions_for_term(term)
def get_search_results(term):
    """
    Returns the search results for the term, the work is delegated to the free_text module.

    :param term: term that is being searched
    :return: the queries and details of the search results
    """
    return free_text.get_search_results(term)
......@@ -5,8 +5,7 @@ from app.visualisation_data.assay_classification import in_vivo
from app.visualisation_data.target_classification import go_slim
from app.visualisation_data.target_classification import organism_taxonomy
from app.visualisation_data.target_classification import protein_class
from app.cache import CACHE
from app.cache import make_memoize_cache_key
from app.cache.cache import CACHE, make_memoize_cache_key
from app.config import RUN_CONFIG
from app.es_data import es_data
......
"""
Module that defines the decorators for the app cache
"""
from functools import wraps
from app.config import RUN_CONFIG
from app import app_logging
from app.cache import cache
def return_if_cached_results(config):
    """
    Decorator to return results without executing the function if results are cached.
    After executing the function saves the results in cache.

    :param config: configuration of the cache, a dict with the keys:
        'cache_key_generator': callable that receives the same arguments as the decorated function and returns
        the base cache key.
        'timeout': value passed as the timeout to cache.fail_proof_set when the results are saved.
    :return: the decorator to apply to the function
    """
    def wrap(func):

        @wraps(func)
        def wrapped_func(*args, **kwargs):

            cache_key_generator = config['cache_key_generator']
            base_cache_key = cache_key_generator(*args, **kwargs)
            # the suffix from the run config is appended to every key
            cache_key = f'{base_cache_key}-{RUN_CONFIG.get("cache_key_suffix")}'
            app_logging.debug(f'cache_key: {cache_key}')

            cache_response = cache.fail_proof_get(key=cache_key)
            # None is read as 'nothing cached', so the function is executed again in that case
            if cache_response is not None:
                app_logging.debug(f'{cache_key} was cached')
                return cache_response

            function_results = func(*args, **kwargs)
            timeout = config['timeout']
            cache.fail_proof_set(cache_key, function_results, timeout)
            app_logging.debug(f'Saved {cache_key} in cache')

            return function_results

        return wrapped_func

    return wrap
......@@ -6,7 +6,7 @@ import re
import requests
from app.config import RUN_CONFIG
from app import cache
from app.cache import cache
from app import app_logging
from utils import id_properties
......
......@@ -13,7 +13,7 @@ from app.url_shortening import url_shortener
from app.config import RUN_CONFIG
from app.usage_statistics import statistics_saver
from app import app_logging
from app import cache
from app.cache import cache
class EntitiesJoinerError(Exception):
......
......@@ -10,7 +10,7 @@ import elasticsearch
from elasticsearch.helpers import scan
from app.es_connection import ES
from app import cache
from app.cache import cache
from app.config import RUN_CONFIG
from app import app_logging
from app.usage_statistics import statistics_saver
......
......@@ -5,8 +5,7 @@ from app.es_connection import ES
from app import app_logging
from app.es_data import utils
from app.config import RUN_CONFIG
from app.cache import CACHE
from app.cache import make_memoize_cache_key
from app.cache.cache import CACHE, make_memoize_cache_key
from utils import dict_property_access
SIMPLE_MAPPINGS = {
......
......@@ -7,7 +7,7 @@ import hashlib
import base64
from app.es_data import es_data
from app.cache import CACHE
from app.cache.cache import CACHE
from app import create_app
from app.config import RUN_CONFIG
......
"""
Module that handles the free text search in EUbOPEN
"""
from copy import deepcopy
from app.config import RUN_CONFIG
from app.es_data import es_data
from app.cache.decorators import return_if_cached_results
@return_if_cached_results({
    # The generator is called with the same arguments as the function. Naming the parameter makes the key work
    # for both positional and keyword calls; indexing args[0] failed when the term was passed by keyword.
    'cache_key_generator': lambda term: f'EUbOPEN-search-text-{term}',
    'timeout': RUN_CONFIG.get('es_proxy_cache_seconds')
})
def get_search_results(term):
    """
    Builds the search query for every entity configured under eubopen.search.free_text.entities in the run
    config, and asks for the maximum score of each query to find out which entity matches the term best.

    :param term: search term
    :return: the description of the search results, including the Es queries for each entity. It is a dict with
    the keys 'entities' (es_query and max_score per entity key) and 'highest_scoring_entity' (None when no entity
    produced a score)
    """
    search_config = RUN_CONFIG.get('eubopen', {}).get('search', {}).get('free_text', {})
    entities_config = search_config.get('entities', {})

    results = {
        "entities": {}
    }

    absolute_max_score = -1
    highest_scoring_entity = None

    for entity_key, entity_config in entities_config.items():
        index_name = entity_config['index_name']
        search_properties = entity_config['search_properties']
        highlight_properties = entity_config['highlight_properties']

        search_query = get_basic_search_query(term, search_properties)
        # the score is requested before the highlight is added, so the score query carries no highlight section
        max_score = get_search_query_max_score(index_name, search_query)
        add_highlight_to_search_query(search_query, highlight_properties)

        # a max score of None is compared as -1, so such an entity is never picked as the highest scoring one
        comp_score = -1 if max_score is None else max_score
        if comp_score > absolute_max_score:
            highest_scoring_entity = entity_key
            absolute_max_score = comp_score

        results["entities"][entity_key] = {
            "es_query": search_query,
            "max_score": max_score
        }

    results["highest_scoring_entity"] = highest_scoring_entity
    return results
def get_basic_search_query(term, search_properties):
    """
    Builds a bool query with three alternative (should) clauses: a match of the term against the document ids,
    a query_string with the term surrounded by wildcards, and a fuzzy multi_match.

    :param term: search term
    :param search_properties: properties to be included in the search
    :return: the resulting query to apply for the search
    """
    ids_clause = {"ids": {"values": [term]}}

    # NOTE(review): the term goes into the query_string unescaped, confirm that this is intended
    wildcard_clause = {
        "query_string": {
            "fields": search_properties,
            "query": f'*{term}*'
        }
    }

    fuzzy_clause = {
        "multi_match": {
            "query": term,
            "fields": search_properties,
            "fuzziness": "AUTO"
        }
    }

    bool_query = {"should": [ids_clause, wildcard_clause, fuzzy_clause]}
    return {"query": {"bool": bool_query}}
def get_search_query_max_score(index_name, search_query):
    """
    Runs a copy of the search query, with '_source' set to an empty string, to read the maximum score of the hits.
    The query received is not modified.

    :param index_name: name of the index to search on
    :param search_query: search query obtained for the entity
    :return: the maximum score of the search, which can be used to sort the entities in the results. It is 0 when
    the response has no 'hits' or no 'max_score'
    """
    query_for_score = deepcopy(search_query)
    query_for_score["_source"] = ""

    hits = es_data.get_es_response(index_name, query_for_score).get('hits', {})
    return hits.get('max_score', 0)
def add_highlight_to_search_query(search_query, highlight_properties):
    """
    Adds the highlight parameters to the query, the query is modified in place. Any 'highlight' entry that the
    query already had is replaced.

    :param search_query: search query for the current entity
    :param highlight_properties: properties to highlight
    """
    # every property gets its own empty dict of highlight options
    highlight_fields = {property_path: {} for property_path in highlight_properties}
    search_query["highlight"] = {"fields": highlight_fields}
"""
Module with the functions to generate suggestions for eubopen
"""
from app.es_data import es_data
from app.config import RUN_CONFIG
from app.properties_configuration import properties_configuration_manager
from app.cache.decorators import return_if_cached_results
from utils import dict_property_access
@return_if_cached_results({
    # The generator is called with the same arguments as the function. Naming the parameter makes the key work
    # for both positional and keyword calls; indexing args[0] failed when the term was passed by keyword.
    'cache_key_generator': lambda term: f'EUbOPEN-search-suggestion-{term}',
    'timeout': RUN_CONFIG.get('es_proxy_cache_seconds')
})
def get_suggestions_for_term(term):
    """
    Gets the suggestions for the term from every entity configured under eubopen.search.suggestions.entities
    in the run config.

    :param term: term for which to do the suggestion
    :return: the results for the suggestion query, a dict whose key 'entity_suggestions' maps each entity key to
    the list of suggestions found for it
    """
    autocomplete_config = RUN_CONFIG.get('eubopen', {}).get('search', {}).get('suggestions', {}).get('entities', {})

    entity_suggestions = {
        entity_key: get_suggestions(term, entity_config)
        for entity_key, entity_config in autocomplete_config.items()
    }

    return {
        'entity_suggestions': entity_suggestions,
    }
def get_suggestions(term, entity_config):
    """
    Sends a completion suggester query, using the term as prefix, to the index of the entity and parses
    the options returned.

    :param term: term for which to get suggestions
    :param entity_config: config of the entity for the suggestions, 'index_name' and 'size' are read here
    :return: the suggestions found
    """
    target_index = entity_config['index_name']
    max_suggestions = entity_config['size']

    completion_clause = {
        'field': '_metadata.es_completion',
        'size': max_suggestions
    }
    es_request = {
        # size 0: only the suggest section of the response is used
        'size': 0,
        'suggest': {
            'autocomplete': {
                'prefix': term,
                'completion': completion_clause
            }
        }
    }

    es_response = es_data.get_es_response(target_index, es_request)
    found_options = es_response["suggest"]["autocomplete"][0]["options"]
    return [parse_option(found_option, entity_config) for found_option in found_options]
def parse_option(raw_option, entity_config):
    """
    Reduces a raw suggestion option to its id, the label of the item, the text suggested and the label of
    the property in which the text was found.

    :param raw_option: raw option to parse
    :param entity_config: configuration of the entity autocomplete
    :return: a simplified version of the suggestion.
    """
    label_property = entity_config['label_property']
    candidate_properties = entity_config['highlight_properties']
    suggested_text = raw_option['text']

    matched_path = get_highlighted_property(raw_option['_source'], candidate_properties, suggested_text)

    if matched_path is None:
        # no candidate property contains the text, so there is no label to report
        matched_label = None
    else:
        index_name = entity_config['index_name']
        config_instance = properties_configuration_manager.get_property_configuration_instance()
        matched_label = config_instance.get_config_for_prop(index_name, matched_path)['label']

    option_id = raw_option['_id']
    item_label = dict_property_access.get_property_value(raw_option, f'_source.{label_property}')

    return {
        '_id': option_id,
        'item_label': item_label,
        'text': suggested_text,
        'highlighted_property': matched_label
    }
def get_highlighted_property(doc_source, highlight_properties, text):
    """
    Looks for the first candidate property whose value, converted to a string, contains the text matched.

    :param doc_source: source for the document to check
    :param highlight_properties: list of the properties that are candidates for highlighting
    :param text: text matched
    :return: the path of the first property that contains the text, None if no property contains it
    """
    for property_path in highlight_properties:
        raw_value = dict_property_access.get_property_value(doc_source, property_path)

        # A None value must not be converted to a string: str(None) is 'None', which falsely matches texts such
        # as 'on' or 'No'. Presumably None is what get_property_value returns for a missing property, confirm.
        if raw_value is None:
            continue

        if text in str(raw_value):
            return property_path

    return None
......@@ -8,7 +8,7 @@ import arpeggio
from app.free_text_parsing.parser import PARSER
from app.free_text_parsing.query_builder.query_builder import QueryBuilder
from app.free_text_parsing.terms_visitor import TERMS_VISITOR
from app import cache
from app.cache import cache
from app import app_logging
from app.config import RUN_CONFIG
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment