Commit 76310699 authored by David Mendez
Browse files

EUbOPEN search: add first version of free text search

parent 959e94cc
......@@ -6,7 +6,7 @@ from flask import Flask
from flask_cors import CORS
from app.config import RUN_CONFIG
from app.cache import CACHE
from app.cache.cache import CACHE
from app.blueprints.swagger_description.swagger_description_blueprint import SWAGGER_BLUEPRINT
from app.blueprints.es_proxy.controllers.es_proxy_controller import ES_PROXY_BLUEPRINT
from app.blueprints.properties_config.controllers.properties_config_controller import PROPERTIES_CONFIG_BLUEPRINT
......
......@@ -6,6 +6,7 @@ from flask import Blueprint, jsonify, request
from app.request_validation.decorators import validate_form_with
from app.blueprints.eubopen.search import marshmallow_schemas
from app.blueprints.eubopen.search import services
from app.http_cache import http_cache_utils
EUBOPEN_SEARCH_BLUEPRINT = Blueprint('eubopen_search', __name__)
......@@ -19,6 +20,17 @@ def get_autocomplete_results():
form_data = request.form
term = form_data.get('term')
json_response = services.get_autocomplete_results(term)
http_response = jsonify(json_response)
json_data = services.get_autocomplete_results(term)
http_response = http_cache_utils.get_json_response_with_http_cache_headers(json_data)
return http_response
@EUBOPEN_SEARCH_BLUEPRINT.route('/free_text', methods=['POST'])
@validate_form_with(marshmallow_schemas.EubopenSearch)
def get_free_text_search_queries():
    """
    Endpoint for the EUbOPEN free text search. Reads the 'term' from the submitted form and
    delegates the search to the services module.

    The response is built with http_cache_utils.get_json_response_with_http_cache_headers,
    like the autocomplete endpoint of this blueprint does, instead of returning the raw
    dict produced by the service.
    :return: the http response with the queries corresponding to the search term
    """
    form_data = request.form
    term = form_data.get('term')

    json_data = services.get_search_results(term)
    http_response = http_cache_utils.get_json_response_with_http_cache_headers(json_data)
    return http_response
......@@ -11,3 +11,10 @@ class EubopenAutocomplete(CommonSchema):
Class that defines the schema for the autocomplete in eubopen
"""
term = fields.String(required=True)
class EubopenSearch(CommonSchema):
    """
    Class that defines the schema for the free text search in eubopen
    """
    # Term to search for; it is declared as required, so it must be present in the form
    term = fields.String(required=True)
......@@ -3,6 +3,7 @@ Services for the eubopen search
"""
from app.eubopen.search.suggestions import suggestions
from app.eubopen.search.free_text import free_text
def get_autocomplete_results(term):
......@@ -12,3 +13,12 @@ def get_autocomplete_results(term):
"""
return suggestions.get_suggestions_for_term(term)
def get_search_results(term):
    """
    Delegates the free text search to the free_text module.

    :param term: term that is being searched
    :return: the queries and details of the search results
    """
    search_results = free_text.get_search_results(term)
    return search_results
......@@ -5,8 +5,7 @@ from app.visualisation_data.assay_classification import in_vivo
from app.visualisation_data.target_classification import go_slim
from app.visualisation_data.target_classification import organism_taxonomy
from app.visualisation_data.target_classification import protein_class
from app.cache import CACHE
from app.cache import make_memoize_cache_key
from app.cache.cache import CACHE, make_memoize_cache_key
from app.config import RUN_CONFIG
from app.es_data import es_data
......
"""
Module that defines the decorators for the app cache
"""
from functools import wraps
from app.config import RUN_CONFIG
from app import app_logging
from app.cache import cache
def return_if_cached_results(config):
    """
    Decorator to return results without executing the function if results are cached.
    When nothing is cached it executes the function and saves the results in cache.

    :param config: configuration of the cache, a dict with the keys:
        - 'cache_key_generator': callable that receives the same args and kwargs as the
          decorated function and returns the base cache key
        - 'timeout': timeout passed to the cache when the results are saved
    :return: the decorator to apply to the function
    """

    def wrap(func):
        @wraps(func)
        def wrapped_func(*args, **kwargs):
            cache_key_generator = config['cache_key_generator']
            base_cache_key = cache_key_generator(*args, **kwargs)
            # The suffix configured in RUN_CONFIG is appended to every key
            cache_key = f'{base_cache_key}-{RUN_CONFIG.get("cache_key_suffix")}'
            # Only the logger reports the key, nothing is printed to stdout
            app_logging.debug(f'cache_key: {cache_key}')

            cache_response = cache.fail_proof_get(key=cache_key)
            # A None response is treated as a cache miss, so the function is executed
            if cache_response is not None:
                app_logging.debug(f'{cache_key} was cached')
                return cache_response

            function_results = func(*args, **kwargs)
            timeout = config['timeout']
            cache.fail_proof_set(cache_key, function_results, timeout)
            app_logging.debug(f'Saved {cache_key} in cache')
            return function_results

        return wrapped_func

    return wrap
\ No newline at end of file
......@@ -6,7 +6,7 @@ import re
import requests
from app.config import RUN_CONFIG
from app import cache
from app.cache import cache
from app import app_logging
from utils import id_properties
......
......@@ -13,7 +13,7 @@ from app.url_shortening import url_shortener
from app.config import RUN_CONFIG
from app.usage_statistics import statistics_saver
from app import app_logging
from app import cache
from app.cache import cache
class EntitiesJoinerError(Exception):
......
......@@ -10,7 +10,7 @@ import elasticsearch
from elasticsearch.helpers import scan
from app.es_connection import ES
from app import cache
from app.cache import cache
from app.config import RUN_CONFIG
from app import app_logging
from app.usage_statistics import statistics_saver
......
......@@ -5,8 +5,7 @@ from app.es_connection import ES
from app import app_logging
from app.es_data import utils
from app.config import RUN_CONFIG
from app.cache import CACHE
from app.cache import make_memoize_cache_key
from app.cache.cache import CACHE, make_memoize_cache_key
from utils import dict_property_access
SIMPLE_MAPPINGS = {
......
......@@ -7,7 +7,7 @@ import hashlib
import base64
from app.es_data import es_data
from app.cache import CACHE
from app.cache.cache import CACHE
from app import create_app
from app.config import RUN_CONFIG
......
"""
Module that handles the free text search in EUbOPEN
"""
from copy import deepcopy
from app.config import RUN_CONFIG
from app.es_data import es_data
from app.cache.decorators import return_if_cached_results
@return_if_cached_results({
    'cache_key_generator': lambda *args, **kwargs: f'EUbOPEN-search-text-{args[0]}',
    'timeout': RUN_CONFIG.get('es_proxy_cache_seconds')
})
def get_search_results(term):
    """
    Builds the description of the free text search for every entity configured in
    RUN_CONFIG under eubopen -> search -> free_text -> entities.

    :param term: search term
    :return: a dict with the ES query and the max score of each entity under 'entities', and
    the key of the entity with the highest max score under 'highest_scoring_entity'
    """
    free_text_config = RUN_CONFIG.get('eubopen', {}).get('search', {}).get('free_text', {})

    entities_results = {}
    results = {
        "entities": entities_results
    }

    best_entity = None
    best_score = -1

    for entity_key, entity_config in free_text_config.get('entities', {}).items():
        index_name = entity_config['index_name']
        search_properties = entity_config['search_properties']
        highlight_properties = entity_config['highlight_properties']

        es_query = get_basic_search_query(term, search_properties)
        # The max score is obtained before the highlight is added to the query
        max_score = get_search_query_max_score(index_name, es_query)
        add_highlight_to_search_query(es_query, highlight_properties)

        # An entity without max score is compared as -1, so it is never the highest scoring one
        score_to_compare = max_score if max_score is not None else -1
        if score_to_compare > best_score:
            best_entity, best_score = entity_key, score_to_compare

        entities_results[entity_key] = {
            "es_query": es_query,
            "max_score": max_score
        }

    results["highest_scoring_entity"] = best_entity
    return results
# Characters with a special meaning in the query_string syntax are escaped with a backslash.
# '<' and '>' can not be escaped, so they are removed from the term.
_QUERY_STRING_TRANSLATION = str.maketrans({
    **{char: f'\\{char}' for char in '+-=&|!(){}[]^"~*?:\\/'},
    '<': None,
    '>': None,
})


def get_basic_search_query(term, search_properties):
    """
    Builds a bool query with three 'should' clauses: an ids query with the term, a
    query_string query with the term surrounded by wildcards, and a fuzzy multi_match query.

    The term is written by the user, so for the query_string clause its reserved characters
    are escaped to make them match literally instead of being parsed as operators, which
    could make the query fail. The ids and multi_match clauses receive the term as it is.

    :param term: search term
    :param search_properties: properties to be included in the search
    :return: the resulting query to apply for the search
    """
    escaped_term = str(term).translate(_QUERY_STRING_TRANSLATION)

    return {
        "query": {
            "bool": {
                "should": [
                    {
                        "ids": {
                            "values": [
                                term
                            ]
                        }
                    },
                    {
                        "query_string": {
                            "fields": search_properties,
                            "query": f'*{escaped_term}*'
                        }
                    },
                    {
                        "multi_match": {
                            "query": term,
                            "fields": search_properties,
                            "fuzziness": "AUTO"
                        }
                    }
                ]
            }
        },
    }
def get_search_query_max_score(index_name, search_query):
    """
    Runs a copy of the search query against the index to obtain the max score of the hits.

    :param index_name: name of the index to search on
    :param search_query: search query obtained for the entity, it is not modified
    :return: the maximum score of the search, which can be used to sort the entities in the results.
    """
    # Work on a copy so the '_source' setting does not end up in the query of the caller
    query_for_score = deepcopy(search_query)
    query_for_score["_source"] = ""

    es_response = es_data.get_es_response(index_name, query_for_score)
    hits = es_response.get('hits', {})
    return hits.get('max_score', 0)
def add_highlight_to_search_query(search_query, highlight_properties):
    """
    Sets the 'highlight' section of the query in place, with an empty configuration
    for every property to highlight. Any previous 'highlight' section is replaced.

    :param search_query: search query for the current entity
    :param highlight_properties: properties to highlight
    """
    highlight_fields = {prop_name: {} for prop_name in highlight_properties}
    search_query["highlight"] = {"fields": highlight_fields}
......@@ -4,6 +4,7 @@ Module with the functions to generate suggestions for eubopen
from app.es_data import es_data
from app.config import RUN_CONFIG
from app.properties_configuration import properties_configuration_manager
from utils import dict_property_access
......
......@@ -8,7 +8,7 @@ import arpeggio
from app.free_text_parsing.parser import PARSER
from app.free_text_parsing.query_builder.query_builder import QueryBuilder
from app.free_text_parsing.terms_visitor import TERMS_VISITOR
from app import cache
from app.cache import cache
from app import app_logging
from app.config import RUN_CONFIG
......
......@@ -17,6 +17,7 @@ def add_cache_headers_to_response(response, hours=24):
response.headers.add('Cache-Control', 'public,max-age=%d' % int(3600 * hours))
response.add_etag()
def get_json_response_with_http_cache_headers(json_data):
"""
:param json_data: data to include in the response
......
......@@ -6,7 +6,7 @@ import os
import yaml
from app.properties_configuration import properties_configuration_manager
from app import cache
from app.cache import cache
from app import app_logging
from app.config import RUN_CONFIG
......
......@@ -6,7 +6,7 @@ import os
import yaml
from app import app_logging
from app import cache
from app.cache import cache
from app.config import RUN_CONFIG
from app.properties_configuration import properties_configuration_manager
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment