Commit c5661bd5 authored by David Mendez
Browse files

Implement endpoint for target classification tree in eubopen

parent 2cba5a05
"""
Services for the eubopen visualisations
"""
from app.visualisation_data.target_classification import eubopen_protein_class
class VisualisationServiceError(Exception):
......@@ -13,6 +14,4 @@ def get_protein_target_classification():
"""
:return: the json response with the protein target classification
"""
return {
'msg': 'hello'
}
return eubopen_protein_class.get_classification_tree()
{
"aggs": {
"children": {
"terms": {
"field": "l1",
"size": 1000,
"order": {
"_count": "desc"
}
},
"aggs": {
"children": {
"terms": {
"field": "l2",
"size": 1000,
"order": {
"_count": "desc"
}
},
"aggs": {
"children": {
"terms": {
"field": "l3",
"size": 1000,
"order": {
"_count": "desc"
}
},
"aggs": {
"children": {
"terms": {
"field": "l4",
"size": 1000,
"order": {
"_count": "desc"
}
},
"aggs": {
"children": {
"terms": {
"field": "l5",
"size": 1000,
"order": {
"_count": "desc"
}
},
"aggs": {
"children": {
"terms": {
"field": "l6",
"size": 1000,
"order": {
"_count": "desc"
}
}
}
}
}
}
}
}
}
}
}
}
}
}
}
\ No newline at end of file
"""
Module that generates the protein target classification for eubopen
"""
from app import cache
from app import app_logging
from app.visualisation_data.shared.tree_generator import TargetHierarchyTreeGenerator
from app.config import RUN_CONFIG
from utils import json_files_loading
def get_classification_tree():
    """
    Returns the protein target classification tree for eubopen.

    The cache is consulted first. On a miss, the tree is built by a
    TargetHierarchyTreeGenerator from the aggregation query stored in
    protein_class_tree_query.json, saved in the cache, and then returned.

    :return: the protein target classification tree
    """
    cache_key = f'target_classifications_protein_class_eubopen-{RUN_CONFIG.get("cache_key_suffix")}'
    app_logging.debug(f'cache_key: {cache_key}')

    cache_response = cache.fail_proof_get(key=cache_key)
    if cache_response is not None:
        app_logging.debug('results are in cache')
        return cache_response

    index_name = 'chembl_protein_class'
    query_file_path = 'app/visualisation_data/target_classification/data/protein_class_tree_query.json'
    es_query = json_files_loading.load_json_from_path(query_file_path)

    # The eubopen restriction is the same for every node of the tree, so it is
    # built once here instead of on every call to generate_count_query.
    related_eubopen_compounds = [
        'CHEMBL3906203',
        'CHEMBL3785432',
        'CHEMBL3770173',
        'CHEMBL2179387',
        'CHEMBL3356143',
        'CHEMBL4570969',
        'CHEMBL1957266',
        'CHEMBL3133807',
        'CHEMBL2132936',
        'CHEMBL4065619',
        'CHEMBL3752151',
        'CHEMBL3622373',
        'CHEMBL4650212',
        'CHEMBL4296718',
        'CHEMBL4650213',
        'CHEMBL3769507',
    ]
    joined_compound_ids = ' OR '.join(related_eubopen_compounds)
    eubopen_query = f'_metadata.related_compounds.all_chembl_ids:({joined_compound_ids})'

    def generate_count_query(path_to_node):
        """
        Builds the query string used to count the items belonging to a node.

        :param path_to_node: class names leading to the node; the first item is
            matched against level l1, the second against l2, and so on
        :return: query string combining one clause per level with the eubopen
            related compounds restriction
        """
        # NOTE(review): class names are interpolated without escaping; a name
        # containing a double quote would break the quoted phrase - confirm
        # that the names coming from the tree generator are safe.
        classes_queries = ' AND '.join(
            f'_metadata.protein_classification.l{level}:("{class_name}")'
            for level, class_name in enumerate(path_to_node, start=1)
        )
        return f'({classes_queries} AND {eubopen_query})'

    tree_generator = TargetHierarchyTreeGenerator(index_name=index_name, es_query=es_query,
                                                  query_generator=generate_count_query,
                                                  count_index='chembl_target')
    final_tree = tree_generator.get_classification_tree()

    # 3.154e7 seconds is roughly one year.
    cache_time = int(3.154e7)
    cache.fail_proof_set(key=cache_key, value=final_tree, timeout=cache_time)

    return final_tree
......@@ -5,6 +5,7 @@ from app import cache
from app import app_logging
from app.visualisation_data.shared.tree_generator import TargetHierarchyTreeGenerator
from app.config import RUN_CONFIG
from utils import json_files_loading
def get_classification_tree():
......@@ -21,74 +22,8 @@ def get_classification_tree():
return cache_response
index_name = 'chembl_protein_class'
es_query = {
"aggs": {
"children": {
"terms": {
"field": "l1",
"size": 1000,
"order": {
"_count": "desc"
}
},
"aggs": {
"children": {
"terms": {
"field": "l2",
"size": 1000,
"order": {
"_count": "desc"
}
},
"aggs": {
"children": {
"terms": {
"field": "l3",
"size": 1000,
"order": {
"_count": "desc"
}
},
"aggs": {
"children": {
"terms": {
"field": "l4",
"size": 1000,
"order": {
"_count": "desc"
}
},
"aggs": {
"children": {
"terms": {
"field": "l5",
"size": 1000,
"order": {
"_count": "desc"
}
},
"aggs": {
"children": {
"terms": {
"field": "l6",
"size": 1000,
"order": {
"_count": "desc"
}
}
}
}
}
}
}
}
}
}
}
}
}
}
}
query_file_path = 'app/visualisation_data/target_classification/data/protein_class_tree_query.json'
es_query = json_files_loading.load_json_from_path(query_file_path)
def generate_count_query(path_to_node):
......
"""
Module with utils for json files
"""
import json
def load_json_from_path(file_path):
    """
    Loads the json contained in the path indicated as parameter

    The file is opened explicitly as UTF-8 so that the result does not depend
    on the default encoding of the platform running the app.

    :param file_path: path to load
    :return: the parsed content of the file (a dict when the top-level json
        value is an object)
    """
    with open(file_path, encoding='utf-8') as json_file:
        return json.load(json_file)
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment