Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Sign in
Toggle navigation
Menu
Open sidebar
ChEMBL
C
ChEMBL
Main Web Interface
Elasticsearch Proxy API
Commits
0f026994
Commit
0f026994
authored
Jun 25, 2021
by
David Mendez
Browse files
Entities Join: start to implement join query generation
parent
a8b612ac
Changes
5
Pipelines
1
Hide whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
173 additions
and
17 deletions
+173
-17
app/entities_joiner/entities_joiner.py
app/entities_joiner/entities_joiner.py
+57
-12
app/entities_joiner/ids_loader.py
app/entities_joiner/ids_loader.py
+22
-1
app/entities_joiner/standardisation.py
app/entities_joiner/standardisation.py
+65
-2
app/es_data/es_data.py
app/es_data/es_data.py
+20
-0
functional_tests/specific_tests/fun_test_entities_join_0.py
functional_tests/specific_tests/fun_test_entities_join_0.py
+9
-2
No files found.
app/entities_joiner/entities_joiner.py
View file @
0f026994
...
...
@@ -4,6 +4,7 @@ Module with functions to make joins of entities
import
json
from
app.entities_joiner
import
standardisation
from
app.entities_joiner
import
ids_loader
class
EntitiesJoinerError
(
Exception
):
...
...
@@ -11,31 +12,31 @@ class EntitiesJoinerError(Exception):
def
get_tiny_hash_to_related_items
(
destination_entity_browser_state_template
,
entity_from
,
entity_to
,
es_query
,
selection_description
):
entity_from
,
entity_to
,
raw_
es_query
,
raw_
selection_description
):
"""
:param destination_entity_browser_state_template: template for building the resulting browser url
:param entity_from: source entity of the items
:param entity_to: destination entity of the join
:param es_query: query in elasticsearch for the dataset
:param selection_description: stringified JavaScript object describing the selection of items in the dataset
:param
raw_
es_query:
stringified
query in elasticsearch for the dataset
:param
raw_
selection_description: stringified JavaScript object describing the selection of items in the dataset
:return: the hash to the link with the generated state
"""
parsed_from
=
None
parsed_from
_entity
=
None
try
:
parsed_from
=
standardisation
.
PossibleFrom
s
(
entity_from
)
parsed_from
_entity
=
standardisation
.
Possible
Entities
From
(
entity_from
)
except
ValueError
as
error
:
raise
EntitiesJoinerError
(
f
'entity_from:
{
str
(
error
)
}
. Possible values are
{
[
item
.
value
for
item
in
standardisation
.
PossibleFrom
s
]
}
'
)
f
'entity_from:
{
str
(
error
)
}
. Possible values are
{
[
item
.
value
for
item
in
standardisation
.
Possible
Entities
From
]
}
'
)
parsed_to
=
None
parsed_to
_entity
=
None
try
:
parsed_to
=
standardisation
.
PossibleTo
s
(
entity_to
)
parsed_to
_entity
=
standardisation
.
Possible
Entities
To
(
entity_to
)
except
ValueError
as
error
:
raise
EntitiesJoinerError
(
f
'entity_to:
{
str
(
error
)
}
. Possible values are
{
[
item
.
value
for
item
in
standardisation
.
PossibleTo
s
]
}
'
)
f
'entity_to:
{
str
(
error
)
}
. Possible values are
{
[
item
.
value
for
item
in
standardisation
.
Possible
Entities
To
]
}
'
)
selection_description_dict
=
json
.
loads
(
selection_description
)
selection_description_dict
=
json
.
loads
(
raw_
selection_description
)
try
:
parsed_selection_mode
=
standardisation
.
SelectionModes
(
selection_description_dict
[
'selectionMode'
])
if
parsed_selection_mode
==
standardisation
.
SelectionModes
.
NO_ITEMS_EXCEPT
:
...
...
@@ -49,5 +50,49 @@ def get_tiny_hash_to_related_items(destination_entity_browser_state_template,
if
entity_to
==
entity_from
:
raise
EntitiesJoinerError
(
f
'entity_to (
{
entity_to
}
) and entity_from (
{
entity_from
}
) cannot be the same!'
)
print
(
'get_tiny_hash_to_related_items'
)
print
(
'load ids:'
)
index_name
=
standardisation
.
get_index_name_for_from_entity
(
parsed_from_entity
)
if
index_name
is
None
:
raise
EntitiesJoinerError
(
f
'There is no index name configured for queries from
{
parsed_from_entity
.
value
}
'
)
print
(
'index_name: '
,
index_name
)
from_property
=
standardisation
.
get_from_property
(
parsed_from_entity
,
parsed_to_entity
)
if
from_property
is
None
:
raise
EntitiesJoinerError
(
f
'There is no from property configured for queries from
{
parsed_from_entity
.
value
}
'
)
print
(
'from_property: '
,
from_property
)
selection_description
=
json
.
loads
(
raw_selection_description
)
es_query
=
json
.
loads
(
raw_es_query
)
ids
=
ids_loader
.
load_ids_for_query
(
es_query
,
selection_description
,
from_property
,
index_name
)
to_property
=
standardisation
.
get_to_property
(
parsed_from_entity
,
parsed_to_entity
)
join_query
=
get_join_query
(
ids
,
to_property
)
print
(
'join_query: '
)
print
(
join_query
)
return
'holaaaa'
def get_join_query(ids, to_property):
    """
    Builds the terms query that selects the items of the destination entity whose
    to_property matches any of the ids given.

    :param ids: iterable with the ids for the join
    :param to_property: property of the destination entity that must match the ids
    :return: the query to use for the join
    :raises EntitiesJoinerError: when no to_property is given
    """
    # The property comes from a configuration lookup that yields None when the pair of
    # entities is not configured; a None key would produce an invalid query.
    if not to_property:
        raise EntitiesJoinerError('There is no to property configured for the join query')

    # list(...) accepts any iterable and keeps the query a plain, JSON-serialisable dict.
    return {
        "query": {
            "terms": {
                to_property: list(ids)
            }
        }
    }
app/entities_joiner/ids_loader.py
View file @
0f026994
...
...
@@ -2,13 +2,34 @@
Module with functions that help to load the ids for the entities joiner
"""
from
app.entities_joiner
import
standardisation
from
app.es_data
import
es_data
from
utils
import
dict_property_access
def load_ids_for_query(es_query, selection_description, from_property, index_name):
    """
    Scans the index with the ids query built for the dataset and the selection, and
    collects the value of from_property from the source of every document returned.

    :param es_query: query for the dataset
    :param selection_description: dict describing the selection
    :param from_property: property to get to do the join
    :param index_name: name of the index to query
    :return: a list of ids for the query and the selection description indicated.
    """
    query_for_ids = get_ids_query(es_query, selection_description, from_property)
    scanner = es_data.get_es_scanner(index_name, query_for_ids)

    return [
        dict_property_access.get_property_value(hit['_source'], from_property)
        for hit in scanner
    ]
def
get_ids_query
(
es_query
,
selection_description
,
from_property
):
"""
:param es_query: query for the dataset
:param selection_description: dict describing the selection
:param from_property: property to get to
t
o the join
:param from_property: property to get to
d
o the join
:return: the query to use to get the ids depending on the selection description
"""
selection_mode
=
selection_description
[
'selectionMode'
]
...
...
app/entities_joiner/standardisation.py
View file @
0f026994
...
...
@@ -4,7 +4,7 @@ Module that helps with the standardisation of the package
from
enum
import
Enum
class
PossibleFrom
s
(
Enum
):
class
Possible
Entities
From
(
Enum
):
"""
Enumeration with the possible froms allowed
"""
...
...
@@ -18,7 +18,7 @@ class PossibleFroms(Enum):
CHEMBL_DRUG_WARNINGS
=
'CHEMBL_DRUG_WARNINGS'
class
PossibleTo
s
(
Enum
):
class
Possible
Entities
To
(
Enum
):
"""
Enumeration with the possible to allowed
"""
...
...
@@ -38,3 +38,66 @@ class SelectionModes(Enum):
"""
ALL_ITEMS_EXCEPT
=
'allItemsExcept'
NO_ITEMS_EXCEPT
=
'noItemsExcept'
# Maps every 'from' entity to the name of the index that is queried to load its ids.
INDEX_NAMES_FOR_FROM_ENTITIES = {
    PossibleEntitiesFrom.CHEMBL_ACTIVITIES: 'chembl_activity',
    PossibleEntitiesFrom.CHEMBL_COMPOUNDS: 'chembl_molecule',
    PossibleEntitiesFrom.CHEMBL_TARGETS: 'chembl_target',
    PossibleEntitiesFrom.CHEMBL_ASSAYS: 'chembl_assay',
    PossibleEntitiesFrom.CHEMBL_DOCUMENTS: 'chembl_document',
    PossibleEntitiesFrom.CHEMBL_CELL_LINES: 'chembl_cell_line',
    PossibleEntitiesFrom.CHEMBL_TISSUES: 'chembl_tissue',
    PossibleEntitiesFrom.CHEMBL_DRUG_WARNINGS: 'chembl_drug_warning_by_parent',
}
def get_index_name_for_from_entity(parsed_from_entity):
    """
    Looks up the index configured for a 'from' entity in INDEX_NAMES_FOR_FROM_ENTITIES.

    :param parsed_from_entity: entity 'from' parsed by the PossibleEntitiesFrom enum
    :return: the index name corresponding to the from entity, None if it has no entry
    """
    return INDEX_NAMES_FOR_FROM_ENTITIES.get(parsed_from_entity)
# Join configuration, laid out as JOIN_PROPERTIES['from'][from_entity]['to'][to_entity].
# Every leaf holds the property read from the 'from' entity ('from_property') and the
# property it is matched against in the 'to' entity ('to_property').
JOIN_PROPERTIES = {
    'from': {
        PossibleEntitiesFrom.CHEMBL_DRUG_WARNINGS: {
            'to': {
                PossibleEntitiesTo.CHEMBL_ACTIVITIES: {
                    'from_property': 'drug_warning.molecule_chembl_id',
                    'to_property': 'molecule_chembl_id',
                },
            },
        },
    },
}
def get_from_property(parsed_from_entity, parsed_to_entity):
    """
    Reads the 'from_property' configured in JOIN_PROPERTIES for a pair of entities.
    Every level of the lookup falls back to an empty dict, so a pair that is not
    configured gives None instead of raising.

    :param parsed_from_entity: entity 'from' parsed by the PossibleEntitiesFrom enum
    :param parsed_to_entity: entity 'to' parsed by the PossibleEntitiesTo enum
    :return: the property used in the from entity of the join, None if not configured
    """
    config_for_from_entity = JOIN_PROPERTIES.get('from', {}).get(parsed_from_entity, {})
    config_for_pair = config_for_from_entity.get('to', {}).get(parsed_to_entity, {})
    return config_for_pair.get('from_property')
def get_to_property(parsed_from_entity, parsed_to_entity):
    """
    Reads the 'to_property' configured in JOIN_PROPERTIES for a pair of entities.
    Every level of the lookup falls back to an empty dict, so a pair that is not
    configured gives None instead of raising.

    :param parsed_from_entity: entity 'from' parsed by the PossibleEntitiesFrom enum
    :param parsed_to_entity: entity 'to' parsed by the PossibleEntitiesTo enum
    :return: the value stored under 'to_property' for the pair, None if not configured
    """
    config_for_from_entity = JOIN_PROPERTIES.get('from', {}).get(parsed_from_entity, {})
    config_for_pair = config_for_from_entity.get('to', {}).get(parsed_to_entity, {})
    return config_for_pair.get('to_property')
app/es_data/es_data.py
View file @
0f026994
...
...
@@ -7,6 +7,7 @@ import base64
import
time
import
elasticsearch
from
elasticsearch.helpers
import
scan
from
app.es_connection
import
ES
from
app
import
cache
...
...
@@ -229,3 +230,22 @@ def record_that_response_not_cached(index_name, es_query, time_taken):
es_request_digest
=
get_es_request_digest
(
es_query
)
is_cached
=
False
statistics_saver
.
save_index_usage_record
(
index_name
,
es_query
,
es_request_digest
,
is_cached
,
time_taken
)
def get_es_scanner(index_name, ids_query):
    """
    Starts a scan over the index given, using the ES connection of the app.

    :param index_name: name of the index to query
    :param ids_query: query to get the ids
    :return: an elasticsearch scanner for the query and the parameters given
    """
    print('get_es_scanner')
    print('index_name: ', index_name)

    # Options handed over to elasticsearch.helpers.scan as keyword arguments.
    scan_options = {
        'index': index_name,
        'scroll': u'1m',
        'size': 1000,
        'request_timeout': 60,
        'query': ids_query,
    }
    return scan(ES, **scan_options)
functional_tests/specific_tests/fun_test_entities_join_0.py
View file @
0f026994
...
...
@@ -3,9 +3,10 @@
Module that tests the endpoints to do joins among entities selecting all ids
"""
import
json
import
requests
from
specific_tests
import
utils
def
run_test
(
server_base_url
,
delayed_jobs_server_base_path
):
"""
...
...
@@ -78,6 +79,12 @@ def run_test(server_base_url, delayed_jobs_server_base_path):
}
url
=
f
'
{
server_base_url
}
/entities_join/get_link_to_related_items'
print
(
'doing post
'
)
print
(
'doing post
: '
,
url
)
request
=
requests
.
post
(
url
,
data
=
join_params
)
print
(
'post done!'
)
status_code
=
request
.
status_code
print
(
f
'status_code:
{
status_code
}
'
)
response_text
=
request
.
text
utils
.
print_es_response
(
response_text
)
assert
status_code
==
200
,
'The request failed!'
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment