Commit 40fe5f62 authored by David Mendez's avatar David Mendez

Implement functions to handle mappings of properties

parent 99a4ac1a
Pipeline #92145 passed with stages
in 2 minutes and 48 seconds
"""
Module that helps to handle the mappings for the subset indexes
"""
from app.es_subset_generator import utils
class MappingsHandlingError(Exception):
    """
    Error raised when something goes wrong while handling the mappings
    """
def get_mapping_for_subset_index(base_index, required_source):
    """
    Entry point meant to build the mapping of a subset index from a base index and a source.
    For now it only echoes both arguments to stdout (one per line) and returns nothing.

    :param base_index: base index from which to get the mappings
    :param required_source: list of properties for which to copy the mapping
    :return: intended to be the mapping to apply for the subset index; currently None
    """
    for received_argument in (base_index, required_source):
        print(received_argument)
def parse_mapping_from_es_response(index_mapping, required_source):
"""
Reads the response of the mapping from ES; intended to produce the mapping to use for the subset index.
NOTE(review): the body only prints a 'locating: ...' line for every requested property and prints the
received mapping; it does not build or return any mapping yet.
:param index_mapping: mapping dict obtained from es
:param required_source: iterable with the properties to locate in the mapping
:return: intended to be the mapping ready to be applied to the subset index; nothing is returned for now
"""
for source_property in required_source:
print(f'locating: {source_property}')
print(index_mapping)
def get_mapping_to_apply_in_subset_index(index_mapping, property_path):
    """
    Builds the reduced mapping of one property: leaf properties keep only their 'type', while
    properties that declare sub-properties are rebuilt recursively under a 'properties' key.

    :param index_mapping: mapping dict in which the property is looked up
    :param property_path: dot separated path of the property inside the mapping
    :return: the mapping to apply in the subset index from the property configuration
    """
    config = locate_source_config(index_mapping, property_path)
    sub_properties = config.get('properties')

    # Leaf property: only its type is carried over to the subset index
    if sub_properties is None:
        return {'type': config['type']}

    # Each sub-property is wrapped in a minimal mapping so it can be resolved by its own key
    return {
        'properties': {
            sub_key: get_mapping_to_apply_in_subset_index({'properties': {sub_key: sub_config}}, sub_key)
            for sub_key, sub_config in sub_properties.items()
        }
    }
def locate_source_config(index_mapping, property_path):
    """
    Looks up the configuration of a property inside the mapping of an index.

    :param index_mapping: mapping of the original index
    :param property_path: dot separated path of the property to get
    :return: the configuration found for the property in the mapping
    :raises MappingsHandlingError: when no configuration is found for the property
    """
    # Every level of the path lives under a 'properties' key in the mapping
    path_in_mapping = 'properties.' + '.properties.'.join(property_path.split('.'))
    found_config = utils.get_dict_value(index_mapping, path_in_mapping)
    if found_config is not None:
        return found_config
    raise MappingsHandlingError(f'The property {property_path} does not exist.')
This diff is collapsed.
"""
Module to test the handling of mappings for the subset indexes
"""
# pylint: disable=no-self-use
import unittest
import json
from app.es_subset_generator import mappings
class TestMappingsHandling(unittest.TestCase):
    """
    Class to test the handling of mappings
    """

    # Sample mapping shared by all the tests; the path is relative to the directory the tests run from
    SAMPLE_MAPPING_PATH = 'app/es_subset_generator/test/data/sample_mapping_.json'

    @classmethod
    def _load_sample_mapping(cls):
        """
        :return: the sample mapping parsed from the json fixture file
        """
        # The encoding is explicit so the fixture is read the same way regardless of the locale
        with open(cls.SAMPLE_MAPPING_PATH, 'rt', encoding='utf-8') as sample_mapping_file:
            return json.load(sample_mapping_file)

    def test_parses_mapping_from_es_for_a_single_property(self):
        """
        Tests that given a mapping from ES it produces a mapping to put to the subset index
        """
        mapping_from_base_index = self._load_sample_mapping()
        source = ['pref_name']

        mappings.parse_mapping_from_es_response(mapping_from_base_index, source)
        # TODO: FINISH THIS! No assertion is made on the parsed mapping yet.

    def test_locates_source_config_of_property(self):
        """
        Test that it locates the config of a property
        """
        mapping_from_base_index = self._load_sample_mapping()
        property_path = 'molecule_properties.full_molformula'

        source_config_got = mappings.locate_source_config(mapping_from_base_index, property_path)
        source_config_must_be = {
            "type": "keyword",
            "fields": {
                "alphanumeric_lowercase_keyword": {
                    "type": "text",
                    "term_vector": "with_positions_offsets",
                    "analyzer": "alphanumeric_lowercase_keyword"
                },
                "keyword": {
                    "type": "keyword"
                }
            }
        }

        self.assertEqual(source_config_got, source_config_must_be,
                         msg='The source config was not obtained correctly!')

    def test_raises_error_when_source_property_does_not_exist(self):
        """
        Tests that when locating the config of a property that does not exist it raises an error
        """
        # The fixture is loaded outside of assertRaises so only the call under test is checked
        mapping_from_base_index = self._load_sample_mapping()
        property_path = 'does_not_exist'

        with self.assertRaises(mappings.MappingsHandlingError,
                               msg='It should have raised an error for a non existing property'):
            mappings.locate_source_config(mapping_from_base_index, property_path)

    def test_gets_property_mapping_from_source_config(self):
        """
        Tests that given a property configuration from a mapping, it builds the mapping to apply for the property
        in ES
        """
        mapping_from_base_index = self._load_sample_mapping()
        property_path = 'molecule_properties.full_molformula'

        mapping_to_apply_got = mappings.get_mapping_to_apply_in_subset_index(mapping_from_base_index, property_path)
        mapping_to_apply_must_be = {
            "type": "keyword"
        }

        self.assertEqual(mapping_to_apply_got, mapping_to_apply_must_be,
                         msg='The mapping was not generated correctly!')

    def test_gets_property_mapping_for_a_nested_property(self):
        """
        Gets the mapping of a property when the property is nested
        """
        mapping_from_base_index = self._load_sample_mapping()
        property_path = 'molecule_synonyms'

        mapping_to_apply_got = mappings.get_mapping_to_apply_in_subset_index(mapping_from_base_index, property_path)
        mapping_to_apply_must_be = {
            'properties': {
                'molecule_synonym': {
                    'type': 'keyword'
                },
                'syn_type': {
                    'type': 'keyword'
                },
                'synonyms': {
                    'type': 'keyword'
                }
            }
        }

        self.assertEqual(mapping_to_apply_got, mapping_to_apply_must_be,
                         msg='The mapping was not generated correctly!')
......@@ -42,3 +42,27 @@ def prepare_list_chunks_generator(chunk_size, original_list):
start_index = end_index
end_index += chunk_size
yield original_list[start_index:end_index]
def get_dict_value(dictionary, str_property, default_null_value=None):
    """
    Gets the value of a property (separated by dots) in a dict,
    such as '_metadata.assay_data.assay_subcellular_fraction'

    :param dictionary: dictionary for which to get the value
    :param str_property: string path of the property, e.g '_metadata.assay_data.assay_subcellular_fraction'
    :param default_null_value: value to return when the value in the dict is None or the path cannot be followed.
    For example, it can return '' if indicated
    :return: the value found at the end of the path, or default_null_value when any step of the path is
    missing, is None, or goes through a value that is not a dict
    """
    current_obj = dictionary

    # Walk the path one level at a time, so the default applies at any depth of the path
    for prop_part in str_property.split('.'):
        try:
            current_obj = current_obj.get(prop_part)
        except AttributeError:
            # The path goes through a value that is not dict-like (e.g. a string or a list)
            return default_null_value

        if current_obj is None:
            return default_null_value

    return current_obj
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment