Commit 65b65e8a authored by Lukas Pravda
Browse files

allow optional sanitization when reading files

parent 11bb5a5f
...@@ -59,13 +59,14 @@ class CCDReaderResult(NamedTuple): ...@@ -59,13 +59,14 @@ class CCDReaderResult(NamedTuple):
component: Component component: Component
def read_pdb_cif_file(path_to_cif: str) -> CCDReaderResult: def read_pdb_cif_file(path_to_cif: str, sanitize: bool = True) -> CCDReaderResult:
""" """
Read in single wwPDB CCD CIF component and create its internal Read in single wwPDB CCD CIF component and create its internal
representation. representation.
Args: Args:
path_to_cif (str): Path to the cif file path_to_cif (str): Path to the cif file
sanitize (bool): Whether or not the component should be sanitized. Defaults to True.
Raises: Raises:
ValueError: if file does not exist ValueError: if file does not exist
...@@ -79,10 +80,10 @@ def read_pdb_cif_file(path_to_cif: str) -> CCDReaderResult: ...@@ -79,10 +80,10 @@ def read_pdb_cif_file(path_to_cif: str) -> CCDReaderResult:
cif_dict = list(MMCIF2Dict().parse(path_to_cif).values())[0] cif_dict = list(MMCIF2Dict().parse(path_to_cif).values())[0]
return _parse_pdb_mmcif(cif_dict) return _parse_pdb_mmcif(cif_dict, sanitize)
def read_pdb_components_file(path_to_cif: str) -> Dict[str, CCDReaderResult]: def read_pdb_components_file(path_to_cif: str, sanitize: bool = True) -> Dict[str, CCDReaderResult]:
""" """
Process multiple compounds stored in the wwPDB CCD Process multiple compounds stored in the wwPDB CCD
`components.cif` file. `components.cif` file.
...@@ -90,6 +91,8 @@ def read_pdb_components_file(path_to_cif: str) -> Dict[str, CCDReaderResult]: ...@@ -90,6 +91,8 @@ def read_pdb_components_file(path_to_cif: str) -> Dict[str, CCDReaderResult]:
Args: Args:
path_to_cif (str): Path to the `components.cif` file with path_to_cif (str): Path to the `components.cif` file with
multiple ligands in it. multiple ligands in it.
sanitize (bool): Whether or not the components should be sanitized
Defaults to True.
Raises: Raises:
ValueError: if the file does not exist. ValueError: if the file does not exist.
...@@ -113,12 +116,14 @@ def read_pdb_components_file(path_to_cif: str) -> Dict[str, CCDReaderResult]: ...@@ -113,12 +116,14 @@ def read_pdb_components_file(path_to_cif: str) -> Dict[str, CCDReaderResult]:
# region parse mmcif # region parse mmcif
def _parse_pdb_mmcif(cif_dict): def _parse_pdb_mmcif(cif_dict, sanitize=True):
""" """
Create internal representation of the molecule from mmcif format. Create internal representation of the molecule from mmcif format.
Args: Args:
cif_dict (dict): mmcif category cif_dict (dict): mmcif category
sanitize (bool): Whether or not the rdkit component should
be sanitized. Defaults to True.
Returns: Returns:
CCDReaderResult: internal representation with the results CCDReaderResult: internal representation with the results
...@@ -143,7 +148,7 @@ def _parse_pdb_mmcif(cif_dict): ...@@ -143,7 +148,7 @@ def _parse_pdb_mmcif(cif_dict):
descriptors += _parse_pdb_descriptors(identifiers_dict, 'identifier') descriptors += _parse_pdb_descriptors(identifiers_dict, 'identifier')
properties = _parse_pdb_properties(properties_dict) properties = _parse_pdb_properties(properties_dict)
comp = Component(mol.GetMol(), cif_dict, properties, descriptors) comp = Component(mol.GetMol(), cif_dict, properties, descriptors, sanitize=sanitize)
reader_result = CCDReaderResult(warnings=warnings, errors=errors, component=comp) reader_result = CCDReaderResult(warnings=warnings, errors=errors, component=comp)
return reader_result return reader_result
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment