Commit 90903624 authored by Lukas Pravda's avatar Lukas Pravda
Browse files

enforce black formatting

parent 6f0031ef
__version__ = '0.5.7'
__version__ = "0.5.7"
......@@ -92,24 +92,32 @@ def compare_molecules(template, query, thresh=0.01, exact_match=False):
query_atoms = query.GetNumAtoms()
min_num_atoms = min(template_atoms, query_atoms)
max_sim_score = float(min_num_atoms) / float(template_atoms + query_atoms - min_num_atoms)
max_sim_score = float(min_num_atoms) / float(
template_atoms + query_atoms - min_num_atoms
)
if max_sim_score < thresh:
return ParityResult({}, 0.0)
if not exact_match:
mcs_graph = rdFMCS.FindMCS([template, query],
bondCompare=rdFMCS.BondCompare.CompareAny,
atomCompare=rdFMCS.AtomCompare.CompareAny,
timeout=40,
completeRingsOnly=True)
mcs_graph = rdFMCS.FindMCS(
[template, query],
bondCompare=rdFMCS.BondCompare.CompareAny,
atomCompare=rdFMCS.AtomCompare.CompareAny,
timeout=40,
completeRingsOnly=True,
)
else:
mcs_graph = rdFMCS.FindMCS([template, query],
bondCompare=rdFMCS.BondCompare.CompareOrderExact,
atomCompare=rdFMCS.AtomCompare.CompareElements,
timeout=40,
completeRingsOnly=True)
substructure, sim_score = _generate_sim_score(template, query, mcs_graph.smartsString)
mcs_graph = rdFMCS.FindMCS(
[template, query],
bondCompare=rdFMCS.BondCompare.CompareOrderExact,
atomCompare=rdFMCS.AtomCompare.CompareElements,
timeout=40,
completeRingsOnly=True,
)
substructure, sim_score = _generate_sim_score(
template, query, mcs_graph.smartsString
)
return ParityResult(substructure, sim_score)
......@@ -76,14 +76,16 @@ def read_pdb_cif_file(path_to_cif: str, sanitize: bool = True) -> CCDReaderResul
the internal representation of the component.
"""
if not os.path.isfile(path_to_cif):
raise ValueError('File \'{}\' does not exists'.format(path_to_cif))
raise ValueError("File '{}' does not exists".format(path_to_cif))
cif_dict = list(MMCIF2Dict().parse(path_to_cif).values())[0]
return _parse_pdb_mmcif(cif_dict, sanitize)
def read_pdb_components_file(path_to_cif: str, sanitize: bool = True) -> Dict[str, CCDReaderResult]:
def read_pdb_components_file(
path_to_cif: str, sanitize: bool = True
) -> Dict[str, CCDReaderResult]:
"""
Process multiple compounds stored in the wwPDB CCD
`components.cif` file.
......@@ -102,7 +104,7 @@ def read_pdb_components_file(path_to_cif: str, sanitize: bool = True) -> Dict[st
the components in the `components.cif` file.
"""
if not os.path.isfile(path_to_cif):
raise ValueError('File \'{}\' does not exists'.format(path_to_cif))
raise ValueError("File '{}' does not exists".format(path_to_cif))
result_bag = {}
......@@ -110,7 +112,10 @@ def read_pdb_components_file(path_to_cif: str, sanitize: bool = True) -> Dict[st
try:
result_bag[k] = _parse_pdb_mmcif(v)
except CCDUtilsError as e:
print(f'ERROR: Data block {k} not processed. Reason: ({str(e)}).', file=sys.stderr)
print(
f"ERROR: Data block {k} not processed. Reason: ({str(e)}).",
file=sys.stderr,
)
return result_bag
......@@ -133,19 +138,23 @@ def _parse_pdb_mmcif(cif_dict, sanitize=True):
errors = list()
mol = rdkit.Chem.RWMol()
atoms_dict = _preprocess_pdb_parser_output(cif_dict, '_chem_comp_atom', warnings)
bonds_dict = _preprocess_pdb_parser_output(cif_dict, '_chem_comp_bond', warnings)
identifiers_dict = _preprocess_pdb_parser_output(cif_dict, '_pdbx_chem_comp_identifier', warnings)
descriptors_dict = _preprocess_pdb_parser_output(cif_dict, '_pdbx_chem_comp_descriptor', warnings)
properties_dict = _preprocess_pdb_parser_output(cif_dict, '_chem_comp', warnings)
atoms_dict = _preprocess_pdb_parser_output(cif_dict, "_chem_comp_atom", warnings)
bonds_dict = _preprocess_pdb_parser_output(cif_dict, "_chem_comp_bond", warnings)
identifiers_dict = _preprocess_pdb_parser_output(
cif_dict, "_pdbx_chem_comp_identifier", warnings
)
descriptors_dict = _preprocess_pdb_parser_output(
cif_dict, "_pdbx_chem_comp_descriptor", warnings
)
properties_dict = _preprocess_pdb_parser_output(cif_dict, "_chem_comp", warnings)
_parse_pdb_atoms(mol, atoms_dict)
_parse_pdb_conformers(mol, atoms_dict)
_parse_pdb_bonds(mol, bonds_dict, atoms_dict, errors)
_handle_implicit_hydrogens(mol)
descriptors = _parse_pdb_descriptors(descriptors_dict, 'descriptor')
descriptors += _parse_pdb_descriptors(identifiers_dict, 'identifier')
descriptors = _parse_pdb_descriptors(descriptors_dict, "descriptor")
descriptors += _parse_pdb_descriptors(identifiers_dict, "identifier")
properties = _parse_pdb_properties(properties_dict)
comp = Component(mol.GetMol(), cif_dict, properties, descriptors, sanitize=sanitize)
......@@ -167,23 +176,23 @@ def _parse_pdb_atoms(mol, atoms):
if not atoms:
return
for i in range(len(atoms['atom_id'])):
element = atoms['type_symbol'][i]
for i in range(len(atoms["atom_id"])):
element = atoms["type_symbol"][i]
element = element if len(element) == 1 else element[0] + element[1].lower()
isotope = None
if element == 'D':
element = 'H'
if element == "D":
element = "H"
isotope = 2
elif element == 'X':
element = '*'
elif element == "X":
element = "*"
atom = rdkit.Chem.Atom(element)
#atom.SetChiralTag(_atom_chiral_tag(atoms['pdbx_stereo_config'][i]))
atom.SetProp('name', atoms['atom_id'][i])
atom.SetProp('alt_name', atoms['alt_atom_id'][i])
atom.SetBoolProp('leaving_atom', atoms['pdbx_leaving_atom_flag'][i] == "Y")
atom.SetFormalCharge(conversions.str_to_int(atoms['charge'][i]))
# atom.SetChiralTag(_atom_chiral_tag(atoms['pdbx_stereo_config'][i]))
atom.SetProp("name", atoms["atom_id"][i])
atom.SetProp("alt_name", atoms["alt_atom_id"][i])
atom.SetBoolProp("leaving_atom", atoms["pdbx_leaving_atom_flag"][i] == "Y")
atom.SetFormalCharge(conversions.str_to_int(atoms["charge"][i]))
if isotope is not None:
atom.SetIsotope(isotope)
......@@ -203,8 +212,8 @@ def _parse_pdb_conformers(mol, atoms):
if not atoms:
return
ideal = _setup_pdb_conformer(atoms, 'pdbx_model_Cartn_{}_ideal')
model = _setup_pdb_conformer(atoms, 'model_Cartn_{}')
ideal = _setup_pdb_conformer(atoms, "pdbx_model_Cartn_{}_ideal")
model = _setup_pdb_conformer(atoms, "model_Cartn_{}")
mol.AddConformer(ideal, assignId=True)
mol.AddConformer(model, assignId=True)
......@@ -224,12 +233,12 @@ def _setup_pdb_conformer(atoms, label):
if not atoms:
return
conformer = rdkit.Chem.Conformer(len(atoms['atom_id']))
conformer = rdkit.Chem.Conformer(len(atoms["atom_id"]))
for i in range(len(atoms['atom_id'])):
x = conversions.str_to_float(atoms[label.format(('x'))][i])
y = conversions.str_to_float(atoms[label.format(('y'))][i])
z = conversions.str_to_float(atoms[label.format(('z'))][i])
for i in range(len(atoms["atom_id"])):
x = conversions.str_to_float(atoms[label.format(("x"))][i])
y = conversions.str_to_float(atoms[label.format(("y"))][i])
z = conversions.str_to_float(atoms[label.format(("z"))][i])
atom_position = rdkit.Chem.rdGeometry.Point3D(x, y, z)
conformer.SetAtomPosition(i, atom_position)
......@@ -250,18 +259,26 @@ def _parse_pdb_bonds(mol, bonds, atoms, errors):
if not bonds:
return
for i in range(len(bonds['atom_id_1'])):
atom_1 = collection_ext.find_element_in_list(atoms['atom_id'], bonds['atom_id_1'][i])
atom_2 = collection_ext.find_element_in_list(atoms['atom_id'], bonds['atom_id_2'][i])
bond_order = _bond_pdb_order(bonds['value_order'][i])
for i in range(len(bonds["atom_id_1"])):
atom_1 = collection_ext.find_element_in_list(
atoms["atom_id"], bonds["atom_id_1"][i]
)
atom_2 = collection_ext.find_element_in_list(
atoms["atom_id"], bonds["atom_id_2"][i]
)
bond_order = _bond_pdb_order(bonds["value_order"][i])
if any(a is None for a in [atom_1, atom_2, bond_order]):
errors.append('Problem with the {}-th bond in the _chem_comp_bond group'.format(i + 1))
errors.append(
"Problem with the {}-th bond in the _chem_comp_bond group".format(i + 1)
)
try:
mol.AddBond(atom_1, atom_2, bond_order)
except RuntimeError:
errors.append(f'Duplicit bond {bonds["atom_id_1"][i]}-{bonds["atom_id_2"][i]}')
errors.append(
f'Duplicit bond {bonds["atom_id_1"][i]}-{bonds["atom_id_2"][i]}'
)
def _handle_implicit_hydrogens(mol):
......@@ -285,7 +302,7 @@ def _handle_implicit_hydrogens(mol):
atom.SetNoImplicit(no_Hs)
def _parse_pdb_descriptors(pdbx_chem_comp_descriptors, label='descriptor'):
def _parse_pdb_descriptors(pdbx_chem_comp_descriptors, label="descriptor"):
"""
Parse useful information from _pdbx_chem_comp_* category
......@@ -304,9 +321,11 @@ def _parse_pdb_descriptors(pdbx_chem_comp_descriptors, label='descriptor'):
return descriptors
for i in range(len(pdbx_chem_comp_descriptors[label])):
d = Descriptor(type=pdbx_chem_comp_descriptors['type'][i],
program=pdbx_chem_comp_descriptors['program'][i],
value=pdbx_chem_comp_descriptors[label][i])
d = Descriptor(
type=pdbx_chem_comp_descriptors["type"][i],
program=pdbx_chem_comp_descriptors["program"][i],
value=pdbx_chem_comp_descriptors[label][i],
)
descriptors.append(d)
return descriptors
......@@ -324,18 +343,26 @@ def _parse_pdb_properties(chem_comp):
"""
properties = None
if chem_comp:
mod_date = chem_comp['pdbx_modified_date'][0].split('-')
d = date(1970, 1, 1) if mod_date[0] == '?' else date(int(mod_date[0]), int(mod_date[1]), int(mod_date[2]))
rel_status = chem_comp['pdbx_release_status'][0]
rel_status = ReleaseStatus.from_str(chem_comp['pdbx_release_status'][0])
properties = CCDProperties(id=chem_comp['id'][0],
name=chem_comp['name'][0],
formula=chem_comp['formula'][0],
modified_date=d,
pdbx_release_status=rel_status,
weight=0.0 if chem_comp['formula_weight'][0] == '?' else float(chem_comp['formula_weight'][0]))
mod_date = chem_comp["pdbx_modified_date"][0].split("-")
d = (
date(1970, 1, 1)
if mod_date[0] == "?"
else date(int(mod_date[0]), int(mod_date[1]), int(mod_date[2]))
)
rel_status = chem_comp["pdbx_release_status"][0]
rel_status = ReleaseStatus.from_str(chem_comp["pdbx_release_status"][0])
properties = CCDProperties(
id=chem_comp["id"][0],
name=chem_comp["name"][0],
formula=chem_comp["formula"][0],
modified_date=d,
pdbx_release_status=rel_status,
weight=0.0
if chem_comp["formula_weight"][0] == "?"
else float(chem_comp["formula_weight"][0]),
)
return properties
......@@ -354,13 +381,15 @@ def _preprocess_pdb_parser_output(dictionary, label, warnings):
dict: unified dictionary for structure parsing.
"""
if label not in dictionary:
warnings.append('Namespace {} does not exist.'.format(label))
warnings.append("Namespace {} does not exist.".format(label))
return []
check_element = list(dictionary[label].keys())[0]
values = (dictionary[label]
if isinstance(dictionary[label][check_element], list)
else {k: [v] for k, v in dictionary[label].items()})
values = (
dictionary[label]
if isinstance(dictionary[label][check_element], list)
else {k: [v] for k, v in dictionary[label].items()}
)
return values
......@@ -374,11 +403,11 @@ def _bond_pdb_order(value_order):
Returns:
rdkit.Chem.rdchem.BondType: -- bond type
"""
if value_order == 'SING':
if value_order == "SING":
return rdkit.Chem.rdchem.BondType(1)
if value_order == 'DOUB':
if value_order == "DOUB":
return rdkit.Chem.rdchem.BondType(2)
if value_order == 'TRIP':
if value_order == "TRIP":
return rdkit.Chem.rdchem.BondType(3)
return None
......@@ -393,12 +422,14 @@ def _atom_chiral_tag(tag):
Returns:
rdkit.Chem.ChiralType: Chiral center in RDKit language.
"""
if tag == 'N':
if tag == "N":
return rdkit.Chem.ChiralType.CHI_UNSPECIFIED
if tag == 'S':
if tag == "S":
return rdkit.Chem.ChiralType.CHI_TETRAHEDRAL_CCW
if tag == 'R':
if tag == "R":
return rdkit.Chem.ChiralType.CHI_TETRAHEDRAL_CW
return rdkit.Chem.ChiralType.CHI_UNSPECIFIED
# endregion parse mmcif
......@@ -116,7 +116,7 @@ def to_pdb_str(
)
info = rdkit.Chem.rdchem.AtomPDBResidueInfo()
info.SetResidueName(f'{component.id:>3}')
info.SetResidueName(f"{component.id:>3}")
info.SetTempFactor(20.0)
info.SetOccupancy(1.0)
info.SetChainId("A")
......@@ -929,8 +929,7 @@ def _to_sdf_str_fallback(mol, ccd_id, mappings):
content += [
f"{ccd_id} - {k} conformer",
" RDKit 3D",
"\n"
f"{atom_count:>3}{bond_count:3} 0 0 0 0 0 0 0 0999 V2000",
"\n" f"{atom_count:>3}{bond_count:3} 0 0 0 0 0 0 0 0999 V2000",
]
for i in range(0, atom_count):
......
......@@ -88,7 +88,6 @@ class Component:
if sanitize:
self._sanitization_issues = self._sanitize()
if descriptors is not None:
self._descriptors = descriptors
......
......@@ -43,7 +43,11 @@ class DepictionManager:
PubChem templates can be downloaded using PubChemDownloader class.
"""
def __init__(self, pubchem_templates_path: str = '', general_templates_path: str = config.general_templates) -> None:
def __init__(
self,
pubchem_templates_path: str = "",
general_templates_path: str = config.general_templates,
) -> None:
"""
Initialize component which does the ligand depiction.
If Nones is provided as parameters just the defalt RDKit
......@@ -63,16 +67,20 @@ class DepictionManager:
self.coordgen_params.coordgenScaling = 50 / 1.5
self.coordgen_params.templateFileDir = config.coordgen_templates
self.pubchem_templates = pubchem_templates_path if os.path.isdir(pubchem_templates_path) else ''
self.pubchem_templates = (
pubchem_templates_path if os.path.isdir(pubchem_templates_path) else ""
)
self.templates: Dict[str, rdkit.Chem.rdchem.Mol] = OrderedDict()
if os.path.isdir(general_templates_path):
for k in sorted(os.listdir(general_templates_path)):
template = self._load_template(os.path.join(general_templates_path, k))
template_name = k.split('.')[0]
template_name = k.split(".")[0]
self.templates[template_name] = template
def depict_molecule(self, het_id: str, mol: rdkit.Chem.rdchem.Mol) -> DepictionResult:
def depict_molecule(
self, het_id: str, mol: rdkit.Chem.rdchem.Mol
) -> DepictionResult:
"""
Given input molecule tries to generate its depictions.
......@@ -95,7 +103,11 @@ class DepictionManager:
rdkitMol = Chem.RWMol(temp_mol).GetMol()
results = []
pubchem_res = self._get_2D_by_pubchem(het_id, pubchemMol) if self.pubchem_templates else None
pubchem_res = (
self._get_2D_by_pubchem(het_id, pubchemMol)
if self.pubchem_templates
else None
)
template_res = self._get_2D_by_template(templateMol) if self.templates else []
rdkit_res = self._get_2D_by_rdkit(rdkitMol)
......@@ -111,11 +123,12 @@ class DepictionManager:
if results:
to_return = results[0]
fix_conformer(to_return.mol.GetConformer(0))
return to_return
return DepictionResult(source=DepictionSource.Failed, template_name='', mol=None, score=1000)
return DepictionResult(
source=DepictionSource.Failed, template_name="", mol=None, score=1000
)
def _get_pubchem_template_path(self, het_id):
"""Get path to the PubChem template if it exists.
......@@ -126,9 +139,9 @@ class DepictionManager:
Returns:
str: Path to the PubChem layout.
"""
path = os.path.join(self.pubchem_templates, f'{het_id}.sdf')
path = os.path.join(self.pubchem_templates, f"{het_id}.sdf")
return path if os.path.isfile(path) else ''
return path if os.path.isfile(path) else ""
def _load_template(self, path):
"""
......@@ -145,14 +158,14 @@ class DepictionManager:
rdkit.Chem.rdchem.Mol: RDKit representation of the template
"""
mol = Chem.RWMol()
extension = os.path.basename(path).split('.')[1]
extension = os.path.basename(path).split(".")[1]
if extension == 'sdf':
if extension == "sdf":
mol = Chem.MolFromMolFile(path, sanitize=True, removeHs=True)
elif extension == 'pdb':
elif extension == "pdb":
mol = Chem.MolFromPDBFile(path, sanitize=True, removeHs=True)
else:
raise ValueError('Unsupported molecule type \'{}\''.format(extension))
raise ValueError("Unsupported molecule type '{}'".format(extension))
p = Chem.AdjustQueryParameters()
p.makeAtomsGeneric = True
......@@ -175,9 +188,13 @@ class DepictionManager:
try:
rdCoordGen.AddCoords(mol, self.coordgen_params)
flaws = DepictionValidator(mol).depiction_score()
return DepictionResult(source=DepictionSource.RDKit, template_name=None, mol=mol, score=flaws)
return DepictionResult(
source=DepictionSource.RDKit, template_name=None, mol=mol, score=flaws
)
except Exception:
return DepictionResult(source=DepictionSource.Failed, template_name=None, mol=None, score=1000)
return DepictionResult(
source=DepictionSource.Failed, template_name=None, mol=None, score=1000
)
def _get_2D_by_pubchem(self, code, mol):
"""
......@@ -198,11 +215,18 @@ class DepictionManager:
if mol.HasSubstructMatch(template):
AllChem.GenerateDepictionMatching2DStructure(mol, template)
flaws = DepictionValidator(mol).depiction_score()
return DepictionResult(source=DepictionSource.PubChem, template_name=code, mol=mol, score=flaws)
return DepictionResult(
source=DepictionSource.PubChem,
template_name=code,
mol=mol,
score=flaws,
)
except Exception as e:
print(str(e), file=sys.stderr)
return DepictionResult(source=DepictionSource.Failed, template_name=None, mol=None, score=1000)
return DepictionResult(
source=DepictionSource.Failed, template_name=None, mol=None, score=1000
)
def _get_2D_by_template(self, mol):
"""
......@@ -222,8 +246,14 @@ class DepictionManager:
if temp_mol.HasSubstructMatch(template):
AllChem.GenerateDepictionMatching2DStructure(temp_mol, template)
flaws = DepictionValidator(temp_mol).depiction_score()
results.append(DepictionResult(source=DepictionSource.Template,
template_name=key, mol=temp_mol, score=flaws))
results.append(
DepictionResult(
source=DepictionSource.Template,
template_name=key,
mol=temp_mol,
score=flaws,
)
)
except Exception:
pass # if it fails it fails, but generally it wont
......@@ -240,7 +270,10 @@ class DepictionValidator:
self.conformer = mol.GetConformer()
self.bonds = self.mol.GetBonds()
atoms = [self.conformer.GetAtomPosition(i) for i in range(0, self.conformer.GetNumAtoms())]
atoms = [
self.conformer.GetAtomPosition(i)
for i in range(0, self.conformer.GetNumAtoms())
]
atom_centers = [[atom.x, atom.y, atom.z] for atom in atoms]
self.kd_tree = KDTree(atom_centers)
......@@ -259,8 +292,13 @@ class DepictionValidator:
Returns:
bool: true if bonds share collide, false otherwise.
"""
atoms = [bondA.GetBeginAtom(), bondA.GetEndAtom(), bondB.GetBeginAtom(), bondB.GetEndAtom()]
names = [a.GetProp('name') for a in atoms]
atoms = [
bondA.GetBeginAtom(),
bondA.GetEndAtom(),
bondB.GetBeginAtom(),
bondB.GetEndAtom(),
]
names = [a.GetProp("name") for a in atoms]
points = [self.conformer.GetAtomPosition(a.GetIdx()) for a in atoms]
vecA = Geometry.Point2D(points[1].x - points[0].x, points[1].y - points[0].y)
......@@ -284,7 +322,7 @@ class DepictionValidator:
detP = (a * -vecB.y) - (b * -vecB.x)
p = round(detP / det, 3)
if (p < 0 or p > 1):
if p < 0 or p > 1:
return False
detR = (vecA.x * b) - (vecA.y * a)
......@@ -300,7 +338,7 @@ class DepictionValidator:
common atom.
Args:
names (list of str): List of atom names forming bonds
names (list of str): List of atom names forming bonds
[A, B, C, D] for AB and CD.
vecA (Geometry.Point2D): Vector representing AB bond.
vecB (Geometry.Point2D): Vector representing CD bond.
......@@ -413,6 +451,9 @@ class DepictionValidator:
bond_collisions = self.count_bond_collisions()
degenerated_atoms = self.count_suboptimal_atom_positions(0.0, 0.5)
score = collision_penalty * bond_collisions + degenerated_penalty * degenerated_atoms
score = (
collision_penalty * bond_collisions
+ degenerated_penalty * degenerated_atoms
)
return round(score, 1)
class CCDUtilsError(Exception):
"""Internal error of the pdbeccdutils package."""
pass
......@@ -28,13 +28,17 @@ from pdbeccdutils.core.models import FragmentEntry
class FragmentLibrary:
"""Implementation of fragment library.
"""
def __init__(self, path: str = config.fragment_library, header: bool = True,
delimiter: str = '\t', quotechar: str = '"') -> None:
"""Implementation of fragment library."""
def __init__(
self,
path: str = config.fragment_library,
header: bool = True,
delimiter: str = "\t",
quotechar: str = '"',
) -> None:
self.library: Dict[str, FragmentEntry] = {}
self.name = os.path.basename(path).split('.')[0]
self.name = os.path.basename(path).split(".")[0]
self._read_in_library(path, header, delimiter, quotechar)
def _read_in_library(self, path, header, delimiter, quotechar):
......@@ -46,22 +50,24 @@ class FragmentLibrary:
delimiter (str): Delimiter symbol.
quotechar (str): Quotechar symbol.
"""
rdkit.rdBase.DisableLog('rdApp.*')
rdkit.rdBase.DisableLog("rdApp.*")