Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Sign in
Toggle navigation
Open sidebar
pdbe
ccdutils
Commits
0387f0d4
Commit
0387f0d4
authored
Oct 28, 2020
by
Lukas Pravda
Browse files
Merge branch 'dev'
parents
98ee0a75
ee3fe336
Changes
35
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
345 additions
and
216 deletions
+345
-216
.github/workflows/documentation.yml
.github/workflows/documentation.yml
+2
-3
.github/workflows/tests.yml
.github/workflows/tests.yml
+11
-4
Makefile
Makefile
+17
-0
README.md
README.md
+5
-5
pdbeccdutils/__init__.py
pdbeccdutils/__init__.py
+1
-1
pdbeccdutils/computations/parity_method.py
pdbeccdutils/computations/parity_method.py
+21
-13
pdbeccdutils/core/ccd_reader.py
pdbeccdutils/core/ccd_reader.py
+92
-61
pdbeccdutils/core/ccd_writer.py
pdbeccdutils/core/ccd_writer.py
+2
-3
pdbeccdutils/core/component.py
pdbeccdutils/core/component.py
+0
-1
pdbeccdutils/core/depictions.py
pdbeccdutils/core/depictions.py
+67
-26
pdbeccdutils/core/exceptions.py
pdbeccdutils/core/exceptions.py
+1
-0
pdbeccdutils/core/fragment_library.py
pdbeccdutils/core/fragment_library.py
+27
-20
pdbeccdutils/core/models.py
pdbeccdutils/core/models.py
+14
-7
pdbeccdutils/helpers/collection_ext.py
pdbeccdutils/helpers/collection_ext.py
+1
-1
pdbeccdutils/helpers/drawing.py
pdbeccdutils/helpers/drawing.py
+55
-52
pdbeccdutils/helpers/rdkit_fixtures.py
pdbeccdutils/helpers/rdkit_fixtures.py
+1
-1
pdbeccdutils/scripts/process_components_cif_cli.py
pdbeccdutils/scripts/process_components_cif_cli.py
+14
-9
pdbeccdutils/scripts/setup_pubchem_library_cli.py
pdbeccdutils/scripts/setup_pubchem_library_cli.py
+1
-2
pdbeccdutils/tests/components_cif/test_rdkit_fixtures.py
pdbeccdutils/tests/components_cif/test_rdkit_fixtures.py
+5
-5
pdbeccdutils/tests/conftest.py
pdbeccdutils/tests/conftest.py
+8
-2
No files found.
.github/workflows/documentation.yml
View file @
0387f0d4
...
...
@@ -33,9 +33,8 @@ jobs:
activate-conda
:
true
conda-channels
:
conda-forge
-
run
:
|
conda install rdkit
pip install -e .
pip install pytest-cov sphinx sphinx_rtd_theme recommonmark sphinx-autodoc-typehints sphinx-markdown-tables
conda install rdkit=2020.03.6
pip install -e ".[docs]"
-
run
:
|
cd doc
...
...
.github/workflows/tests.yml
View file @
0387f0d4
...
...
@@ -2,7 +2,15 @@
name
:
ccdutils tests
on
:
[
push
,
pull_request
]
on
:
push
:
branches
:
-
master
-
dev
pull_request
:
branches
:
-
master
-
dev
jobs
:
pytest
:
...
...
@@ -25,7 +33,6 @@ jobs:
activate-conda
:
true
conda-channels
:
conda-forge
-
run
:
|
conda install rdkit
pip install -e .
pip install pytest-cov
conda install rdkit=2020.03.6
pip install -e ".[tests]"
-
run
:
pytest --cov=pdbeccdutils
Makefile
0 → 100644
View file @
0387f0d4
clean
:
rm
-rf
build/
*
&&
rm
-rf
dist/
*
&&
rm
-rf
pdbeccdutils.egg-info
&&
rm
-rf
htmlcov
&&
rm
.coverage
test-report
:
pytest
--cov
=
pdbeccdutils
--cov-report
=
html
package
:
pytest
;
\
python setup.py bdist_wheel
;
\
pip uninstall pdbeccdutils
;
\
pip
install
-e
.
;
\
find dist
-name
"pdbeccdutils-*-py3*"
-exec
pip
install
{}
\;
make upload
:
twine upload dist/
*
--verbose
make pypi
:
package upload clean
README.md
View file @
0387f0d4
...
...
@@ -7,7 +7,7 @@
*
The tools use:
*
[
RDKit
](
http://www.rdkit.org/
)
for chemistry
*
[
PDBeCIF
](
https://git
la
b.com/
glenveegee/PDBeCIF.git
)
cif parser.
*
[
PDBeCIF
](
https://git
hu
b.com/
PDBeurope/pdbecif
)
cif parser.
*
[
scipy
](
https://www.scipy.org/
)
for depiction quality check.
*
[
numpy
](
https://www.numpy.org/
)
for molecular scaling.
...
...
@@ -37,15 +37,15 @@
*
Generation of 3D conformations.
*
Fragment library search.
*
Chemical scaffolds (Murcko scaffold, Murcko general, BRICS).
*
Lightweight implementation of parity method by Jon Tyczak.
*
Lightweight implementation of
[
parity method
](
https://doi.org/10.1016/j.str.2018.02.009
)
by Jon Tyczak.
*
RDKit molecular properties per component.
*
UniChem mapping.
## TODO list
*
Port rest of the important functionality implemented by Oliver
*
Add more unit/regression tests to get at least
100%
code coverage.
*
Further improvement of the documentation.
*
Add more unit/regression tests to get at least
higher
code coverage.
*
Further improvement
s
of the documentation.
## Notes
...
...
@@ -76,4 +76,4 @@ Use this to re-generate the documentation from the doc/ directory:
```
console
make html
```
\ No newline at end of file
```
pdbeccdutils/__init__.py
View file @
0387f0d4
__version__
=
'
0.5.
7'
__version__
=
"
0.5.
8"
pdbeccdutils/computations/parity_method.py
View file @
0387f0d4
...
...
@@ -92,24 +92,32 @@ def compare_molecules(template, query, thresh=0.01, exact_match=False):
query_atoms
=
query
.
GetNumAtoms
()
min_num_atoms
=
min
(
template_atoms
,
query_atoms
)
max_sim_score
=
float
(
min_num_atoms
)
/
float
(
template_atoms
+
query_atoms
-
min_num_atoms
)
max_sim_score
=
float
(
min_num_atoms
)
/
float
(
template_atoms
+
query_atoms
-
min_num_atoms
)
if
max_sim_score
<
thresh
:
return
ParityResult
({},
0.0
)
if
not
exact_match
:
mcs_graph
=
rdFMCS
.
FindMCS
([
template
,
query
],
bondCompare
=
rdFMCS
.
BondCompare
.
CompareAny
,
atomCompare
=
rdFMCS
.
AtomCompare
.
CompareAny
,
timeout
=
40
,
completeRingsOnly
=
True
)
mcs_graph
=
rdFMCS
.
FindMCS
(
[
template
,
query
],
bondCompare
=
rdFMCS
.
BondCompare
.
CompareAny
,
atomCompare
=
rdFMCS
.
AtomCompare
.
CompareAny
,
timeout
=
40
,
completeRingsOnly
=
True
,
)
else
:
mcs_graph
=
rdFMCS
.
FindMCS
([
template
,
query
],
bondCompare
=
rdFMCS
.
BondCompare
.
CompareOrderExact
,
atomCompare
=
rdFMCS
.
AtomCompare
.
CompareElements
,
timeout
=
40
,
completeRingsOnly
=
True
)
substructure
,
sim_score
=
_generate_sim_score
(
template
,
query
,
mcs_graph
.
smartsString
)
mcs_graph
=
rdFMCS
.
FindMCS
(
[
template
,
query
],
bondCompare
=
rdFMCS
.
BondCompare
.
CompareOrderExact
,
atomCompare
=
rdFMCS
.
AtomCompare
.
CompareElements
,
timeout
=
40
,
completeRingsOnly
=
True
,
)
substructure
,
sim_score
=
_generate_sim_score
(
template
,
query
,
mcs_graph
.
smartsString
)
return
ParityResult
(
substructure
,
sim_score
)
pdbeccdutils/core/ccd_reader.py
View file @
0387f0d4
...
...
@@ -76,14 +76,16 @@ def read_pdb_cif_file(path_to_cif: str, sanitize: bool = True) -> CCDReaderResul
the internal representation of the component.
"""
if
not
os
.
path
.
isfile
(
path_to_cif
):
raise
ValueError
(
'
File
\
'
{}
\
'
does not exists
'
.
format
(
path_to_cif
))
raise
ValueError
(
"
File '{}' does not exists
"
.
format
(
path_to_cif
))
cif_dict
=
list
(
MMCIF2Dict
().
parse
(
path_to_cif
).
values
())[
0
]
return
_parse_pdb_mmcif
(
cif_dict
,
sanitize
)
def
read_pdb_components_file
(
path_to_cif
:
str
,
sanitize
:
bool
=
True
)
->
Dict
[
str
,
CCDReaderResult
]:
def
read_pdb_components_file
(
path_to_cif
:
str
,
sanitize
:
bool
=
True
)
->
Dict
[
str
,
CCDReaderResult
]:
"""
Process multiple compounds stored in the wwPDB CCD
`components.cif` file.
...
...
@@ -102,7 +104,7 @@ def read_pdb_components_file(path_to_cif: str, sanitize: bool = True) -> Dict[st
the components in the `components.cif` file.
"""
if
not
os
.
path
.
isfile
(
path_to_cif
):
raise
ValueError
(
'
File
\
'
{}
\
'
does not exists
'
.
format
(
path_to_cif
))
raise
ValueError
(
"
File '{}' does not exists
"
.
format
(
path_to_cif
))
result_bag
=
{}
...
...
@@ -110,7 +112,10 @@ def read_pdb_components_file(path_to_cif: str, sanitize: bool = True) -> Dict[st
try
:
result_bag
[
k
]
=
_parse_pdb_mmcif
(
v
)
except
CCDUtilsError
as
e
:
print
(
f
'ERROR: Data block
{
k
}
not processed. Reason: (
{
str
(
e
)
}
).'
,
file
=
sys
.
stderr
)
print
(
f
"ERROR: Data block
{
k
}
not processed. Reason: (
{
str
(
e
)
}
)."
,
file
=
sys
.
stderr
,
)
return
result_bag
...
...
@@ -133,19 +138,23 @@ def _parse_pdb_mmcif(cif_dict, sanitize=True):
errors
=
list
()
mol
=
rdkit
.
Chem
.
RWMol
()
atoms_dict
=
_preprocess_pdb_parser_output
(
cif_dict
,
'_chem_comp_atom'
,
warnings
)
bonds_dict
=
_preprocess_pdb_parser_output
(
cif_dict
,
'_chem_comp_bond'
,
warnings
)
identifiers_dict
=
_preprocess_pdb_parser_output
(
cif_dict
,
'_pdbx_chem_comp_identifier'
,
warnings
)
descriptors_dict
=
_preprocess_pdb_parser_output
(
cif_dict
,
'_pdbx_chem_comp_descriptor'
,
warnings
)
properties_dict
=
_preprocess_pdb_parser_output
(
cif_dict
,
'_chem_comp'
,
warnings
)
atoms_dict
=
_preprocess_pdb_parser_output
(
cif_dict
,
"_chem_comp_atom"
,
warnings
)
bonds_dict
=
_preprocess_pdb_parser_output
(
cif_dict
,
"_chem_comp_bond"
,
warnings
)
identifiers_dict
=
_preprocess_pdb_parser_output
(
cif_dict
,
"_pdbx_chem_comp_identifier"
,
warnings
)
descriptors_dict
=
_preprocess_pdb_parser_output
(
cif_dict
,
"_pdbx_chem_comp_descriptor"
,
warnings
)
properties_dict
=
_preprocess_pdb_parser_output
(
cif_dict
,
"_chem_comp"
,
warnings
)
_parse_pdb_atoms
(
mol
,
atoms_dict
)
_parse_pdb_conformers
(
mol
,
atoms_dict
)
_parse_pdb_bonds
(
mol
,
bonds_dict
,
atoms_dict
,
errors
)
_handle_implicit_hydrogens
(
mol
)
descriptors
=
_parse_pdb_descriptors
(
descriptors_dict
,
'
descriptor
'
)
descriptors
+=
_parse_pdb_descriptors
(
identifiers_dict
,
'
identifier
'
)
descriptors
=
_parse_pdb_descriptors
(
descriptors_dict
,
"
descriptor
"
)
descriptors
+=
_parse_pdb_descriptors
(
identifiers_dict
,
"
identifier
"
)
properties
=
_parse_pdb_properties
(
properties_dict
)
comp
=
Component
(
mol
.
GetMol
(),
cif_dict
,
properties
,
descriptors
,
sanitize
=
sanitize
)
...
...
@@ -167,23 +176,23 @@ def _parse_pdb_atoms(mol, atoms):
if
not
atoms
:
return
for
i
in
range
(
len
(
atoms
[
'
atom_id
'
])):
element
=
atoms
[
'
type_symbol
'
][
i
]
for
i
in
range
(
len
(
atoms
[
"
atom_id
"
])):
element
=
atoms
[
"
type_symbol
"
][
i
]
element
=
element
if
len
(
element
)
==
1
else
element
[
0
]
+
element
[
1
].
lower
()
isotope
=
None
if
element
==
'D'
:
element
=
'H'
if
element
==
"D"
:
element
=
"H"
isotope
=
2
elif
element
==
'X'
:
element
=
'*'
elif
element
==
"X"
:
element
=
"*"
atom
=
rdkit
.
Chem
.
Atom
(
element
)
#atom.SetChiralTag(_atom_chiral_tag(atoms['pdbx_stereo_config'][i]))
atom
.
SetProp
(
'
name
'
,
atoms
[
'
atom_id
'
][
i
])
atom
.
SetProp
(
'
alt_name
'
,
atoms
[
'
alt_atom_id
'
][
i
])
atom
.
SetBoolProp
(
'
leaving_atom
'
,
atoms
[
'
pdbx_leaving_atom_flag
'
][
i
]
==
"Y"
)
atom
.
SetFormalCharge
(
conversions
.
str_to_int
(
atoms
[
'
charge
'
][
i
]))
#
atom.SetChiralTag(_atom_chiral_tag(atoms['pdbx_stereo_config'][i]))
atom
.
SetProp
(
"
name
"
,
atoms
[
"
atom_id
"
][
i
])
atom
.
SetProp
(
"
alt_name
"
,
atoms
[
"
alt_atom_id
"
][
i
])
atom
.
SetBoolProp
(
"
leaving_atom
"
,
atoms
[
"
pdbx_leaving_atom_flag
"
][
i
]
==
"Y"
)
atom
.
SetFormalCharge
(
conversions
.
str_to_int
(
atoms
[
"
charge
"
][
i
]))
if
isotope
is
not
None
:
atom
.
SetIsotope
(
isotope
)
...
...
@@ -203,8 +212,8 @@ def _parse_pdb_conformers(mol, atoms):
if
not
atoms
:
return
ideal
=
_setup_pdb_conformer
(
atoms
,
'
pdbx_model_Cartn_{}_ideal
'
)
model
=
_setup_pdb_conformer
(
atoms
,
'
model_Cartn_{}
'
)
ideal
=
_setup_pdb_conformer
(
atoms
,
"
pdbx_model_Cartn_{}_ideal
"
)
model
=
_setup_pdb_conformer
(
atoms
,
"
model_Cartn_{}
"
)
mol
.
AddConformer
(
ideal
,
assignId
=
True
)
mol
.
AddConformer
(
model
,
assignId
=
True
)
...
...
@@ -224,12 +233,12 @@ def _setup_pdb_conformer(atoms, label):
if
not
atoms
:
return
conformer
=
rdkit
.
Chem
.
Conformer
(
len
(
atoms
[
'
atom_id
'
]))
conformer
=
rdkit
.
Chem
.
Conformer
(
len
(
atoms
[
"
atom_id
"
]))
for
i
in
range
(
len
(
atoms
[
'
atom_id
'
])):
x
=
conversions
.
str_to_float
(
atoms
[
label
.
format
((
'x'
))][
i
])
y
=
conversions
.
str_to_float
(
atoms
[
label
.
format
((
'y'
))][
i
])
z
=
conversions
.
str_to_float
(
atoms
[
label
.
format
((
'z'
))][
i
])
for
i
in
range
(
len
(
atoms
[
"
atom_id
"
])):
x
=
conversions
.
str_to_float
(
atoms
[
label
.
format
((
"x"
))][
i
])
y
=
conversions
.
str_to_float
(
atoms
[
label
.
format
((
"y"
))][
i
])
z
=
conversions
.
str_to_float
(
atoms
[
label
.
format
((
"z"
))][
i
])
atom_position
=
rdkit
.
Chem
.
rdGeometry
.
Point3D
(
x
,
y
,
z
)
conformer
.
SetAtomPosition
(
i
,
atom_position
)
...
...
@@ -250,18 +259,26 @@ def _parse_pdb_bonds(mol, bonds, atoms, errors):
if
not
bonds
:
return
for
i
in
range
(
len
(
bonds
[
'atom_id_1'
])):
atom_1
=
collection_ext
.
find_element_in_list
(
atoms
[
'atom_id'
],
bonds
[
'atom_id_1'
][
i
])
atom_2
=
collection_ext
.
find_element_in_list
(
atoms
[
'atom_id'
],
bonds
[
'atom_id_2'
][
i
])
bond_order
=
_bond_pdb_order
(
bonds
[
'value_order'
][
i
])
for
i
in
range
(
len
(
bonds
[
"atom_id_1"
])):
atom_1
=
collection_ext
.
find_element_in_list
(
atoms
[
"atom_id"
],
bonds
[
"atom_id_1"
][
i
]
)
atom_2
=
collection_ext
.
find_element_in_list
(
atoms
[
"atom_id"
],
bonds
[
"atom_id_2"
][
i
]
)
bond_order
=
_bond_pdb_order
(
bonds
[
"value_order"
][
i
])
if
any
(
a
is
None
for
a
in
[
atom_1
,
atom_2
,
bond_order
]):
errors
.
append
(
'Problem with the {}-th bond in the _chem_comp_bond group'
.
format
(
i
+
1
))
errors
.
append
(
"Problem with the {}-th bond in the _chem_comp_bond group"
.
format
(
i
+
1
)
)
try
:
mol
.
AddBond
(
atom_1
,
atom_2
,
bond_order
)
except
RuntimeError
:
errors
.
append
(
f
'Duplicit bond
{
bonds
[
"atom_id_1"
][
i
]
}
-
{
bonds
[
"atom_id_2"
][
i
]
}
'
)
errors
.
append
(
f
'Duplicit bond
{
bonds
[
"atom_id_1"
][
i
]
}
-
{
bonds
[
"atom_id_2"
][
i
]
}
'
)
def
_handle_implicit_hydrogens
(
mol
):
...
...
@@ -285,7 +302,7 @@ def _handle_implicit_hydrogens(mol):
atom
.
SetNoImplicit
(
no_Hs
)
def
_parse_pdb_descriptors
(
pdbx_chem_comp_descriptors
,
label
=
'
descriptor
'
):
def
_parse_pdb_descriptors
(
pdbx_chem_comp_descriptors
,
label
=
"
descriptor
"
):
"""
Parse useful information from _pdbx_chem_comp_* category
...
...
@@ -304,9 +321,11 @@ def _parse_pdb_descriptors(pdbx_chem_comp_descriptors, label='descriptor'):
return
descriptors
for
i
in
range
(
len
(
pdbx_chem_comp_descriptors
[
label
])):
d
=
Descriptor
(
type
=
pdbx_chem_comp_descriptors
[
'type'
][
i
],
program
=
pdbx_chem_comp_descriptors
[
'program'
][
i
],
value
=
pdbx_chem_comp_descriptors
[
label
][
i
])
d
=
Descriptor
(
type
=
pdbx_chem_comp_descriptors
[
"type"
][
i
],
program
=
pdbx_chem_comp_descriptors
[
"program"
][
i
],
value
=
pdbx_chem_comp_descriptors
[
label
][
i
],
)
descriptors
.
append
(
d
)
return
descriptors
...
...
@@ -324,18 +343,26 @@ def _parse_pdb_properties(chem_comp):
"""
properties
=
None
if
chem_comp
:
mod_date
=
chem_comp
[
'pdbx_modified_date'
][
0
].
split
(
'-'
)
d
=
date
(
1970
,
1
,
1
)
if
mod_date
[
0
]
==
'?'
else
date
(
int
(
mod_date
[
0
]),
int
(
mod_date
[
1
]),
int
(
mod_date
[
2
]))
rel_status
=
chem_comp
[
'pdbx_release_status'
][
0
]
rel_status
=
ReleaseStatus
.
from_str
(
chem_comp
[
'pdbx_release_status'
][
0
])
properties
=
CCDProperties
(
id
=
chem_comp
[
'id'
][
0
],
name
=
chem_comp
[
'name'
][
0
],
formula
=
chem_comp
[
'formula'
][
0
],
modified_date
=
d
,
pdbx_release_status
=
rel_status
,
weight
=
0.0
if
chem_comp
[
'formula_weight'
][
0
]
==
'?'
else
float
(
chem_comp
[
'formula_weight'
][
0
]))
mod_date
=
chem_comp
[
"pdbx_modified_date"
][
0
].
split
(
"-"
)
d
=
(
date
(
1970
,
1
,
1
)
if
mod_date
[
0
]
==
"?"
else
date
(
int
(
mod_date
[
0
]),
int
(
mod_date
[
1
]),
int
(
mod_date
[
2
]))
)
rel_status
=
chem_comp
[
"pdbx_release_status"
][
0
]
rel_status
=
ReleaseStatus
.
from_str
(
chem_comp
[
"pdbx_release_status"
][
0
])
properties
=
CCDProperties
(
id
=
chem_comp
[
"id"
][
0
],
name
=
chem_comp
[
"name"
][
0
],
formula
=
chem_comp
[
"formula"
][
0
],
modified_date
=
d
,
pdbx_release_status
=
rel_status
,
weight
=
0.0
if
chem_comp
[
"formula_weight"
][
0
]
==
"?"
else
float
(
chem_comp
[
"formula_weight"
][
0
]),
)
return
properties
...
...
@@ -354,13 +381,15 @@ def _preprocess_pdb_parser_output(dictionary, label, warnings):
dict: unified dictionary for structure parsing.
"""
if
label
not
in
dictionary
:
warnings
.
append
(
'
Namespace {} does not exist.
'
.
format
(
label
))
warnings
.
append
(
"
Namespace {} does not exist.
"
.
format
(
label
))
return
[]
check_element
=
list
(
dictionary
[
label
].
keys
())[
0
]
values
=
(
dictionary
[
label
]
if
isinstance
(
dictionary
[
label
][
check_element
],
list
)
else
{
k
:
[
v
]
for
k
,
v
in
dictionary
[
label
].
items
()})
values
=
(
dictionary
[
label
]
if
isinstance
(
dictionary
[
label
][
check_element
],
list
)
else
{
k
:
[
v
]
for
k
,
v
in
dictionary
[
label
].
items
()}
)
return
values
...
...
@@ -374,11 +403,11 @@ def _bond_pdb_order(value_order):
Returns:
rdkit.Chem.rdchem.BondType: -- bond type
"""
if
value_order
==
'
SING
'
:
if
value_order
==
"
SING
"
:
return
rdkit
.
Chem
.
rdchem
.
BondType
(
1
)
if
value_order
==
'
DOUB
'
:
if
value_order
==
"
DOUB
"
:
return
rdkit
.
Chem
.
rdchem
.
BondType
(
2
)
if
value_order
==
'
TRIP
'
:
if
value_order
==
"
TRIP
"
:
return
rdkit
.
Chem
.
rdchem
.
BondType
(
3
)
return
None
...
...
@@ -393,12 +422,14 @@ def _atom_chiral_tag(tag):
Returns:
rdkit.Chem.ChiralType: Chiral center in RDKit language.
"""
if
tag
==
'N'
:
if
tag
==
"N"
:
return
rdkit
.
Chem
.
ChiralType
.
CHI_UNSPECIFIED
if
tag
==
'S'
:
if
tag
==
"S"
:
return
rdkit
.
Chem
.
ChiralType
.
CHI_TETRAHEDRAL_CCW
if
tag
==
'R'
:
if
tag
==
"R"
:
return
rdkit
.
Chem
.
ChiralType
.
CHI_TETRAHEDRAL_CW
return
rdkit
.
Chem
.
ChiralType
.
CHI_UNSPECIFIED
# endregion parse mmcif
pdbeccdutils/core/ccd_writer.py
View file @
0387f0d4
...
...
@@ -116,7 +116,7 @@ def to_pdb_str(
)
info
=
rdkit
.
Chem
.
rdchem
.
AtomPDBResidueInfo
()
info
.
SetResidueName
(
f
'
{
component
.
id
:
>
3
}
'
)
info
.
SetResidueName
(
f
"
{
component
.
id
:
>
3
}
"
)
info
.
SetTempFactor
(
20.0
)
info
.
SetOccupancy
(
1.0
)
info
.
SetChainId
(
"A"
)
...
...
@@ -929,8 +929,7 @@ def _to_sdf_str_fallback(mol, ccd_id, mappings):
content
+=
[
f
"
{
ccd_id
}
-
{
k
}
conformer"
,
" RDKit 3D"
,
"
\n
"
f
"
{
atom_count
:
>
3
}{
bond_count
:
3
}
0 0 0 0 0 0 0 0999 V2000"
,
"
\n
"
f
"
{
atom_count
:
>
3
}{
bond_count
:
3
}
0 0 0 0 0 0 0 0999 V2000"
,
]
for
i
in
range
(
0
,
atom_count
):
...
...
pdbeccdutils/core/component.py
View file @
0387f0d4
...
...
@@ -88,7 +88,6 @@ class Component:
if
sanitize
:
self
.
_sanitization_issues
=
self
.
_sanitize
()
if
descriptors
is
not
None
:
self
.
_descriptors
=
descriptors
...
...
pdbeccdutils/core/depictions.py
View file @
0387f0d4
...
...
@@ -43,7 +43,11 @@ class DepictionManager:
PubChem templates can be downloaded using PubChemDownloader class.
"""
def
__init__
(
self
,
pubchem_templates_path
:
str
=
''
,
general_templates_path
:
str
=
config
.
general_templates
)
->
None
:
def
__init__
(
self
,
pubchem_templates_path
:
str
=
""
,
general_templates_path
:
str
=
config
.
general_templates
,
)
->
None
:
"""
Initialize component which does the ligand depiction.
If Nones is provided as parameters just the defalt RDKit
...
...
@@ -63,16 +67,20 @@ class DepictionManager:
self
.
coordgen_params
.
coordgenScaling
=
50
/
1.5
self
.
coordgen_params
.
templateFileDir
=
config
.
coordgen_templates
self
.
pubchem_templates
=
pubchem_templates_path
if
os
.
path
.
isdir
(
pubchem_templates_path
)
else
''
self
.
pubchem_templates
=
(
pubchem_templates_path
if
os
.
path
.
isdir
(
pubchem_templates_path
)
else
""
)
self
.
templates
:
Dict
[
str
,
rdkit
.
Chem
.
rdchem
.
Mol
]
=
OrderedDict
()
if
os
.
path
.
isdir
(
general_templates_path
):
for
k
in
sorted
(
os
.
listdir
(
general_templates_path
)):
template
=
self
.
_load_template
(
os
.
path
.
join
(
general_templates_path
,
k
))
template_name
=
k
.
split
(
'.'
)[
0
]
template_name
=
k
.
split
(
"."
)[
0
]
self
.
templates
[
template_name
]
=
template
def
depict_molecule
(
self
,
het_id
:
str
,
mol
:
rdkit
.
Chem
.
rdchem
.
Mol
)
->
DepictionResult
:
def
depict_molecule
(
self
,
het_id
:
str
,
mol
:
rdkit
.
Chem
.
rdchem
.
Mol
)
->
DepictionResult
:
"""
Given input molecule tries to generate its depictions.
...
...
@@ -95,7 +103,11 @@ class DepictionManager:
rdkitMol
=
Chem
.
RWMol
(
temp_mol
).
GetMol
()
results
=
[]
pubchem_res
=
self
.
_get_2D_by_pubchem
(
het_id
,
pubchemMol
)
if
self
.
pubchem_templates
else
None
pubchem_res
=
(
self
.
_get_2D_by_pubchem
(
het_id
,
pubchemMol
)
if
self
.
pubchem_templates
else
None
)
template_res
=
self
.
_get_2D_by_template
(
templateMol
)
if
self
.
templates
else
[]
rdkit_res
=
self
.
_get_2D_by_rdkit
(
rdkitMol
)
...
...
@@ -111,11 +123,12 @@ class DepictionManager:
if
results
:
to_return
=
results
[
0
]
fix_conformer
(
to_return
.
mol
.
GetConformer
(
0
))
return
to_return
return
DepictionResult
(
source
=
DepictionSource
.
Failed
,
template_name
=
''
,
mol
=
None
,
score
=
1000
)
return
DepictionResult
(
source
=
DepictionSource
.
Failed
,
template_name
=
""
,
mol
=
None
,
score
=
1000
)
def
_get_pubchem_template_path
(
self
,
het_id
):
"""Get path to the PubChem template if it exists.
...
...
@@ -126,9 +139,9 @@ class DepictionManager:
Returns:
str: Path to the PubChem layout.
"""
path
=
os
.
path
.
join
(
self
.
pubchem_templates
,
f
'
{
het_id
}
.sdf
'
)
path
=
os
.
path
.
join
(
self
.
pubchem_templates
,
f
"
{
het_id
}
.sdf
"
)
return
path
if
os
.
path
.
isfile
(
path
)
else
''
return
path
if
os
.
path
.
isfile
(
path
)
else
""
def
_load_template
(
self
,
path
):
"""
...
...
@@ -145,14 +158,14 @@ class DepictionManager:
rdkit.Chem.rdchem.Mol: RDKit representation of the template
"""
mol
=
Chem
.
RWMol
()
extension
=
os
.
path
.
basename
(
path
).
split
(
'.'
)[
1
]
extension
=
os
.
path
.
basename
(
path
).
split
(
"."
)[
1
]
if
extension
==
'
sdf
'
:
if
extension
==
"
sdf
"
:
mol
=
Chem
.
MolFromMolFile
(
path
,
sanitize
=
True
,
removeHs
=
True
)
elif
extension
==
'
pdb
'
:
elif
extension
==
"
pdb
"
:
mol
=
Chem
.
MolFromPDBFile
(
path
,
sanitize
=
True
,
removeHs
=
True
)
else
:
raise
ValueError
(
'
Unsupported molecule type
\
'
{}
\'
'
.
format
(
extension
))
raise
ValueError
(
"
Unsupported molecule type '{}'
"
.
format
(
extension
))
p
=
Chem
.
AdjustQueryParameters
()
p
.
makeAtomsGeneric
=
True
...
...
@@ -175,9 +188,13 @@ class DepictionManager:
try
:
rdCoordGen
.
AddCoords
(
mol
,
self
.
coordgen_params
)
flaws
=
DepictionValidator
(
mol
).
depiction_score
()
return
DepictionResult
(
source
=
DepictionSource
.
RDKit
,
template_name
=
None
,
mol
=
mol
,
score
=
flaws
)
return
DepictionResult
(
source
=
DepictionSource
.
RDKit
,
template_name
=
None
,
mol
=
mol
,
score
=
flaws
)
except
Exception
:
return
DepictionResult
(
source
=
DepictionSource
.
Failed
,
template_name
=
None
,
mol
=
None
,
score
=
1000
)
return
DepictionResult
(
source
=
DepictionSource
.
Failed
,
template_name
=
None
,
mol
=
None
,
score
=
1000
)
def
_get_2D_by_pubchem
(
self
,
code
,
mol
):
"""
...
...
@@ -198,11 +215,18 @@ class DepictionManager:
if
mol
.
HasSubstructMatch
(
template
):
AllChem
.
GenerateDepictionMatching2DStructure
(
mol
,
template
)
flaws
=
DepictionValidator
(
mol
).
depiction_score
()
return
DepictionResult
(
source
=
DepictionSource
.
PubChem
,
template_name
=
code
,
mol
=
mol
,
score
=
flaws
)
return
DepictionResult
(
source
=
DepictionSource
.
PubChem
,
template_name
=
code
,
mol
=
mol
,
score
=
flaws
,
)
except
Exception
as
e
:
print
(
str
(
e
),
file
=
sys
.
stderr
)
return
DepictionResult
(
source
=
DepictionSource
.
Failed
,
template_name
=
None
,
mol
=
None
,
score
=
1000
)
return
DepictionResult
(
source
=
DepictionSource
.
Failed
,
template_name
=
None
,
mol
=
None
,
score
=
1000
)
def
_get_2D_by_template
(
self
,
mol
):
"""
...
...
@@ -222,8 +246,14 @@ class DepictionManager:
if
temp_mol
.
HasSubstructMatch
(
template
):
AllChem
.
GenerateDepictionMatching2DStructure
(
temp_mol
,
template
)
flaws
=
DepictionValidator
(
temp_mol
).
depiction_score
()
results
.
append
(
DepictionResult
(
source
=
DepictionSource
.
Template
,
template_name
=
key
,
mol
=
temp_mol
,
score
=
flaws
))
results
.
append
(
DepictionResult
(
source
=
DepictionSource
.
Template
,
template_name
=
key
,
mol
=
temp_mol
,
score
=
flaws
,
)
)
except
Exception
:
pass
# if it fails it fails, but generally it wont
...
...
@@ -240,7 +270,10 @@ class DepictionValidator:
self
.
conformer
=
mol
.
GetConformer
()
self
.
bonds
=
self
.
mol
.
GetBonds
()
atoms
=
[
self
.
conformer
.
GetAtomPosition
(
i