Commit 0673003f authored by Lukas Pravda's avatar Lukas Pravda
Browse files

fix leading H missing in certain json annotations

2N -> H2N
parent a36925f7
......@@ -23,13 +23,13 @@ import re
import xml.etree.ElementTree as ET
from collections import OrderedDict
from sys import platform
from numpy.core.multiarray import result_type
import rdkit
from PIL import Image, ImageDraw, ImageFont
from scipy.spatial import KDTree
svg_namespace = {"svg": ""}
def save_no_image(path_to_image, default_msg=None, width=200):
......@@ -62,9 +62,9 @@ def draw_molecule(mol, drawer, file_name, wedge_bonds, atom_highlight, bond_high
with parameters for drawing depiction.
file_name (str): Path where the depiction will be saved.
wedge_bonds (bool): Whether or not to wedge bonds.
atom_highlight (`obj`: Dict): Dictionary with atom id and RGB
atom_highlight (dict): Dictionary with atom id and RGB
color mapping.
bond_highlight (`obj`: Dict): Dictionary with mapping of atom
bond_highlight (dict): Dictionary with mapping of atom
ids and RGB colors.
......@@ -109,7 +109,7 @@ def get_drawing_scale(mol):
mol (rdkit.Chem.rdchem.Mol): Rdkit mol object.
[:obj:`tuple` of :obj:`int`]: Dimension of the depictions.
[tuple[int,int]]: Dimension of the depictions (x, y).
cnf = mol.GetConformer()
......@@ -135,11 +135,11 @@ def convert_svg(svg_string, ccd_id, mol: rdkit.Chem.Mol):
svg_string (str): SVG as string.
ccd_id (str): CCD ID.
mol (rdkit.Chem.Mol): RDKit mol object used for depiction.
:obj:`dict` of :obj:`dict`: object with all the details for
json serialization.
dict: JSON-style information for the 2D depiction.
result_bag = OrderedDict(
......@@ -148,22 +148,22 @@ def convert_svg(svg_string, ccd_id, mol: rdkit.Chem.Mol):
svg_string = _fix_svg(svg_string)
svg = ET.fromstring(svg_string)
atom_elem = svg.findall("{}circle")
bond_elem = svg.findall("{}path")
dimensions_svg = svg.find("{}rect")
label_elem = svg.findall("{}text")
atom_elem = svg.findall("svg:circle", svg_namespace)
bond_elem = svg.findall("svg:path", svg_namespace)
dimensions_svg = svg.find("svg:rect", svg_namespace)
label_elem = svg.findall("svg:text", svg_namespace)
kd_tree = None
atoms = _parse_atoms_from_svg(atom_elem, mol)
atoms = _parse_atoms_from_svg(atom_elem, mol)
bonds = _parse_bonds_from_svg(bond_elem, mol)
atom_centers = [[atom["x"], atom["y"]] for atom in atoms]
kd_tree = KDTree(atom_centers)
_parse_labels_from_svg(label_elem, kd_tree, atoms)
result_bag['atoms'] = atoms
result_bag['bonds'] = bonds
result_bag["atoms"] = atoms
result_bag["bonds"] = bonds
result_bag["resolution"] = {
"x": float(dimensions_svg.attrib.get("width")),
......@@ -174,31 +174,31 @@ def convert_svg(svg_string, ccd_id, mol: rdkit.Chem.Mol):
def _parse_atoms_from_svg(atom_elements, mol: rdkit.Chem.Mol):
"""Extract atoms from the SVG atom elements
atom_elements ([type]): [description]
mol (rdkit.Chem.Mol): [description]
atom_elements (list[xml.etree.ElementTree.Element]): List of extracted XML elements
mol (rdkit.Chem.rdchem.Mol): RDkit molecule
[type]: [description]
list[dict]: list of JSON-style atom representation.
result = []
for atom_svg in atom_elements:
atom_id_str ="\d+", atom_svg.attrib.get("class")).group(0)
atom_id = int(atom_id_str)
atom_id_str ="\d+", atom_svg.attrib.get("class")).group(0)
atom_id = int(atom_id_str)
if atom_id >= mol.GetNumAtoms():
if atom_id >= mol.GetNumAtoms():
temp = {
"name": mol.GetAtomWithIdx(atom_id).GetProp("name"),
"labels": [],
"x": float(atom_svg.attrib.get("cx")),
"y": float(atom_svg.attrib.get("cy")),
temp = {
"name": mol.GetAtomWithIdx(atom_id).GetProp("name"),
"labels": [],
"x": float(atom_svg.attrib.get("cx")),
"y": float(atom_svg.attrib.get("cy")),
return result
......@@ -206,10 +206,11 @@ def _parse_labels_from_svg(label_elements, kd_tree, atoms):
"""Parse atom label information from the SVG.
label_elements ([type]):
label_elements (list[xml.etree.ElementTree.Element]):
List of SVG circle elements with atom information.
kd_tree (KDTree): Kdtree with atom proximities
atoms (list of dict): JSON representation of atoms
atoms (list[dict]): JSON-style representation of atoms.
for label_svg in label_elements:
x = label_svg.attrib.get("x")
y = label_svg.attrib.get("y")
......@@ -225,38 +226,41 @@ def _parse_labels_from_svg(label_elements, kd_tree, atoms):
"text-anchor": label_svg.attrib.get("text-anchor"),
"tspans": [],
filtered_tspans = filter(
lambda x: x.text is not None,
filtered_tspans = [
x for x in label_svg.findall("svg:tspan", svg_namespace) if x.text
for tspan in filtered_tspans:
if tspan.text == "H": # get rid of H as we do not have any connection to them anyway in 2D.
tspan_style = tspan.attrib.get("style")
if tspan.text == "H" and len(filtered_tspans) == 1:
# get rid of H's as we do not have any connection to them anyway in 2D.
tspan_item = {
"value": tspan.text,
"style": ""
if tspan.attrib.get("style") is None
else tspan.attrib.get("style"),
"style": tspan_style if tspan_style else "",
nearest_index = kd_tree.query([temp["x"], temp["y"]])[1]
if temp['tspans']:
if temp["tspans"]:
def _parse_bonds_from_svg(bond_elements, mol):
"""Extract bonding information from SVG elements
bond_elements ([type]): [description]
mol ([type]): [description]
bond_elements (list[xml.etree.ElementTree.Element]):
List of SVG path elements.
mol (rdkit.Chem.rdchem.Mol): [description]
[type]: [description]
list[dict]: JSON-style formatted bond information.
result = []
for bond_svg in bond_elements:
if (
......@@ -277,11 +281,10 @@ def _parse_bonds_from_svg(bond_elements, mol):
"coords": bond_svg.attrib.get("d"),
"style": bond_svg.attrib.get("style"),
return result
return result
def _fix_svg(svg_string):
......@@ -4,10 +4,12 @@
import json
import os
import xml.etree.ElementTree as ET
import pytest
from pdbeccdutils.core import ccd_reader
from pdbeccdutils.core.component import Component
from pdbeccdutils.helpers.drawing import save_no_image, svg_namespace
from pdbeccdutils.core.depictions import DepictionManager, DepictionSource
from pdbeccdutils.tests.tst_utilities import cif_filename
......@@ -129,8 +131,10 @@ class TestWriteImg:
for atom in json_obj["atoms"]:
for l in atom["labels"]:
for t in l["tspans"]:
assert t['value'] != "H" # we do not have any H labels, because we dont have links to them.
h_tspans = sum(1 for x in l["tspans"] if x == "H")
assert h_tspans < len(
) # we do not have bare H labels, because we dont have links to them.
assert all(
bond["bgn"] in atom_names and bond["end"] in atom_names
......@@ -140,3 +144,23 @@ class TestWriteImg:
bond["coords"] for bond in json_obj["bonds"]
) # do we have coordinates?
assert all(bond["style"] for bond in json_obj["bonds"]) # and its stylling?
def test_no_image_svg(tmpdir):
    """Check that ``save_no_image`` writes an SVG file carrying the requested message.

    Args:
        tmpdir: pytest temporary-directory fixture used as the output location.
    """
    value = "foo"
    svg = str(tmpdir.join("test.svg"))
    # Pass the same variable that is asserted on below so the supplied and
    # expected messages cannot drift apart.
    save_no_image(svg, default_msg=value)

    assert os.path.isfile(svg)

    xml = ET.parse(svg)
    text = xml.find("svg:text", svg_namespace)

    # find() returns None when nothing matches; fail with a clear assertion
    # rather than an AttributeError on ``None.text``.
    assert text is not None
    assert text.text.strip() == value
def test_no_image_png(tmpdir):
    """Check that ``save_no_image`` creates a file at a ``.png`` path.

    The previous body never called ``save_no_image`` and asserted on
    ``os.path.join(png)``, which is always truthy for a non-empty path, so
    the test could not fail.

    Args:
        tmpdir: pytest temporary-directory fixture used as the output location.
    """
    png = str(tmpdir.join("test.png"))
    # NOTE(review): assumes save_no_image renders PNG output when given a
    # ``.png`` path and works with its default message/width - confirm.
    save_no_image(png)

    assert os.path.isfile(png)
\ No newline at end of file
