Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix unbounded coordinates in MOL/SDF files #772

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 15 additions & 0 deletions src/biotite/structure/io/mol/ctab.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
from biotite.structure.atoms import AtomArray, AtomArrayStack
from biotite.structure.bonds import BondList, BondType
from biotite.structure.error import BadStructureError
from biotite.structure.io.util import number_of_integer_digits

BOND_TYPE_MAPPING = {
1: BondType.SINGLE,
Expand Down Expand Up @@ -297,6 +298,13 @@ def _write_structure_to_ctab_v2000(atoms, default_bond_type):
" 0 0 0 0 0 0 0 1 V2000"
)

for i, coord_name in enumerate(["x", "y", "z"]):
n_coord_digits = number_of_integer_digits(atoms.coord[:, i])
if n_coord_digits > 5:
raise BadStructureError(
f"5 pre-decimal columns for {coord_name}-coordinates are "
f"available, but array would require {n_coord_digits}"
)
atom_lines = [
f"{atoms.coord[i, 0]:>10.4f}"
f"{atoms.coord[i, 1]:>10.4f}"
Expand Down Expand Up @@ -341,6 +349,13 @@ def _write_structure_to_ctab_v3000(atoms, default_bond_type):

counts_line = f"COUNTS {atoms.array_length()} {atoms.bonds.get_bond_count()} 0 0 0"

for i, coord_name in enumerate(["x", "y", "z"]):
n_coord_digits = number_of_integer_digits(atoms.coord[:, i])
if n_coord_digits > 5:
raise BadStructureError(
f"5 pre-decimal columns for {coord_name}-coordinates are "
f"available, but array would require {n_coord_digits}"
)
atom_lines = [
f"{i + 1}"
f" {_quote(atoms.element[i].capitalize())}"
Expand Down
21 changes: 5 additions & 16 deletions src/biotite/structure/io/pdb/file.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
encode_hybrid36,
max_hybrid36_number,
)
from biotite.structure.io.util import number_of_integer_digits
from biotite.structure.repair import infer_elements
from biotite.structure.util import matrix_rotate

Expand Down Expand Up @@ -1248,43 +1249,31 @@ def _check_pdb_compatibility(array, hybrid36):
if any([len(name) > 4 for name in array.atom_name]):
raise BadStructureError("Some atom names exceed 4 characters")
for i, coord_name in enumerate(["x", "y", "z"]):
n_coord_digits = _number_of_integer_digits(array.coord[..., i])
n_coord_digits = number_of_integer_digits(array.coord[..., i])
if n_coord_digits > 4:
raise BadStructureError(
f"4 pre-decimal columns for {coord_name}-coordinates are "
f"available, but array would require {n_coord_digits}"
)
if "b_factor" in annot_categories:
n_b_factor_digits = _number_of_integer_digits(array.b_factor)
n_b_factor_digits = number_of_integer_digits(array.b_factor)
if n_b_factor_digits > 3:
raise BadStructureError(
"3 pre-decimal columns for B-factor are available, "
f"but array would require {n_b_factor_digits}"
)
if "occupancy" in annot_categories:
n_occupancy_digits = _number_of_integer_digits(array.occupancy)
n_occupancy_digits = number_of_integer_digits(array.occupancy)
if n_occupancy_digits > 3:
raise BadStructureError(
"3 pre-decimal columns for occupancy are available, "
f"but array would require {n_occupancy_digits}"
)
if "charge" in annot_categories:
# The sign can be omitted as it is put into the adjacent column
n_charge_digits = _number_of_integer_digits(np.abs(array.charge))
n_charge_digits = number_of_integer_digits(np.abs(array.charge))
if n_charge_digits > 1:
raise BadStructureError(
"1 column for charge is available, "
f"but array would require {n_charge_digits}"
)


def _number_of_integer_digits(values):
"""
Get the maximum number of characters needed to represent the
pre-decimal positions of the given numeric values.
"""
values = values.astype(int, copy=False)
n_digits = 0
n_digits = max(n_digits, len(str(np.min(values))))
n_digits = max(n_digits, len(str(np.max(values))))
return n_digits
38 changes: 38 additions & 0 deletions src/biotite/structure/io/util.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
# This source code is part of the Biotite package and is distributed
# under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
# information.

"""
Common functions used by a number of subpackages.
"""

# Report the package path as the module name instead of this file's own
# dotted path
__name__ = "biotite.structure.io"
__author__ = "Patrick Kunzmann"
# Names exported by a star import of this module
__all__ = ["number_of_integer_digits"]

import numpy as np


def number_of_integer_digits(values):
    """
    Get the maximum number of characters needed to represent the
    pre-decimal positions of the given numeric values.

    Parameters
    ----------
    values : ndarray, dtype=float
        The values to be checked.
        Arrays of any dimensionality are accepted.

    Returns
    -------
    n_digits : int
        The maximum number of characters needed to represent the
        pre-decimal positions of the given numeric values.
        A minus sign remaining after truncation counts as a character.
        ``0`` if `values` contains no elements.
    """
    values = np.asarray(values)
    # Check the total element count instead of ``len()``:
    # a multi-dimensional array may have a non-zero first dimension
    # while still containing no elements, which would make the
    # reductions below raise a ValueError
    if values.size == 0:
        return 0
    # Casting to integer truncates the decimal places
    values = values.astype(int, copy=False)
    # Only the extreme values can require the most characters:
    # the minimum may carry a sign, the maximum may have the most digits
    return max(len(str(np.min(values))), len(str(np.max(values))))
Loading