Skip to content

Commit

Permalink
Support V3000 CTAB
Browse files Browse the repository at this point in the history
  • Loading branch information
padix-key committed May 29, 2024
1 parent b762d00 commit 2e02536
Show file tree
Hide file tree
Showing 11 changed files with 937 additions and 133 deletions.
118 changes: 12 additions & 106 deletions src/biotite/structure/io/ctab.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,46 +2,20 @@
# under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
# information.

"""
Functions for parsing and writing an :class:`AtomArray` from/to
*MDL* connection tables (Ctab).
"""

__name__ = "biotite.structure.io"
__author__ = "Patrick Kunzmann"
__all__ = ["read_structure_from_ctab", "write_structure_to_ctab"]

import warnings
import numpy as np
from ..error import BadStructureError
from ..atoms import AtomArray, AtomArrayStack
from ..bonds import BondList, BondType

BOND_TYPE_MAPPING = {
1: BondType.SINGLE,
2: BondType.DOUBLE,
3: BondType.TRIPLE,
6: BondType.SINGLE,
7: BondType.DOUBLE,
8: BondType.ANY,
}
BOND_TYPE_MAPPING_REV = {
BondType.SINGLE: 1,
BondType.DOUBLE: 2,
BondType.TRIPLE: 3,
BondType.AROMATIC_SINGLE: 1,
BondType.AROMATIC_DOUBLE: 2,
BondType.ANY: 8,
}

CHARGE_MAPPING = {0: 0, 1: 3, 2: 2, 3: 1, 5: -1, 6: -2, 7: -3}
CHARGE_MAPPING_REV = {val: key for key, val in CHARGE_MAPPING.items()}
from ..bonds import BondType


def read_structure_from_ctab(ctab_lines):
"""
Parse a *MDL* connection table (Ctab) to obtain an
:class:`AtomArray`. :footcite:`Dalby1992`
:class:`AtomArray`. :footcite:`Dalby1992`.
DEPRECATED: Moved to :mod:`biotite.structure.io.mol.ctab`.
Parameters
----------
Expand All @@ -60,48 +34,18 @@ def read_structure_from_ctab(ctab_lines):
.. footbibliography::
"""
n_atoms, n_bonds = _get_counts(ctab_lines[0])
atom_lines = ctab_lines[1 : 1 + n_atoms]
bond_lines = ctab_lines[1 + n_atoms : 1 + n_atoms + n_bonds]

atoms = AtomArray(n_atoms)
atoms.add_annotation("charge", int)
for i, line in enumerate(atom_lines):
atoms.coord[i, 0] = float(line[0:10])
atoms.coord[i, 1] = float(line[10:20])
atoms.coord[i, 2] = float(line[20:30])
atoms.element[i] = line[31:34].strip().upper()
charge = CHARGE_MAPPING.get(int(line[36:39]))
if charge is None:
warnings.warn(
f"Cannot handle MDL charge type {int(line[36 : 39])}, "
f"0 is used instead"
)
charge = 0
atoms.charge[i] = charge

bond_array = np.zeros((n_bonds, 3), dtype=np.uint32)
for i, line in enumerate(bond_lines):
bond_type = BOND_TYPE_MAPPING.get(int(line[6:9]))
if bond_type is None:
warnings.warn(
f"Cannot handle MDL bond type {int(line[6 : 9])}, "
f"BondType.ANY is used instead"
)
bond_type = BondType.ANY
bond_array[i, 0] = int(line[0:3]) - 1
bond_array[i, 1] = int(line[3:6]) - 1
bond_array[i, 2] = bond_type
atoms.bonds = BondList(n_atoms, bond_array)

return atoms
warnings.warn("Moved to biotite.structure.io.mol.ctab", DeprecationWarning)
from biotite.structure.io.mol.ctab import read_structure_from_ctab
return read_structure_from_ctab(ctab_lines)


def write_structure_to_ctab(atoms, default_bond_type=BondType.ANY):
"""
Convert an :class:`AtomArray` into a
*MDL* connection table (Ctab). :footcite:`Dalby1992`
DEPRECATED: Moved to :mod:`biotite.structure.io.mol.ctab`.
Parameters
----------
atoms : AtomArray
Expand All @@ -123,44 +67,6 @@ def write_structure_to_ctab(atoms, default_bond_type=BondType.ANY):
.. footbibliography::
"""
if isinstance(atoms, AtomArrayStack):
raise TypeError(
"An 'AtomArrayStack' was given, "
"but only a single model can be written"
)
if atoms.bonds is None:
raise BadStructureError("Input AtomArray has no associated BondList")

try:
charge = atoms.charge
except AttributeError:
charge = np.zeros(atoms.array_length(), dtype=int)

atom_lines = [
f"{atoms.coord[i,0]:>10.5f}"
f"{atoms.coord[i,1]:>10.5f}"
f"{atoms.coord[i,2]:>10.5f}"
f" {atoms.element[i]:>3}"
f" {CHARGE_MAPPING_REV.get(charge[i], 0):>3d}" + f"{0:>3d}" * 10
for i in range(atoms.array_length())
]

default_bond_value = BOND_TYPE_MAPPING_REV[default_bond_type]

bond_lines = [
f"{i+1:>3d}{j+1:>3d}"
f"{BOND_TYPE_MAPPING_REV.get(bond_type, default_bond_value):>3d}"
+ f"{0:>3d}" * 4
for i, j, bond_type in atoms.bonds.as_array()
]

counts_line = (
f"{len(atom_lines):>3d}{len(bond_lines):>3d}"
" 0 0 0 0 0 0 0 1 V2000"
)

return [counts_line] + atom_lines + bond_lines + ["M END"]


def _get_counts(counts_line):
return int(counts_line[0:3]), int(counts_line[3:6])
warnings.warn("Moved to biotite.structure.io.mol.ctab", DeprecationWarning)
from biotite.structure.io.mol.ctab import write_structure_to_ctab
return write_structure_to_ctab(atoms, default_bond_type)
13 changes: 10 additions & 3 deletions src/biotite/structure/io/mol/convert.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,8 @@ def get_structure(mol_file):
return mol_file.get_structure()


def set_structure(mol_file, atoms, default_bond_type=BondType.ANY):
def set_structure(mol_file, atoms, default_bond_type=BondType.ANY,
version=None):
"""
Set the :class:`AtomArray` for the MOL file.
Expand All @@ -46,6 +47,12 @@ def set_structure(mol_file, atoms, default_bond_type=BondType.ANY):
atoms : AtomArray
The array to be saved into this file.
Must have an associated :class:`BondList`.
version : {"V2000", "V3000"}, optional
The version of the CTAB format.
``"V2000"`` uses the *Atom* and *Bond* block, while ``"V3000"``
uses the *Properties* block.
By default, ``"V2000"`` is used unless the number of atoms or
bonds exceeds the fixed-size columns in the table, in which case
``"V3000"`` is used.
"""
mol_file.set_structure(atoms, default_bond_type)
mol_file.set_structure(atoms, default_bond_type, version)
Loading

0 comments on commit 2e02536

Please sign in to comment.