From 2e02536d83e797c09a4a896b5041e6b4c455a56d Mon Sep 17 00:00:00 2001
From: Patrick Kunzmann <padix.key@gmail.com>
Date: Mon, 20 May 2024 12:16:00 +0200
Subject: [PATCH] Support V3000 CTAB

---
 src/biotite/structure/io/ctab.py              | 118 +-----
 src/biotite/structure/io/mol/convert.py       |  13 +-
 src/biotite/structure/io/mol/ctab.py          | 368 ++++++++++++++++++
 src/biotite/structure/io/mol/file.py          |  22 +-
 tests/structure/data/molecules/03F.v3000.sdf  | 285 ++++++++++++++
 tests/structure/data/molecules/CYN.v3000.sdf  |  16 +
 tests/structure/data/molecules/HWB.v3000.sdf  |  79 ++++
 tests/structure/data/molecules/README.rst     |   5 +-
 tests/structure/data/molecules/TYR.v3000.sdf  |  61 +++
 .../data/molecules/create_v3000_sdf.py        |  14 +
 tests/structure/test_mol.py                   |  89 ++++-
 11 files changed, 937 insertions(+), 133 deletions(-)
 create mode 100644 src/biotite/structure/io/mol/ctab.py
 create mode 100644 tests/structure/data/molecules/03F.v3000.sdf
 create mode 100644 tests/structure/data/molecules/CYN.v3000.sdf
 create mode 100644 tests/structure/data/molecules/HWB.v3000.sdf
 create mode 100644 tests/structure/data/molecules/TYR.v3000.sdf
 create mode 100644 tests/structure/data/molecules/create_v3000_sdf.py

diff --git a/src/biotite/structure/io/ctab.py b/src/biotite/structure/io/ctab.py
index ed253c069..f2dc61982 100644
--- a/src/biotite/structure/io/ctab.py
+++ b/src/biotite/structure/io/ctab.py
@@ -2,46 +2,20 @@
 # under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
 # information.
 
-"""
-Functions for parsing and writing an :class:`AtomArray` from/to
-*MDL* connection tables (Ctab).
-"""
-
 __name__ = "biotite.structure.io"
 __author__ = "Patrick Kunzmann"
 __all__ = ["read_structure_from_ctab", "write_structure_to_ctab"]
 
 import warnings
-import numpy as np
-from ..error import BadStructureError
-from ..atoms import AtomArray, AtomArrayStack
-from ..bonds import BondList, BondType
-
-BOND_TYPE_MAPPING = {
-    1: BondType.SINGLE,
-    2: BondType.DOUBLE,
-    3: BondType.TRIPLE,
-    6: BondType.SINGLE,
-    7: BondType.DOUBLE,
-    8: BondType.ANY,
-}
-BOND_TYPE_MAPPING_REV = {
-    BondType.SINGLE: 1,
-    BondType.DOUBLE: 2,
-    BondType.TRIPLE: 3,
-    BondType.AROMATIC_SINGLE: 1,
-    BondType.AROMATIC_DOUBLE: 2,
-    BondType.ANY: 8,
-}
-
-CHARGE_MAPPING = {0: 0, 1: 3, 2: 2, 3: 1, 5: -1, 6: -2, 7: -3}
-CHARGE_MAPPING_REV = {val: key for key, val in CHARGE_MAPPING.items()}
+from ..bonds import BondType
 
 
 def read_structure_from_ctab(ctab_lines):
     """
     Parse a *MDL* connection table (Ctab) to obtain an
-    :class:`AtomArray`. :footcite:`Dalby1992`
+    :class:`AtomArray`. :footcite:`Dalby1992`.
+
+    DEPRECATED: Moved to :mod:`biotite.structure.io.mol.ctab`.
 
     Parameters
     ----------
@@ -60,41 +34,9 @@ def read_structure_from_ctab(ctab_lines):
 
     .. footbibliography::
     """
-    n_atoms, n_bonds = _get_counts(ctab_lines[0])
-    atom_lines = ctab_lines[1 : 1 + n_atoms]
-    bond_lines = ctab_lines[1 + n_atoms : 1 + n_atoms + n_bonds]
-
-    atoms = AtomArray(n_atoms)
-    atoms.add_annotation("charge", int)
-    for i, line in enumerate(atom_lines):
-        atoms.coord[i, 0] = float(line[0:10])
-        atoms.coord[i, 1] = float(line[10:20])
-        atoms.coord[i, 2] = float(line[20:30])
-        atoms.element[i] = line[31:34].strip().upper()
-        charge = CHARGE_MAPPING.get(int(line[36:39]))
-        if charge is None:
-            warnings.warn(
-                f"Cannot handle MDL charge type {int(line[36 : 39])}, "
-                f"0 is used instead"
-            )
-            charge = 0
-        atoms.charge[i] = charge
-
-    bond_array = np.zeros((n_bonds, 3), dtype=np.uint32)
-    for i, line in enumerate(bond_lines):
-        bond_type = BOND_TYPE_MAPPING.get(int(line[6:9]))
-        if bond_type is None:
-            warnings.warn(
-                f"Cannot handle MDL bond type {int(line[6 : 9])}, "
-                f"BondType.ANY is used instead"
-            )
-            bond_type = BondType.ANY
-        bond_array[i, 0] = int(line[0:3]) - 1
-        bond_array[i, 1] = int(line[3:6]) - 1
-        bond_array[i, 2] = bond_type
-    atoms.bonds = BondList(n_atoms, bond_array)
-
-    return atoms
+    warnings.warn("Moved to biotite.structure.io.mol.ctab", DeprecationWarning)
+    from biotite.structure.io.mol.ctab import read_structure_from_ctab
+    return read_structure_from_ctab(ctab_lines)
 
 
 def write_structure_to_ctab(atoms, default_bond_type=BondType.ANY):
@@ -102,6 +44,8 @@ def write_structure_to_ctab(atoms, default_bond_type=BondType.ANY):
     Convert an :class:`AtomArray` into a
     *MDL* connection table (Ctab). :footcite:`Dalby1992`
 
+    DEPRECATED: Moved to :mod:`biotite.structure.io.mol.ctab`.
+
     Parameters
     ----------
     atoms : AtomArray
@@ -123,44 +67,6 @@ def write_structure_to_ctab(atoms, default_bond_type=BondType.ANY):
 
     .. footbibliography::
     """
-    if isinstance(atoms, AtomArrayStack):
-        raise TypeError(
-            "An 'AtomArrayStack' was given, "
-            "but only a single model can be written"
-        )
-    if atoms.bonds is None:
-        raise BadStructureError("Input AtomArray has no associated BondList")
-
-    try:
-        charge = atoms.charge
-    except AttributeError:
-        charge = np.zeros(atoms.array_length(), dtype=int)
-
-    atom_lines = [
-        f"{atoms.coord[i,0]:>10.5f}"
-        f"{atoms.coord[i,1]:>10.5f}"
-        f"{atoms.coord[i,2]:>10.5f}"
-        f" {atoms.element[i]:>3}"
-        f"  {CHARGE_MAPPING_REV.get(charge[i], 0):>3d}" + f"{0:>3d}" * 10
-        for i in range(atoms.array_length())
-    ]
-
-    default_bond_value = BOND_TYPE_MAPPING_REV[default_bond_type]
-
-    bond_lines = [
-        f"{i+1:>3d}{j+1:>3d}"
-        f"{BOND_TYPE_MAPPING_REV.get(bond_type, default_bond_value):>3d}"
-        + f"{0:>3d}" * 4
-        for i, j, bond_type in atoms.bonds.as_array()
-    ]
-
-    counts_line = (
-        f"{len(atom_lines):>3d}{len(bond_lines):>3d}"
-        "  0     0  0  0  0  0  0  1 V2000"
-    )
-
-    return [counts_line] + atom_lines + bond_lines + ["M  END"]
-
-
-def _get_counts(counts_line):
-    return int(counts_line[0:3]), int(counts_line[3:6])
+    warnings.warn("Moved to biotite.structure.io.mol.ctab", DeprecationWarning)
+    from biotite.structure.io.mol.ctab import write_structure_to_ctab
+    return write_structure_to_ctab(atoms, default_bond_type)
diff --git a/src/biotite/structure/io/mol/convert.py b/src/biotite/structure/io/mol/convert.py
index dc6d450b3..32ac1180a 100644
--- a/src/biotite/structure/io/mol/convert.py
+++ b/src/biotite/structure/io/mol/convert.py
@@ -32,7 +32,8 @@ def get_structure(mol_file):
     return mol_file.get_structure()
 
 
-def set_structure(mol_file, atoms, default_bond_type=BondType.ANY):
+def set_structure(mol_file, atoms, default_bond_type=BondType.ANY,
+                  version=None):
     """
     Set the :class:`AtomArray` for the MOL file.
 
@@ -46,6 +47,12 @@ def set_structure(mol_file, atoms, default_bond_type=BondType.ANY):
     array : AtomArray
         The array to be saved into this file.
         Must have an associated :class:`BondList`.
-
+    version : {"V2000", "V3000"}, optional
+        The version of the CTAB format.
+        ``"V2000"`` uses the *Atom* and *Bond* block, while ``"V3000"``
+        uses the *Properties* block.
+        By default, ``"V2000"`` is used unless the number of atoms or
+        bonds exceeds 999 (the limit of the fixed-width count columns),
+        in which case ``"V3000"`` is used.
     """
-    mol_file.set_structure(atoms, default_bond_type)
+    mol_file.set_structure(atoms, default_bond_type, version)
diff --git a/src/biotite/structure/io/mol/ctab.py b/src/biotite/structure/io/mol/ctab.py
new file mode 100644
index 000000000..54eef19b4
--- /dev/null
+++ b/src/biotite/structure/io/mol/ctab.py
@@ -0,0 +1,368 @@
+# This source code is part of the Biotite package and is distributed
+# under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
+# information.
+
+"""
+Functions for parsing and writing an :class:`AtomArray` from/to
+*MDL* connection tables (Ctab).
+"""
+
+__name__ = "biotite.structure.io.mol"
+__author__ = "Patrick Kunzmann"
+__all__ = ["read_structure_from_ctab", "write_structure_to_ctab"]
+
+import warnings
+import shlex
+import numpy as np
+from ....file import InvalidFileError
+from ...error import BadStructureError
+from ...atoms import AtomArray, AtomArrayStack
+from ...bonds import BondList, BondType
+
+BOND_TYPE_MAPPING = {
+    1: BondType.SINGLE,
+    2: BondType.DOUBLE,
+    3: BondType.TRIPLE,
+    5: BondType.ANY,
+    6: BondType.SINGLE,
+    7: BondType.DOUBLE,
+    8: BondType.ANY,
+}
+BOND_TYPE_MAPPING_REV = {
+    BondType.SINGLE: 1,
+    BondType.DOUBLE: 2,
+    BondType.TRIPLE: 3,
+    BondType.AROMATIC_SINGLE: 1,
+    BondType.AROMATIC_DOUBLE: 2,
+    BondType.ANY: 8,
+}
+
+CHARGE_MAPPING = {0: 0, 1: 3, 2: 2, 3: 1, 5: -1, 6: -2, 7: -3}
+CHARGE_MAPPING_REV = {val: key for key, val in CHARGE_MAPPING.items()}
+
+V2000_COMPATIBILITY_LINE = "  0  0  0  0  0  0  0  0  0  0999 V3000"
+
+
+def read_structure_from_ctab(ctab_lines):
+    """
+    Parse a *MDL* connection table (Ctab) to obtain an
+    :class:`AtomArray`.
+    :footcite:`Dalby1992`
+
+    Parameters
+    ----------
+    ctab_lines : lines of str
+        The lines containing the *ctab*.
+        Must begin with the *counts* line and end with the `M END` line.
+
+    Returns
+    -------
+    atoms : AtomArray
+        This :class:`AtomArray` contains the optional ``charge``
+        annotation and has an associated :class:`BondList`.
+
+    References
+    ----------
+
+    ``V3000`` specification was taken from
+    `<https://discover.3ds.com/sites/default/files/2020-08/biovia_ctfileformats_2020.pdf>`_.
+
+    .. footbibliography::
+
+    """
+    match _get_version(ctab_lines[0]):
+        case "V2000":
+            return _read_structure_from_ctab_v2000(ctab_lines)
+        case "V3000":
+            return _read_structure_from_ctab_v3000(ctab_lines)
+        case "":
+            raise InvalidFileError("CTAB counts line misses version")
+        case unkown_version:
+            raise InvalidFileError(f"Unknown CTAB version '{unkown_version}'")
+
+
+def write_structure_to_ctab(atoms, default_bond_type=BondType.ANY,
+                            version=None):
+    """
+    Convert an :class:`AtomArray` into a
+    *MDL* connection table (Ctab).
+    :footcite:`Dalby1992`
+
+    Parameters
+    ----------
+    atoms : AtomArray
+        The array must have an associated :class:`BondList`.
+
+    Returns
+    -------
+    ctab_lines : lines of str
+        The lines containing the *ctab*.
+        The lines begin with the *counts* line and end with the `M END`
+        line.
+    default_bond_type : BondType
+        Bond type fallback in the *Bond block* if a bond has no bond_type
+        defined in *atoms* array. By default, each bond is treated as
+        :attr:`BondType.ANY`.
+    version : {"V2000", "V3000"}, optional
+        The version of the CTAB format.
+        ``"V2000"`` uses the *Atom* and *Bond* block, while ``"V3000"``
+        uses the *Properties* block.
+        By default, ``"V2000"`` is used unless the number of atoms or
+        bonds exceeds 999, in which case ``"V3000"`` is used.
+
+    References
+    ----------
+
+    ``V3000`` specification was taken from
+    `<https://discover.3ds.com/sites/default/files/2020-08/biovia_ctfileformats_2020.pdf>`_.
+
+    .. footbibliography::
+
+    """
+    if isinstance(atoms, AtomArrayStack):
+        raise TypeError(
+            "An 'AtomArrayStack' was given, "
+            "but only a single model can be written"
+        )
+    if atoms.bonds is None:
+        raise BadStructureError("Input AtomArray has no associated BondList")
+    if np.isnan(atoms.coord).any():
+        raise BadStructureError("Input AtomArray has NaN coordinates")
+
+    match version:
+        case None:
+            if _is_v2000_compatible(
+                atoms.array_length(), atoms.bonds.get_bond_count()
+            ):
+                return _write_structure_to_ctab_v2000(atoms, default_bond_type)
+            else:
+                return _write_structure_to_ctab_v3000(atoms, default_bond_type)
+        case "V2000":
+            if not _is_v2000_compatible(
+                atoms.array_length(), atoms.bonds.get_bond_count()
+            ):
+                raise ValueError(
+                    "The given number of atoms or bonds is too large "
+                    "for V2000 format"
+                )
+            return _write_structure_to_ctab_v2000(atoms, default_bond_type)
+        case "V3000":
+            return _write_structure_to_ctab_v3000(atoms, default_bond_type)
+        case unkown_version:
+            raise ValueError(f"Unknown CTAB version '{unkown_version}'")
+
+
+def _read_structure_from_ctab_v2000(ctab_lines):
+    n_atoms, n_bonds = _get_counts_v2000(ctab_lines[0])
+    atom_lines = ctab_lines[1 : 1 + n_atoms]
+    bond_lines = ctab_lines[1 + n_atoms : 1 + n_atoms + n_bonds]
+
+    atoms = AtomArray(n_atoms)
+    atoms.add_annotation("charge", int)
+    for i, line in enumerate(atom_lines):
+        atoms.coord[i, 0] = float(line[0:10])
+        atoms.coord[i, 1] = float(line[10:20])
+        atoms.coord[i, 2] = float(line[20:30])
+        atoms.element[i] = line[31:34].strip().upper()
+        charge = CHARGE_MAPPING.get(int(line[36:39]))
+        if charge is None:
+            warnings.warn(
+                f"Cannot handle MDL charge type {int(line[36 : 39])}, "
+                f"0 is used instead"
+            )
+            charge = 0
+        atoms.charge[i] = charge
+
+    bond_array = np.zeros((n_bonds, 3), dtype=np.uint32)
+    for i, line in enumerate(bond_lines):
+        bond_type = BOND_TYPE_MAPPING.get(int(line[6:9]))
+        if bond_type is None:
+            warnings.warn(
+                f"Cannot handle MDL bond type {int(line[6 : 9])}, "
+                f"BondType.ANY is used instead"
+            )
+            bond_type = BondType.ANY
+        bond_array[i, 0] = int(line[0:3]) - 1
+        bond_array[i, 1] = int(line[3:6]) - 1
+        bond_array[i, 2] = bond_type
+    atoms.bonds = BondList(n_atoms, bond_array)
+
+    return atoms
+
+def _read_structure_from_ctab_v3000(ctab_lines):
+    v30_lines = [
+        line[6:].strip() for line in ctab_lines if line.startswith("M  V30")
+    ]
+
+    atom_lines = _get_block_v3000(v30_lines, "ATOM")
+    if len(atom_lines) == 0:
+        raise InvalidFileError("ATOM block is empty")
+    atoms = AtomArray(len(atom_lines))
+    atoms.add_annotation("charge", int)
+    # The V3000 atom index does not necessarily count from 1 to n,
+    # but allows arbitrary positive integers
+    # Hence, a mapping from V3000 atom index to AtomArray index is
+    # needed to get the correct index for a bond
+    v30_atom_indices = {}
+    for i, line in enumerate(atom_lines):
+        if "'" in line or '"' in line:
+            columns = shlex.split(line)
+        else:
+            columns = line.split()
+        v30_index = int(columns[0])
+        v30_type = columns[1]
+        if v30_type == "R#":
+            raise NotImplementedError("Rgroup atoms are not supported")
+        v30_coord = np.array(columns[2:5], dtype=float)
+        v30_properties = create_property_dict_v3000(columns[6:])
+
+        v30_atom_indices[v30_index] = i
+        atoms.coord[i] = v30_coord
+        atoms.element[i] = v30_type.upper()
+        atoms.charge[i] = int(v30_properties.get("CHG", 0))
+
+    bond_lines = _get_block_v3000(v30_lines, "BOND")
+    bond_array = np.zeros((len(bond_lines), 3), dtype=np.uint32)
+    for i, line in enumerate(bond_lines):
+        columns = line.split()
+        v30_type = int(columns[1])
+        v30_atom_index_1 = int(columns[2])
+        v30_atom_index_2 = int(columns[3])
+
+        bond_type = BOND_TYPE_MAPPING.get(v30_type)
+        if bond_type is None:
+            warnings.warn(
+                f"Cannot handle MDL bond type {v30_type}, "
+                f"BondType.ANY is used instead"
+            )
+            bond_type = BondType.ANY
+        bond_array[i, 0] = v30_atom_indices[v30_atom_index_1]
+        bond_array[i, 1] = v30_atom_indices[v30_atom_index_2]
+        bond_array[i, 2] = bond_type
+    atoms.bonds = BondList(atoms.array_length(), bond_array)
+
+    return atoms
+
+def _get_version(counts_line):
+    return counts_line[33:39].strip()
+
+def _is_v2000_compatible(n_atoms, n_bonds):
+    # The format uses a maximum of 3 digits for the atom and bond count
+    return n_atoms < 1000 and n_bonds < 1000
+
+def _get_counts_v2000(counts_line):
+    return int(counts_line[0:3]), int(counts_line[3:6])
+
+def _get_block_v3000(v30_lines, block_name):
+    block_lines = []
+    in_block = False
+    for line in v30_lines:
+        if line.startswith(f"BEGIN {block_name}"):
+            in_block = True
+        elif line.startswith(f"END {block_name}"):
+            if in_block:
+                return block_lines
+            else:
+                raise InvalidFileError(
+                    f"Block '{block_name}' ended before it began"
+                )
+        elif in_block:
+            block_lines.append(line)
+    return block_lines
+
+def create_property_dict_v3000(property_strings):
+    properties = {}
+    for prop in property_strings:
+        key, value = prop.split("=")
+        properties[key] = value
+    return properties
+
+
+def _write_structure_to_ctab_v2000(atoms, default_bond_type):
+    try:
+        charge = atoms.charge
+    except AttributeError:
+        charge = np.zeros(atoms.array_length(), dtype=int)
+
+    counts_line = (
+        f"{atoms.array_length():>3d}{atoms.bonds.get_bond_count():>3d}"
+        "  0     0  0  0  0  0  0  1 V2000"
+    )
+
+    atom_lines = [
+        f"{atoms.coord[i,0]:>10.5f}"
+        f"{atoms.coord[i,1]:>10.5f}"
+        f"{atoms.coord[i,2]:>10.5f}"
+        f" {atoms.element[i].capitalize():>3}"
+        f"  {CHARGE_MAPPING_REV.get(charge[i], 0):>3d}" + f"{0:>3d}" * 10
+        for i in range(atoms.array_length())
+    ]
+
+    default_bond_value = BOND_TYPE_MAPPING_REV[default_bond_type]
+    bond_lines = [
+        f"{i+1:>3d}{j+1:>3d}"
+        f"{BOND_TYPE_MAPPING_REV.get(bond_type, default_bond_value):>3d}"
+        + f"{0:>3d}" * 4
+        for i, j, bond_type in atoms.bonds.as_array()
+    ]
+
+    return [counts_line] + atom_lines + bond_lines + ["M  END"]
+
+
+def _write_structure_to_ctab_v3000(atoms, default_bond_type):
+    try:
+        charges = atoms.charge
+    except AttributeError:
+        charges = np.zeros(atoms.array_length(), dtype=int)
+
+    counts_line = (
+        f"COUNTS {atoms.array_length()} {atoms.bonds.get_bond_count()} 0 0 0"
+    )
+
+    atom_lines = [
+        f"{i + 1}"
+        f" {_quote(atoms.element[i].capitalize())}"
+        f" {atoms.coord[i,0]:.5f}"
+        f" {atoms.coord[i,1]:.5f}"
+        f" {atoms.coord[i,2]:.5f}"
+        # 'aamap' is unused
+        f" 0"
+        f" {_to_property(charges[i])}"
+        for i in range(atoms.array_length())
+    ]
+
+    default_bond_value = BOND_TYPE_MAPPING_REV[default_bond_type]
+    bond_lines = [
+        f"{k + 1}"
+        f" {BOND_TYPE_MAPPING_REV.get(bond_type, default_bond_value)}"
+        f" {i + 1}"
+        f" {j + 1}"
+        for k, (i, j, bond_type) in enumerate(atoms.bonds.as_array())
+    ]
+
+    lines = (
+        ["BEGIN CTAB"] +
+        [counts_line] +
+        ["BEGIN ATOM"] +
+        atom_lines +
+        ["END ATOM"] +
+        ["BEGIN BOND"] +
+        bond_lines +
+        ["END BOND"] +
+        ["END CTAB"]
+    )
+    # Mark lines as V3000 CTAB
+    lines = ["M  V30 " + line for line in lines]
+    return [V2000_COMPATIBILITY_LINE] + lines + ["M  END"]
+
+def _to_property(charge):
+    if charge == 0:
+        return ""
+    else:
+        return f"CHG={charge}"
+
+def _quote(string):
+    if " " in string or len(string) == 0:
+        return f'"{string}"'
+    else:
+        return string
\ No newline at end of file
diff --git a/src/biotite/structure/io/mol/file.py b/src/biotite/structure/io/mol/file.py
index 18ae01ea5..feaa44092 100644
--- a/src/biotite/structure/io/mol/file.py
+++ b/src/biotite/structure/io/mol/file.py
@@ -7,12 +7,8 @@
 __all__ = ["MOLFile"]
 
 import datetime
-from warnings import warn
-import numpy as np
-from ...atoms import AtomArray
 from ....file import TextFile, InvalidFileError
-from ...error import BadStructureError
-from ..ctab import read_structure_from_ctab, write_structure_to_ctab
+from .ctab import read_structure_from_ctab, write_structure_to_ctab
 from ...bonds import BondType
 
 
@@ -24,7 +20,8 @@
 class MOLFile(TextFile):
     """
     This class represents a file in MOL format, that is used to store
-    structure information for small molecules. :footcite:`Dalby1992`
+    structure information for small molecules.
+    :footcite:`Dalby1992`
 
     Since its use is intended for single small molecules, it stores
     less atom annotation information than the macromolecular structure
@@ -179,7 +176,8 @@ def get_structure(self):
         return read_structure_from_ctab(ctab_lines)
 
 
-    def set_structure(self, atoms, default_bond_type=BondType.ANY):
+    def set_structure(self, atoms, default_bond_type=BondType.ANY,
+                      version=None):
         """
         Set the :class:`AtomArray` for the file.
 
@@ -192,13 +190,19 @@ def set_structure(self, atoms, default_bond_type=BondType.ANY):
             Bond type fallback in the *Bond block* if a bond has no bond_type
             defined in *atoms* array. By default, each bond is treated as
             :attr:`BondType.ANY`.
+        version : {"V2000", "V3000"}, optional
+            The version of the CTAB format.
+            ``"V2000"`` uses the *Atom* and *Bond* block, while
+            ``"V3000"`` uses the *Properties* block.
+            By default, ``"V2000"`` is used unless the number of atoms
+            or bonds exceeds 999, in which case ``"V3000"`` is used.
         """
         self.lines = self.lines[:N_HEADER] + write_structure_to_ctab(
-            atoms,
-            default_bond_type
+            atoms, default_bond_type, version
         )
 
 
+
 def _get_ctab_lines(lines):
     for i, line in enumerate(lines):
         if line.startswith("M  END"):
diff --git a/tests/structure/data/molecules/03F.v3000.sdf b/tests/structure/data/molecules/03F.v3000.sdf
new file mode 100644
index 000000000..57bb8287d
--- /dev/null
+++ b/tests/structure/data/molecules/03F.v3000.sdf
@@ -0,0 +1,285 @@
+03F
+     RDKit          3D
+
+  0  0  0  0  0  0  0  0  0  0999 V3000
+M  V30 BEGIN CTAB
+M  V30 COUNTS 130 130 0 0 1
+M  V30 BEGIN ATOM
+M  V30 1 C -3.597000 3.707000 0.561000 0
+M  V30 2 C -3.174000 2.348000 -0.002000 0 CFG=2
+M  V30 3 N -1.783000 2.418000 -0.455000 0
+M  V30 4 C -4.077000 1.981000 -1.181000 0 CFG=2
+M  V30 5 O -5.442000 1.997000 -0.759000 0
+M  V30 6 C -3.721000 0.603000 -1.677000 0
+M  V30 7 C -4.645000 -0.322000 -1.761000 0
+M  V30 8 C -4.288000 -1.700000 -2.257000 0
+M  V30 9 C -4.718000 -2.741000 -1.221000 0
+M  V30 10 C -4.356000 -4.140000 -1.724000 0
+M  V30 11 C -4.786000 -5.181000 -0.689000 0
+M  V30 12 C -4.424000 -6.580000 -1.192000 0
+M  V30 13 C -4.853000 -7.621000 -0.157000 0
+M  V30 14 C -4.492000 -9.020000 -0.660000 0
+M  V30 15 C -4.921000 -10.061000 0.375000 0
+M  V30 16 C -4.559000 -11.460000 -0.128000 0
+M  V30 17 C -4.989000 -12.502000 0.908000 0
+M  V30 18 C -4.627000 -13.901000 0.405000 0
+M  V30 19 C -5.057000 -14.942000 1.440000 0
+M  V30 20 C -4.695000 -16.341000 0.937000 0
+M  V30 21 C -3.983000 5.987000 -0.069000 0 CFG=1
+M  V30 22 O -3.600000 4.676000 -0.489000 0
+M  V30 23 C -3.833000 6.963000 -1.239000 0 CFG=2
+M  V30 24 O -2.461000 7.025000 -1.636000 0
+M  V30 25 C -4.301000 8.353000 -0.797000 0 CFG=1
+M  V30 26 O -4.231000 9.255000 -1.904000 0
+M  V30 27 C -5.747000 8.259000 -0.301000 0 CFG=2
+M  V30 28 O -6.171000 9.536000 0.181000 0
+M  V30 29 O -7.331000 6.221000 2.427000 0
+M  V30 30 C -5.824000 7.233000 0.833000 0 CFG=1
+M  V30 31 C -7.275000 7.089000 1.294000 0
+M  V30 32 O -5.345000 5.971000 0.364000 0
+M  V30 33 C -0.778000 2.223000 0.422000 0
+M  V30 34 O -1.026000 1.989000 1.586000 0
+M  V30 35 C 0.653000 2.295000 -0.044000 0
+M  V30 36 C 1.589000 2.038000 1.139000 0
+M  V30 37 C 3.042000 2.112000 0.666000 0
+M  V30 38 C 3.977000 1.855000 1.849000 0
+M  V30 39 C 5.430000 1.928000 1.376000 0
+M  V30 40 C 6.365000 1.671000 2.559000 0
+M  V30 41 C 7.819000 1.745000 2.086000 0
+M  V30 42 C 8.740000 1.492000 3.251000 0
+M  V30 43 C 9.674000 0.578000 3.161000 0
+M  V30 44 C 9.929000 -0.113000 1.846000 0
+M  V30 45 C 11.411000 0.004000 1.487000 0
+M  V30 46 C 11.670000 -0.697000 0.152000 0
+M  V30 47 C 13.152000 -0.581000 -0.207000 0
+M  V30 48 C 13.410000 -1.282000 -1.542000 0
+M  V30 49 C 14.893000 -1.165000 -1.901000 0
+M  V30 50 C 15.151000 -1.866000 -3.236000 0
+M  V30 51 C 16.634000 -1.749000 -3.595000 0
+M  V30 52 H -6.143000 10.236000 -0.485000 0
+M  V30 53 H -3.266000 1.589000 0.776000 0
+M  V30 54 H -3.343000 9.357000 -2.272000 0
+M  V30 55 H -2.096000 6.177000 -1.923000 0
+M  V30 56 H -3.937000 2.704000 -1.985000 0
+M  V30 57 H -2.704000 0.378000 -1.962000 0
+M  V30 58 H -4.937000 -6.771000 -2.134000 0
+M  V30 59 H -3.347000 -6.643000 -1.346000 0
+M  V30 60 H -4.597000 3.629000 0.987000 0
+M  V30 61 H -4.340000 -7.430000 0.785000 0
+M  V30 62 H -2.895000 4.014000 1.336000 0
+M  V30 63 H -5.931000 -7.558000 -0.003000 0
+M  V30 64 H -3.344000 6.306000 0.755000 0
+M  V30 65 H -5.005000 -9.211000 -1.602000 0
+M  V30 66 H -3.414000 -9.084000 -0.814000 0
+M  V30 67 H 16.818000 -2.249000 -4.546000 0
+M  V30 68 H -4.440000 6.622000 -2.077000 0
+M  V30 69 H -4.407000 -9.870000 1.318000 0
+M  V30 70 H -5.998000 -9.998000 0.529000 0
+M  V30 71 H -3.662000 8.715000 0.008000 0
+M  V30 72 H -5.073000 -11.652000 -1.070000 0
+M  V30 73 H -3.482000 -11.524000 -0.282000 0
+M  V30 74 H -6.394000 7.946000 -1.121000 0
+M  V30 75 H -4.475000 -12.311000 1.850000 0
+M  V30 76 H -5.661000 -0.097000 -1.475000 0
+M  V30 77 H -6.066000 -12.438000 1.062000 0
+M  V30 78 H -8.224000 6.083000 2.774000 0
+M  V30 79 H -5.208000 7.568000 1.668000 0
+M  V30 80 H -4.802000 -1.891000 -3.199000 0
+M  V30 81 H -5.141000 -14.092000 -0.538000 0
+M  V30 82 H -3.211000 -1.763000 -2.411000 0
+M  V30 83 H -3.550000 -13.964000 0.251000 0
+M  V30 84 H -4.204000 -2.550000 -0.279000 0
+M  V30 85 H -4.543000 -14.751000 2.382000 0
+M  V30 86 H -5.795000 -2.677000 -1.067000 0
+M  V30 87 H -6.134000 -14.878000 1.594000 0
+M  V30 88 H -4.870000 -4.331000 -2.667000 0
+M  V30 89 H -5.001000 -17.083000 1.674000 0
+M  V30 90 H -3.279000 -4.203000 -1.878000 0
+M  V30 91 H -5.208000 -16.532000 -0.005000 0
+M  V30 92 H -3.618000 -16.404000 0.783000 0
+M  V30 93 H -4.272000 -4.990000 0.253000 0
+M  V30 94 H -5.863000 -5.118000 -0.535000 0
+M  V30 95 H -7.874000 6.670000 0.485000 0
+M  V30 96 H -7.669000 8.068000 1.566000 0
+M  V30 97 H 0.824000 1.541000 -0.812000 0
+M  V30 98 H 0.852000 3.284000 -0.456000 0
+M  V30 99 H 1.418000 2.793000 1.907000 0
+M  V30 100 H 1.390000 1.049000 1.551000 0
+M  V30 101 H 3.212000 1.357000 -0.102000 0
+M  V30 102 H 3.240000 3.101000 0.254000 0
+M  V30 103 H 3.807000 2.609000 2.617000 0
+M  V30 104 H 3.779000 0.865000 2.261000 0
+M  V30 105 H 5.601000 1.174000 0.608000 0
+M  V30 106 H 5.629000 2.918000 0.964000 0
+M  V30 107 H 6.195000 2.426000 3.327000 0
+M  V30 108 H 6.167000 0.682000 2.971000 0
+M  V30 109 H 7.989000 0.990000 1.317000 0
+M  V30 110 H 8.017000 2.734000 1.673000 0
+M  V30 111 H 8.630000 2.064000 4.160000 0
+M  V30 112 H 10.262000 0.319000 4.030000 0
+M  V30 113 H 9.657000 -1.165000 1.930000 0
+M  V30 114 H 9.328000 0.356000 1.067000 0
+M  V30 115 H 11.683000 1.056000 1.403000 0
+M  V30 116 H 12.012000 -0.465000 2.266000 0
+M  V30 117 H 11.398000 -1.750000 0.236000 0
+M  V30 118 H 11.069000 -0.228000 -0.627000 0
+M  V30 119 H 13.424000 0.472000 -0.291000 0
+M  V30 120 H -1.585000 2.605000 -1.386000 0
+M  V30 121 H 13.753000 -1.050000 0.572000 0
+M  V30 122 H 13.139000 -2.334000 -1.458000 0
+M  V30 123 H 12.810000 -0.812000 -2.321000 0
+M  V30 124 H -5.623000 1.437000 0.008000 0
+M  V30 125 H 15.165000 -0.113000 -1.985000 0
+M  V30 126 H 15.494000 -1.634000 -1.122000 0
+M  V30 127 H 14.880000 -2.919000 -3.152000 0
+M  V30 128 H 14.551000 -1.397000 -4.015000 0
+M  V30 129 H 16.906000 -0.697000 -3.679000 0
+M  V30 130 H 17.235000 -2.219000 -2.816000 0
+M  V30 END ATOM
+M  V30 BEGIN BOND
+M  V30 1 1 1 62
+M  V30 2 1 1 2
+M  V30 3 1 1 60
+M  V30 4 1 1 22
+M  V30 5 1 2 4
+M  V30 6 1 2 53 CFG=1
+M  V30 7 1 2 3
+M  V30 8 1 3 33
+M  V30 9 1 3 120
+M  V30 10 1 4 5
+M  V30 11 1 4 6
+M  V30 12 1 4 56 CFG=3
+M  V30 13 1 5 124
+M  V30 14 1 6 57
+M  V30 15 2 6 7
+M  V30 16 1 7 8
+M  V30 17 1 7 76
+M  V30 18 1 8 82
+M  V30 19 1 8 9
+M  V30 20 1 8 80
+M  V30 21 1 9 84
+M  V30 22 1 9 10
+M  V30 23 1 9 86
+M  V30 24 1 10 90
+M  V30 25 1 10 11
+M  V30 26 1 10 88
+M  V30 27 1 11 93
+M  V30 28 1 11 12
+M  V30 29 1 11 94
+M  V30 30 1 12 58
+M  V30 31 1 12 13
+M  V30 32 1 12 59
+M  V30 33 1 13 63
+M  V30 34 1 13 14
+M  V30 35 1 13 61
+M  V30 36 1 14 66
+M  V30 37 1 14 65
+M  V30 38 1 14 15
+M  V30 39 1 15 16
+M  V30 40 1 15 69
+M  V30 41 1 15 70
+M  V30 42 1 16 72
+M  V30 43 1 16 73
+M  V30 44 1 16 17
+M  V30 45 1 17 18
+M  V30 46 1 17 77
+M  V30 47 1 17 75
+M  V30 48 1 18 81
+M  V30 49 1 18 83
+M  V30 50 1 18 19
+M  V30 51 1 19 85
+M  V30 52 1 19 20
+M  V30 53 1 19 87
+M  V30 54 1 20 91
+M  V30 55 1 20 92
+M  V30 56 1 20 89
+M  V30 57 1 21 22
+M  V30 58 1 21 32
+M  V30 59 1 21 64 CFG=1
+M  V30 60 1 21 23
+M  V30 61 1 23 68 CFG=3
+M  V30 62 1 23 24
+M  V30 63 1 23 25
+M  V30 64 1 24 55
+M  V30 65 1 25 27
+M  V30 66 1 25 71 CFG=1
+M  V30 67 1 25 26
+M  V30 68 1 26 54
+M  V30 69 1 27 30
+M  V30 70 1 27 74 CFG=3
+M  V30 71 1 27 28
+M  V30 72 1 28 52
+M  V30 73 1 29 78
+M  V30 74 1 29 31
+M  V30 75 1 30 32
+M  V30 76 1 30 31
+M  V30 77 1 30 79 CFG=1
+M  V30 78 1 31 95
+M  V30 79 1 31 96
+M  V30 80 2 33 34
+M  V30 81 1 33 35
+M  V30 82 1 35 36
+M  V30 83 1 35 97
+M  V30 84 1 35 98
+M  V30 85 1 36 100
+M  V30 86 1 36 99
+M  V30 87 1 36 37
+M  V30 88 1 37 38
+M  V30 89 1 37 101
+M  V30 90 1 37 102
+M  V30 91 1 38 104
+M  V30 92 1 38 103
+M  V30 93 1 38 39
+M  V30 94 1 39 40
+M  V30 95 1 39 105
+M  V30 96 1 39 106
+M  V30 97 1 40 107
+M  V30 98 1 40 108
+M  V30 99 1 40 41
+M  V30 100 1 41 42
+M  V30 101 1 41 110
+M  V30 102 1 41 109
+M  V30 103 1 42 111
+M  V30 104 2 42 43
+M  V30 105 1 43 112
+M  V30 106 1 43 44
+M  V30 107 1 44 113
+M  V30 108 1 44 114
+M  V30 109 1 44 45
+M  V30 110 1 45 115
+M  V30 111 1 45 116
+M  V30 112 1 45 46
+M  V30 113 1 46 118
+M  V30 114 1 46 117
+M  V30 115 1 46 47
+M  V30 116 1 47 119
+M  V30 117 1 47 121
+M  V30 118 1 47 48
+M  V30 119 1 48 123
+M  V30 120 1 48 122
+M  V30 121 1 48 49
+M  V30 122 1 49 125
+M  V30 123 1 49 126
+M  V30 124 1 49 50
+M  V30 125 1 50 128
+M  V30 126 1 50 127
+M  V30 127 1 50 51
+M  V30 128 1 51 130
+M  V30 129 1 51 67
+M  V30 130 1 51 129
+M  V30 END BOND
+M  V30 END CTAB
+M  END
+>  <OPENEYE_ISO_SMILES>  (1) 
+CCCCCCCCCCCCC/C=C/[C@H]([C@H](CO[C@H]1[C@@H]([C@H]([C@@H]([C@H](O1)CO)O)O)O)NC(=O)CCCCCCC/C=C\CCCCCCCC)O
+
+>  <OPENEYE_INCHI>  (1) 
+InChI=1S/C42H79NO8/c1-3-5-7-9-11-13-15-17-18-20-22-24-26-28-30-32-38(46)43-35(34-50-42-41(49)40(48)39(47)37(33-44)51-42)36(45)31-29-27-25-23-21-19-16-14-12-10-8-6-4-2/h17-18,29,31,35-37,39-42,44-45,47-49H,3-16,19-28,30,32-34H2,1-2H3,(H,43,46)/b18-17-,31-29+/t35-,36+,37+,39+,40-,41+,42+/m0/s1
+
+>  <OPENEYE_INCHIKEY>  (1) 
+MVGFIPNJBNBHNC-HVFXMTMESA-N
+
+>  <FORMULA>  (1) 
+C42H79NO8
+
+$$$$
diff --git a/tests/structure/data/molecules/CYN.v3000.sdf b/tests/structure/data/molecules/CYN.v3000.sdf
new file mode 100644
index 000000000..d7f507ae8
--- /dev/null
+++ b/tests/structure/data/molecules/CYN.v3000.sdf
@@ -0,0 +1,16 @@
+CYN - Ideal conformer
+     RDKit          3D
+
+  0  0  0  0  0  0  0  0  0  0999 V3000
+M  V30 BEGIN CTAB
+M  V30 COUNTS 2 1 0 0 0
+M  V30 BEGIN ATOM
+M  V30 1 C 0.000000 0.000000 -0.611000 0 CHG=-1 VAL=3
+M  V30 2 N 0.000000 0.000000 0.524000 0
+M  V30 END ATOM
+M  V30 BEGIN BOND
+M  V30 1 3 1 2
+M  V30 END BOND
+M  V30 END CTAB
+M  END
+$$$$
diff --git a/tests/structure/data/molecules/HWB.v3000.sdf b/tests/structure/data/molecules/HWB.v3000.sdf
new file mode 100644
index 000000000..705c068a1
--- /dev/null
+++ b/tests/structure/data/molecules/HWB.v3000.sdf
@@ -0,0 +1,79 @@
+HWB - Ideal conformer
+     RDKit          3D
+
+  0  0  0  0  0  0  0  0  0  0999 V3000
+M  V30 BEGIN CTAB
+M  V30 COUNTS 32 34 0 0 0
+M  V30 BEGIN ATOM
+M  V30 1 O -4.652000 -2.732000 0.069000 0
+M  V30 2 C -3.892000 -1.607000 0.030000 0
+M  V30 3 C -4.512000 -0.363000 -0.094000 0
+M  V30 4 C -3.768000 0.784000 -0.135000 0
+M  V30 5 O -4.379000 1.990000 -0.254000 0
+M  V30 6 C -2.366000 0.706000 -0.052000 0
+M  V30 7 C -1.559000 1.862000 -0.089000 0
+M  V30 8 C -0.202000 1.643000 0.003000 0
+M  V30 9 O 0.679000 2.678000 -0.021000 0
+M  V30 10 C 0.242000 0.313000 0.122000 0
+M  V30 11 O -0.515000 -0.630000 0.147000 0 CHG=1 VAL=3
+M  V30 12 C -1.728000 -0.554000 0.074000 0
+M  V30 13 C -2.520000 -1.711000 0.118000 0
+M  V30 14 C 1.696000 0.058000 0.221000 0
+M  V30 15 C 2.314000 0.013000 1.471000 0
+M  V30 16 C 3.670000 -0.226000 1.562000 0
+M  V30 17 C 4.422000 -0.420000 0.412000 0
+M  V30 18 O 5.757000 -0.654000 0.507000 0
+M  V30 19 C 3.810000 -0.375000 -0.839000 0
+M  V30 20 O 4.549000 -0.566000 -1.965000 0
+M  V30 21 C 2.451000 -0.143000 -0.936000 0
+M  V30 22 H -4.823000 -3.121000 -0.800000 0
+M  V30 23 H -5.589000 -0.305000 -0.157000 0
+M  V30 24 H -4.604000 2.402000 0.591000 0
+M  V30 25 H -1.977000 2.854000 -0.183000 0
+M  V30 26 H 0.885000 3.039000 0.852000 0
+M  V30 27 H -2.056000 -2.681000 0.213000 0
+M  V30 28 H 1.732000 0.164000 2.368000 0
+M  V30 29 H 4.147000 -0.260000 2.530000 0
+M  V30 30 H 5.987000 -1.591000 0.579000 0
+M  V30 31 H 4.608000 -1.490000 -2.245000 0
+M  V30 32 H 1.976000 -0.109000 -1.905000 0
+M  V30 END ATOM
+M  V30 BEGIN BOND
+M  V30 1 1 9 8
+M  V30 2 2 8 7
+M  V30 3 1 8 10
+M  V30 4 1 7 6
+M  V30 5 1 5 4
+M  V30 6 1 20 19
+M  V30 7 2 21 19
+M  V30 8 1 21 14
+M  V30 9 1 10 14
+M  V30 10 2 10 11
+M  V30 11 2 6 4
+M  V30 12 1 6 12
+M  V30 13 1 19 17
+M  V30 14 1 4 3
+M  V30 15 2 14 15
+M  V30 16 1 11 12
+M  V30 17 1 17 18
+M  V30 18 2 17 16
+M  V30 19 2 12 13
+M  V30 20 1 15 16
+M  V30 21 2 3 2
+M  V30 22 1 13 2
+M  V30 23 1 2 1
+M  V30 24 1 1 22
+M  V30 25 1 3 23
+M  V30 26 1 5 24
+M  V30 27 1 7 25
+M  V30 28 1 9 26
+M  V30 29 1 13 27
+M  V30 30 1 15 28
+M  V30 31 1 16 29
+M  V30 32 1 18 30
+M  V30 33 1 20 31
+M  V30 34 1 21 32
+M  V30 END BOND
+M  V30 END CTAB
+M  END
+$$$$
diff --git a/tests/structure/data/molecules/README.rst b/tests/structure/data/molecules/README.rst
index b68cf3ebc..42cbe22d7 100644
--- a/tests/structure/data/molecules/README.rst
+++ b/tests/structure/data/molecules/README.rst
@@ -4,4 +4,7 @@ Test structures
 CYN: Caynide  - Contains negatively charged atom and triple bond
 HWB: Cyanidin - Contains positively charged atom
 TYR: Tyrosine - common amino acid
-03F: Large molecule - More than 100 bonds -> requires 3 characters in CTAB header
\ No newline at end of file
+03F: Large molecule - More than 100 bonds -> requires 3 characters in CTAB header
+
+The ``.v3000.sdf`` variants are the same structures but in V3000 format.
+They were created with the ``create_v3000_sdf.py`` script.
\ No newline at end of file
diff --git a/tests/structure/data/molecules/TYR.v3000.sdf b/tests/structure/data/molecules/TYR.v3000.sdf
new file mode 100644
index 000000000..61f343c6d
--- /dev/null
+++ b/tests/structure/data/molecules/TYR.v3000.sdf
@@ -0,0 +1,61 @@
+TYR - Ideal conformer
+     RDKit          3D
+
+  0  0  0  0  0  0  0  0  0  0999 V3000
+M  V30 BEGIN CTAB
+M  V30 COUNTS 24 24 0 0 0
+M  V30 BEGIN ATOM
+M  V30 1 N 1.320000 0.952000 1.428000 0
+M  V30 2 C -0.018000 0.429000 1.734000 0 CFG=2
+M  V30 3 C -0.103000 0.094000 3.201000 0
+M  V30 4 O 0.886000 -0.254000 3.799000 0
+M  V30 5 C -0.274000 -0.831000 0.907000 0
+M  V30 6 C -0.189000 -0.496000 -0.559000 0
+M  V30 7 C 1.022000 -0.589000 -1.219000 0
+M  V30 8 C -1.324000 -0.102000 -1.244000 0
+M  V30 9 C 1.103000 -0.282000 -2.563000 0
+M  V30 10 C -1.247000 0.210000 -2.587000 0
+M  V30 11 C -0.032000 0.118000 -3.252000 0
+M  V30 12 O 0.044000 0.420000 -4.574000 0
+M  V30 13 O -1.279000 0.184000 3.842000 0
+M  V30 14 H 1.977000 0.225000 1.669000 0
+M  V30 15 H 1.365000 1.063000 0.426000 0
+M  V30 16 H -0.767000 1.183000 1.489000 0
+M  V30 17 H 0.473000 -1.585000 1.152000 0
+M  V30 18 H -1.268000 -1.219000 1.134000 0
+M  V30 19 H 1.905000 -0.902000 -0.683000 0
+M  V30 20 H -2.269000 -0.031000 -0.727000 0
+M  V30 21 H 2.049000 -0.354000 -3.078000 0
+M  V30 22 H -2.132000 0.523000 -3.121000 0
+M  V30 23 H -0.123000 -0.399000 -5.059000 0
+M  V30 24 H -1.333000 -0.030000 4.784000 0
+M  V30 END ATOM
+M  V30 BEGIN BOND
+M  V30 1 1 1 2
+M  V30 2 1 1 14
+M  V30 3 1 1 15
+M  V30 4 1 2 3
+M  V30 5 1 2 5
+M  V30 6 1 2 16 CFG=3
+M  V30 7 2 3 4
+M  V30 8 1 3 13
+M  V30 9 1 5 6
+M  V30 10 1 5 17
+M  V30 11 1 5 18
+M  V30 12 2 6 7
+M  V30 13 1 6 8
+M  V30 14 1 7 9
+M  V30 15 1 7 19
+M  V30 16 2 8 10
+M  V30 17 1 8 20
+M  V30 18 2 9 11
+M  V30 19 1 9 21
+M  V30 20 1 10 11
+M  V30 21 1 10 22
+M  V30 22 1 11 12
+M  V30 23 1 12 23
+M  V30 24 1 13 24
+M  V30 END BOND
+M  V30 END CTAB
+M  END
+$$$$
diff --git a/tests/structure/data/molecules/create_v3000_sdf.py b/tests/structure/data/molecules/create_v3000_sdf.py
new file mode 100644
index 000000000..dc313722f
--- /dev/null
+++ b/tests/structure/data/molecules/create_v3000_sdf.py
@@ -0,0 +1,14 @@
+from pathlib import Path
+from rdkit import Chem
+
+# Convert each V2000 SDF file next to this script into a '.v3000.sdf' twin
+SCRIPT_PATH = Path(__file__).parent
+for sdf_path in SCRIPT_PATH.glob("*.sdf"):
+    # Check the file name only: a parent directory may contain 'v3000' too
+    if "v3000" in sdf_path.name:
+        continue
+    supplier = Chem.SDMolSupplier(str(sdf_path), removeHs=False)
+    with Chem.SDWriter(str(sdf_path.with_suffix(".v3000.sdf"))) as writer:
+        writer.SetForceV3000(True)
+        for molecule in supplier:
+            writer.write(molecule)
\ No newline at end of file
diff --git a/tests/structure/test_mol.py b/tests/structure/test_mol.py
index d4337c1b0..6fbd3b211 100644
--- a/tests/structure/test_mol.py
+++ b/tests/structure/test_mol.py
@@ -5,17 +5,39 @@
 import datetime
 import glob
 import itertools
-from os.path import join, split, splitext
+from os.path import join, splitext
 from tempfile import TemporaryFile
 import numpy as np
 import pytest
+import biotite.structure as struc
 import biotite.structure.io.mol as mol
 import biotite.structure.io.pdbx as pdbx
 from biotite.structure.bonds import BondType
-from biotite.structure.io.ctab import BOND_TYPE_MAPPING_REV
+from biotite.structure.io.mol.ctab import BOND_TYPE_MAPPING_REV
 from ..util import data_dir
 
 
+def list_v2000_sdf_files():
+    """Return sorted paths of all test SDF files not ending in 'v3000.sdf'."""
+    sdf_paths = glob.glob(join(data_dir("structure"), "molecules", "*.sdf"))
+    return sorted(p for p in sdf_paths if not p.endswith("v3000.sdf"))
+
+def list_v3000_sdf_files():
+    """Return sorted paths of all test SDF files ending in 'v3000.sdf'."""
+    pattern = join(data_dir("structure"), "molecules", "*v3000.sdf")
+    return sorted(glob.glob(pattern))
+
+
+def toy_atom_array(n_atoms):
+    """Build an all-hydrogen AtomArray with zero charges and no bonds."""
+    array = struc.AtomArray(n_atoms)
+    array.element[:] = "H"
+    array.coord[:] = 1.0
+    array.set_annotation("charge", np.zeros(n_atoms, dtype=int))
+    array.bonds = struc.BondList(n_atoms)
+    return array
+
+
 def test_header_conversion():
     """
     Write known example data to the header of a MOL file and expect
@@ -29,7 +51,6 @@ def test_header_conversion():
 
     mol_file = mol.MOLFile()
     mol_file.set_header(*ref_header)
-    print(mol_file)
     temp = TemporaryFile("w+")
     mol_file.write(temp)
 
@@ -42,13 +63,14 @@ def test_header_conversion():
 
 
 @pytest.mark.parametrize(
-    "path, omit_charge",
+    "path, version, omit_charge",
     itertools.product(
-        glob.glob(join(data_dir("structure"), "molecules", "*.sdf")),
+        list_v2000_sdf_files(),
+        ["V2000", "V3000"],
         [False, True]
     )
 )
-def test_structure_conversion(path, omit_charge):
+def test_structure_conversion(path, version, omit_charge):
     """
     After reading a MOL file, writing the structure back to a new file
     and reading it again should give the same structure.
@@ -58,12 +80,11 @@ def test_structure_conversion(path, omit_charge):
     """
     mol_file = mol.MOLFile.read(path)
     ref_atoms = mol.get_structure(mol_file)
-    print(ref_atoms.charge)
     if omit_charge:
         ref_atoms.del_annotation("charge")
 
     mol_file = mol.MOLFile()
-    mol.set_structure(mol_file, ref_atoms)
+    mol.set_structure(mol_file, ref_atoms, version=version)
     temp = TemporaryFile("w+")
     mol_file.write(temp)
 
@@ -78,9 +99,8 @@ def test_structure_conversion(path, omit_charge):
     assert test_atoms == ref_atoms
 
 
-
 @pytest.mark.parametrize(
-    "path", glob.glob(join(data_dir("structure"), "molecules", "*.sdf")),
+    "path", list_v2000_sdf_files() + list_v3000_sdf_files()
 )
 def test_pdbx_consistency(path):
     """
@@ -90,7 +110,8 @@ def test_pdbx_consistency(path):
     In this case an SDF file is used, but it is compatible with the
     MOL format.
     """
-    cif_path = splitext(path)[0] + ".cif"
+    # Remove '.sdf' and optional '.v3000' suffix
+    cif_path = splitext(splitext(path)[0])[0] + ".cif"
 
     pdbx_file = pdbx.CIFFile.read(cif_path)
     ref_atoms = pdbx.get_component(pdbx_file)
@@ -109,9 +130,8 @@ def test_pdbx_consistency(path):
     assert set(tuple(bond) for bond in test_atoms.bonds.as_array()) \
         == set(tuple(bond) for bond in  ref_atoms.bonds.as_array())
 
-@pytest.mark.parametrize(
-    "path", glob.glob(join(data_dir("structure"), "molecules", "*.sdf")),
-)
+
+@pytest.mark.parametrize("path", list_v2000_sdf_files())
 def test_structure_bond_type_fallback(path):
     """
     Check if a bond with a type not supported by MOL files will be translated
@@ -148,3 +168,44 @@ def test_structure_bond_type_fallback(path):
     ].pop()
     assert int(updated_line[8]) == \
         BOND_TYPE_MAPPING_REV[BondType.SINGLE]
+
+
+@pytest.mark.parametrize("atom_type", ["", " ", "A ", " A"])
+def test_quoted_atom_types(atom_type):
+    """
+    Check if V3000 MOL files can handle atom types (aka elements) that
+    are empty strings or contain whitespace.
+    """
+    ref_atoms = toy_atom_array(1)
+    ref_atoms.element[0] = atom_type
+    mol_file = mol.MOLFile()
+    mol_file.set_structure(ref_atoms, version="V3000")
+    # Close the temporary file even if writing or parsing fails
+    with TemporaryFile("w+") as temp:
+        mol_file.write(temp)
+        temp.seek(0)
+        mol_file = mol.MOLFile.read(temp)
+        test_atoms = mol_file.get_structure()
+    assert test_atoms.element[0] == atom_type
+    # Also check if the rest of the structure was parsed correctly
+    assert test_atoms == ref_atoms
+
+
+def test_large_structure():
+    """
+    Check if MOL files automatically switch to V3000 format if the
+    number of atoms exceeds the fixed size columns in the table.
+    """
+    ref_atoms = toy_atom_array(1000)
+    mol_file = mol.MOLFile()
+    # Let the MOL file automatically switch to V3000 format
+    mol_file.set_structure(ref_atoms, version=None)
+    # Close the temporary file even if writing or parsing fails
+    with TemporaryFile("w+") as temp:
+        mol_file.write(temp)
+        temp.seek(0)
+        mol_file = mol.MOLFile.read(temp)
+        test_atoms = mol_file.get_structure()
+    # Check if file is written in V3000 format
+    assert "V3000" in str(mol_file)
+    assert test_atoms == ref_atoms
\ No newline at end of file