From 972547cd37a1d4d581cbe670fc9459d3b9d05478 Mon Sep 17 00:00:00 2001 From: David Grayson Date: Fri, 27 Dec 2024 15:12:59 -0800 Subject: [PATCH 1/5] Allow ccd residues with missing coords Provides an exception so that residues with missing coords in the ccd can still be returned from the ideal coordinates. --- src/biotite/structure/io/pdbx/convert.py | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/src/biotite/structure/io/pdbx/convert.py b/src/biotite/structure/io/pdbx/convert.py index 5367c74bd..051c1e274 100644 --- a/src/biotite/structure/io/pdbx/convert.py +++ b/src/biotite/structure/io/pdbx/convert.py @@ -1299,7 +1299,7 @@ def get_component(pdbx_file, data_block=None, use_ideal_coord=True, res_name=Non else: raise array.coord = _parse_component_coordinates( - [atom_category[field] for field in alt_coord_fields] + [atom_category[field] for field in alt_coord_fields], keep_missing=True ) try: @@ -1330,14 +1330,20 @@ def get_component(pdbx_file, data_block=None, use_ideal_coord=True, res_name=Non return array -def _parse_component_coordinates(coord_columns): +def _parse_component_coordinates(coord_columns, keep_missing=False): coord = np.zeros((len(coord_columns[0]), 3), dtype=np.float32) for i, column in enumerate(coord_columns): if column.mask is not None and column.mask.array.any(): - raise ValueError( - "Missing coordinates for some atoms", - ) - coord[:, i] = column.as_array(np.float32) + if not keep_missing: + raise ValueError( + "Missing coordinates for some atoms", + ) + else: + warnings.warn( + "Missing coordinates for some atoms. 
Those will be set to nan" + UserWarning, + ) + coord[:, i] = column.as_array(np.float32, masked_value=np.nan) return coord From 2a7fcbdf94e382c6590bcf8a167f1382c94e83f6 Mon Sep 17 00:00:00 2001 From: David Grayson Date: Fri, 27 Dec 2024 15:38:37 -0800 Subject: [PATCH 2/5] fix formatting --- src/biotite/structure/io/pdbx/convert.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/biotite/structure/io/pdbx/convert.py b/src/biotite/structure/io/pdbx/convert.py index 051c1e274..8014448d7 100644 --- a/src/biotite/structure/io/pdbx/convert.py +++ b/src/biotite/structure/io/pdbx/convert.py @@ -1340,7 +1340,7 @@ def _parse_component_coordinates(coord_columns, keep_missing=False): ) else: warnings.warn( - "Missing coordinates for some atoms. Those will be set to nan" + "Missing coordinates for some atoms. Those will be set to nan", UserWarning, ) coord[:, i] = column.as_array(np.float32, masked_value=np.nan) From f7d79192e9d7152e7ee36984fa4c5bab528e14ec Mon Sep 17 00:00:00 2001 From: Simon Mathis Date: Thu, 2 Jan 2025 10:53:12 +0000 Subject: [PATCH 3/5] feat(pdbx): expose flag to allow missing coordinates in `get_component` chore: ruff --- src/biotite/structure/io/pdbx/convert.py | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/src/biotite/structure/io/pdbx/convert.py b/src/biotite/structure/io/pdbx/convert.py index 8014448d7..2d421ae28 100644 --- a/src/biotite/structure/io/pdbx/convert.py +++ b/src/biotite/structure/io/pdbx/convert.py @@ -1181,7 +1181,13 @@ def _filter_canonical_links(array, bond_array): ) # fmt: skip -def get_component(pdbx_file, data_block=None, use_ideal_coord=True, res_name=None): +def get_component( + pdbx_file, + data_block=None, + use_ideal_coord=True, + res_name=None, + allow_missing_coords=False, +): """ Create an :class:`AtomArray` for a chemical component from the ``chem_comp_atom`` and, if available, the ``chem_comp_bond`` @@ -1209,6 +1215,11 @@ def get_component(pdbx_file,
data_block=None, use_ideal_coord=True, res_name=Non In this case, the component with the given residue name is read. By default, all rows would be read in this case. + allow_missing_coords: bool + Whether to allow missing coordinate values in components. + If `True`, these will be represented as `nan` values. + If `False`, a `ValueError` is raised when missing coordinates + are encountered. Returns ------- @@ -1299,7 +1310,8 @@ def get_component(pdbx_file, data_block=None, use_ideal_coord=True, res_name=Non else: raise array.coord = _parse_component_coordinates( - [atom_category[field] for field in alt_coord_fields], keep_missing=True + [atom_category[field] for field in alt_coord_fields], + keep_missing=allow_missing_coords, ) try: From c519913f10cf9d36c991496d14095059b953f9d9 Mon Sep 17 00:00:00 2001 From: David Grayson Date: Mon, 6 Jan 2025 23:52:13 +0000 Subject: [PATCH 4/5] propagate allow_missing_coord and add test --- src/biotite/structure/info/atoms.py | 13 ++++++++-- src/biotite/structure/io/pdbx/convert.py | 22 ++++++++--------- tests/structure/test_info.py | 30 ++++++++++++++++++++++++ 3 files changed, 52 insertions(+), 13 deletions(-) diff --git a/src/biotite/structure/info/atoms.py b/src/biotite/structure/info/atoms.py index a404b39bc..3e78b776d 100644 --- a/src/biotite/structure/info/atoms.py +++ b/src/biotite/structure/info/atoms.py @@ -18,7 +18,7 @@ # fmt: on -def residue(res_name): +def residue(res_name, allow_missing_coord=False): """ Get an atom array, representing the residue with the given name. @@ -30,6 +30,11 @@ def residue(res_name): ---------- res_name : str The up to 3-letter name of the residue. + allow_missing_coord: bool, optional + Whether to allow missing coordinate values in the residue. + If ``True``, these will be represented as ``nan`` values. + If ``False``, a ``ValueError`` is raised when missing coordinates + are encountered. 
Returns ------- @@ -74,7 +79,11 @@ def residue(res_name): from biotite.structure.io.pdbx import get_component try: - component = get_component(get_ccd(), res_name=res_name) + component = get_component( + get_ccd(), + res_name=res_name, + allow_missing_coord=allow_missing_coord, + ) except KeyError: raise KeyError(f"No atom information found for residue '{res_name}' in CCD") component.hetero[:] = res_name not in NON_HETERO_RESIDUES diff --git a/src/biotite/structure/io/pdbx/convert.py b/src/biotite/structure/io/pdbx/convert.py index 2d421ae28..7a1078104 100644 --- a/src/biotite/structure/io/pdbx/convert.py +++ b/src/biotite/structure/io/pdbx/convert.py @@ -1186,7 +1186,7 @@ def get_component( data_block=None, use_ideal_coord=True, res_name=None, - allow_missing_coords=False, + allow_missing_coord=False, ): """ Create an :class:`AtomArray` for a chemical component from the @@ -1215,10 +1215,10 @@ def get_component( In this case, the component with the given residue name is read. By default, all rows would be read in this case. - allow_missing_coords: bool + allow_missing_coord: bool, optional Whether to allow missing coordinate values in components. - If `True`, these will be represented as `nan` values. - If `False`, a `ValueError` is raised when missing coordinates + If ``True``, these will be represented as ``nan`` values. + If ``False``, a ``ValueError`` is raised when missing coordinates are encountered. 
Returns @@ -1311,7 +1311,7 @@ def get_component( raise array.coord = _parse_component_coordinates( [atom_category[field] for field in alt_coord_fields], - keep_missing=allow_missing_coords, + allow_missing=allow_missing_coord, ) try: @@ -1342,19 +1342,19 @@ def get_component( return array -def _parse_component_coordinates(coord_columns, keep_missing=False): +def _parse_component_coordinates(coord_columns, allow_missing=False): coord = np.zeros((len(coord_columns[0]), 3), dtype=np.float32) for i, column in enumerate(coord_columns): if column.mask is not None and column.mask.array.any(): - if not keep_missing: - raise ValueError( - "Missing coordinates for some atoms", - ) - else: + if allow_missing: warnings.warn( "Missing coordinates for some atoms. Those will be set to nan", UserWarning, ) + else: + raise ValueError( + "Missing coordinates for some atoms", + ) coord[:, i] = column.as_array(np.float32, masked_value=np.nan) return coord diff --git a/tests/structure/test_info.py b/tests/structure/test_info.py index 90b9cbc90..99068f60a 100644 --- a/tests/structure/test_info.py +++ b/tests/structure/test_info.py @@ -192,3 +192,33 @@ def test_set_ccd_path(fake_ccd_path): # The new fake CCD has only a single compound assert strucinfo.all_residues() == ["FOO"] + + +@pytest.mark.parametrize( + "res_name, allow_missing_coord, should_raise", + [ + ("ALA", False, False), + ("ALA", True, False), + ("A1IQW", True, False), + ("A1IQW", False, True), + ("RRE", True, False), + ("RRE", False, True), + ], +) +def test_residue(res_name, allow_missing_coord, should_raise): + """ + Test if the residue function returns an atom array or not. + ALA --> standard amino acid, yes in both conditions + A1IQW --> yes only with allow_missing_coord=True + RRE --> yes only with allow_missing_coord=True + Make sure correct exceptions are raised when the non-standard residue + is used with allow_missing_coord=False. 
+ """ + if should_raise: + with pytest.raises(ValueError): + strucinfo.residue(res_name, allow_missing_coord=allow_missing_coord) + else: + result = strucinfo.residue(res_name, allow_missing_coord=allow_missing_coord) + assert isinstance(result, struc.AtomArray) + assert result.array_length() > 0 + assert np.all(result.res_name == res_name) From 4e931fba881b2213641ef4a99bbbf68008b56bd9 Mon Sep 17 00:00:00 2001 From: David Grayson Date: Tue, 7 Jan 2025 01:07:37 +0000 Subject: [PATCH 5/5] future-proof residue info test with missing coord --- tests/structure/test_info.py | 33 ++++++++++++--------------------- 1 file changed, 12 insertions(+), 21 deletions(-) diff --git a/tests/structure/test_info.py b/tests/structure/test_info.py index 99068f60a..5650fda7c 100644 --- a/tests/structure/test_info.py +++ b/tests/structure/test_info.py @@ -195,30 +195,21 @@ def test_set_ccd_path(fake_ccd_path): @pytest.mark.parametrize( - "res_name, allow_missing_coord, should_raise", + "res_name, allow_missing_coord", [ - ("ALA", False, False), - ("ALA", True, False), - ("A1IQW", True, False), - ("A1IQW", False, True), - ("RRE", True, False), - ("RRE", False, True), + ("ALA", False), + ("A1IQW", True), + ("RRE", True), ], ) -def test_residue(res_name, allow_missing_coord, should_raise): +def test_residue(res_name, allow_missing_coord): """ Test if the residue function returns an atom array or not. - ALA --> standard amino acid, yes in both conditions - A1IQW --> yes only with allow_missing_coord=True - RRE --> yes only with allow_missing_coord=True - Make sure correct exceptions are raised when the non-standard residue - is used with allow_missing_coord=False. 
+ ALA --> standard amino acid, yes even when allow_missing_coord=False + A1IQW --> yes only with allow_missing_coord=True (as of Jan 6, 2025) + RRE --> yes only with allow_missing_coord=True (as of Jan 6, 2025) """ - if should_raise: - with pytest.raises(ValueError): - strucinfo.residue(res_name, allow_missing_coord=allow_missing_coord) - else: - result = strucinfo.residue(res_name, allow_missing_coord=allow_missing_coord) - assert isinstance(result, struc.AtomArray) - assert result.array_length() > 0 - assert np.all(result.res_name == res_name) + result = strucinfo.residue(res_name, allow_missing_coord=allow_missing_coord) + assert isinstance(result, struc.AtomArray) + assert result.array_length() > 0 + assert np.all(result.res_name == res_name)