diff --git a/src/biotite/structure/info/atoms.py b/src/biotite/structure/info/atoms.py
index a404b39bc..3e78b776d 100644
--- a/src/biotite/structure/info/atoms.py
+++ b/src/biotite/structure/info/atoms.py
@@ -18,7 +18,7 @@
 # fmt: on
 
 
-def residue(res_name):
+def residue(res_name, allow_missing_coord=False):
     """
     Get an atom array, representing the residue with the given name.
 
@@ -30,6 +30,11 @@ def residue(res_name):
     ----------
     res_name : str
         The up to 3-letter name of the residue.
+    allow_missing_coord : bool, optional
+        Whether to allow missing coordinate values in the residue.
+        If ``True``, these will be represented as ``nan`` values.
+        If ``False``, a ``ValueError`` is raised when missing coordinates
+        are encountered.
 
     Returns
     -------
@@ -74,7 +79,11 @@ def residue(res_name):
     from biotite.structure.io.pdbx import get_component
 
     try:
-        component = get_component(get_ccd(), res_name=res_name)
+        component = get_component(
+            get_ccd(),
+            res_name=res_name,
+            allow_missing_coord=allow_missing_coord,
+        )
     except KeyError:
         raise KeyError(f"No atom information found for residue '{res_name}' in CCD")
     component.hetero[:] = res_name not in NON_HETERO_RESIDUES
diff --git a/src/biotite/structure/io/pdbx/convert.py b/src/biotite/structure/io/pdbx/convert.py
index 1da846172..ed76276d1 100644
--- a/src/biotite/structure/io/pdbx/convert.py
+++ b/src/biotite/structure/io/pdbx/convert.py
@@ -1185,7 +1185,13 @@ def _filter_canonical_links(array, bond_array):
     )  # fmt: skip
 
 
-def get_component(pdbx_file, data_block=None, use_ideal_coord=True, res_name=None):
+def get_component(
+    pdbx_file,
+    data_block=None,
+    use_ideal_coord=True,
+    res_name=None,
+    allow_missing_coord=False,
+):
     """
     Create an :class:`AtomArray` for a chemical component from the
     ``chem_comp_atom`` and, if available, the ``chem_comp_bond``
@@ -1213,6 +1219,11 @@ def get_component(pdbx_file, data_block=None, use_ideal_coord=True, res_name=Non
         In this case, the component with the given residue name is
         read.
         By default, all rows would be read in this case.
+    allow_missing_coord : bool, optional
+        Whether to allow missing coordinate values in components.
+        If ``True``, these will be represented as ``nan`` values.
+        If ``False``, a ``ValueError`` is raised when missing coordinates
+        are encountered.
 
     Returns
     -------
@@ -1303,7 +1314,8 @@ def get_component(pdbx_file, data_block=None, use_ideal_coord=True, res_name=Non
         else:
             raise
         array.coord = _parse_component_coordinates(
-            [atom_category[field] for field in alt_coord_fields]
+            [atom_category[field] for field in alt_coord_fields],
+            allow_missing=allow_missing_coord,
         )
 
     try:
@@ -1334,14 +1346,23 @@ def get_component(pdbx_file, data_block=None, use_ideal_coord=True, res_name=Non
     return array
 
 
-def _parse_component_coordinates(coord_columns):
+def _parse_component_coordinates(coord_columns, allow_missing=False):
     coord = np.zeros((len(coord_columns[0]), 3), dtype=np.float32)
+    has_missing = False
     for i, column in enumerate(coord_columns):
         if column.mask is not None and column.mask.array.any():
-            raise ValueError(
-                "Missing coordinates for some atoms",
-            )
-        coord[:, i] = column.as_array(np.float32)
+            if not allow_missing:
+                raise ValueError(
+                    "Missing coordinates for some atoms",
+                )
+            has_missing = True
+        coord[:, i] = column.as_array(np.float32, masked_value=np.nan)
+    if has_missing:
+        # Warn once per component instead of once per coordinate column
+        warnings.warn(
+            "Missing coordinates for some atoms. Those will be set to nan",
+            UserWarning,
+        )
     return coord
 
 
diff --git a/tests/structure/test_info.py b/tests/structure/test_info.py
index 90b9cbc90..5650fda7c 100644
--- a/tests/structure/test_info.py
+++ b/tests/structure/test_info.py
@@ -192,3 +192,24 @@ def test_set_ccd_path(fake_ccd_path):
 
     # The new fake CCD has only a single compound
     assert strucinfo.all_residues() == ["FOO"]
+
+
+@pytest.mark.parametrize(
+    "res_name, allow_missing_coord",
+    [
+        ("ALA", False),
+        ("A1IQW", True),
+        ("RRE", True),
+    ],
+)
+def test_residue(res_name, allow_missing_coord):
+    """
+    Test if the residue function returns an atom array or not.
+    ALA --> standard amino acid, yes even when allow_missing_coord=False
+    A1IQW --> yes only with allow_missing_coord=True (as of Jan 6, 2025)
+    RRE --> yes only with allow_missing_coord=True (as of Jan 6, 2025)
+    """
+    result = strucinfo.residue(res_name, allow_missing_coord=allow_missing_coord)
+    assert isinstance(result, struc.AtomArray)
+    assert result.array_length() > 0
+    assert np.all(result.res_name == res_name)