diff --git a/package/AUTHORS b/package/AUTHORS index cbe9a5c6343..41bfd346411 100644 --- a/package/AUTHORS +++ b/package/AUTHORS @@ -149,6 +149,7 @@ Chronological list of authors - Marcello Sega - Edis Jakupovic - Nicholas Craven + - Mieczyslaw Torchala External code ------------- diff --git a/package/CHANGELOG b/package/CHANGELOG index 36d658a2b19..15984aeae91 100644 --- a/package/CHANGELOG +++ b/package/CHANGELOG @@ -14,12 +14,15 @@ The rules for this file: ------------------------------------------------------------------------------ ??/??/?? tylerjereddy, richardjgowers, IAlibay, hmacdope, orbeckst, cbouy, - lilyminium, daveminh, jbarnoud, yuxuanzhuang, VOD555, ianmkenney, - calcraven, xiki-tempula + lilyminium, daveminh, jbarnoud, yuxuanzhuang, VOD555, ianmkenney, + calcraven,xiki-tempula, mieczyslaw * 2.0.0 Fixes + * Instead of using ATOM for both ATOM and HETATM, HETATM record type + keyword is properly written out by PDB writer (Issue #1753, PR #2880). + * Change referenced PDB standard version to 3.3 (Issue #2906). * Fixed reading in masses and charges from a hoomdxml file (Issue #2888, PR #2889) * Bond attribute is no longer set if PDB file contains no CONECT records diff --git a/package/MDAnalysis/coordinates/PDB.py b/package/MDAnalysis/coordinates/PDB.py index fa1fac453c3..8b90c1bda9e 100644 --- a/package/MDAnalysis/coordinates/PDB.py +++ b/package/MDAnalysis/coordinates/PDB.py @@ -55,7 +55,7 @@ W.write(calphas) It is important to *always close the trajectory* when done because only at this -step is the final END_ record written, which is required by the `PDB 3.2 +step is the final END_ record written, which is required by the `PDB 3.3 standard`_. Using the writer as a context manager ensures that this always happens. @@ -65,7 +65,7 @@ A pure-Python implementation for PDB files commonly encountered in MD simulations comes under the names :class:`PDBReader` and :class:`PDBWriter`. It -only implements a subset of the `PDB 3.2 standard`_ and also allows some +only implements a subset of the `PDB 3.3 standard`_ and also allows some typical enhancements such as 4-letter resids (introduced by CHARMM/NAMD). The :class:`PDBReader` can read multi-frame PDB files and represents @@ -136,8 +136,8 @@ :inherited-members: -.. _`PDB 3.2 standard`: - http://www.wwpdb.org/documentation/file-format-content/format32/v3.2.html +.. _`PDB 3.3 standard`: + http://www.wwpdb.org/documentation/file-format-content/format33/v3.3.html """ from io import StringIO, BytesIO @@ -177,11 +177,11 @@ class PDBReader(base.ReaderBase): Reads multi-`MODEL`_ PDB files as trajectories. .. _PDB-formatted: - http://www.wwpdb.org/documentation/file-format-content/format32/v3.2.html + http://www.wwpdb.org/documentation/file-format-content/format33/v3.3.html .. _PDB coordinate section: - http://www.wwpdb.org/documentation/file-format-content/format32/sect9.html + http://www.wwpdb.org/documentation/file-format-content/format33/sect9.html .. _MODEL: - http://www.wwpdb.org/documentation/file-format-content/format32/sect9.html#MODEL + http://www.wwpdb.org/documentation/file-format-content/format33/sect9.html#MODEL ============= ============ =========== ============================================= COLUMNS DATA TYPE FIELD DEFINITION @@ -459,7 +459,7 @@ def close(self): class PDBWriter(base.WriterBase): - """PDB writer that implements a subset of the `PDB 3.2 standard`_ . + """PDB writer that implements a subset of the `PDB 3.3 standard`_ . PDB format as used by NAMD/CHARMM: 4-letter resnames and segID are allowed, altLoc is written. @@ -470,12 +470,19 @@ class PDBWriter(base.WriterBase): PDB "movie" (multi frame mode, *multiframe* = ``True``), consisting of multiple models (using the MODEL_ and ENDMDL_ records). - .. _`PDB 3.2 standard`: - http://www.wwpdb.org/documentation/file-format-content/format32/v3.2.html - .. _MODEL: http://www.wwpdb.org/documentation/file-format-content/format32/sect9.html#MODEL - .. _ENDMDL: http://www.wwpdb.org/documentation/file-format-content/format32/sect9.html#ENDMDL - .. _CONECT: http://www.wwpdb.org/documentation/file-format-content/format32/sect10.html#CONECT - + .. _`PDB 3.3 standard`: + http://www.wwpdb.org/documentation/file-format-content/format33/v3.3.html + .. _ATOM: http://www.wwpdb.org/documentation/file-format-content/format33/sect9.html#ATOM + .. _COMPND: http://www.wwpdb.org/documentation/file-format-content/format33/sect2.html#COMPND + .. _CONECT: http://www.wwpdb.org/documentation/file-format-content/format33/sect10.html#CONECT + .. _END: http://www.wwpdb.org/documentation/file-format-content/format33/sect11.html#END + .. _ENDMDL: http://www.wwpdb.org/documentation/file-format-content/format33/sect9.html#ENDMDL + .. _HEADER: http://www.wwpdb.org/documentation/file-format-content/format33/sect2.html#HEADER + .. _HETATM: http://www.wwpdb.org/documentation/file-format-content/format33/sect9.html#HETATM + .. _MODEL: http://www.wwpdb.org/documentation/file-format-content/format33/sect9.html#MODEL + .. _NUMMDL: http://www.wwpdb.org/documentation/file-format-content/format33/sect2.html#NUMMDL + .. _REMARKS: http://www.wwpdb.org/documentation/file-format-content/format33/remarks.html + .. _TITLE: http://www.wwpdb.org/documentation/file-format-content/format33/sect2.html#TITLE Note ---- @@ -493,6 +500,11 @@ class PDBWriter(base.WriterBase): are not set (:code:`None` or :code:`np.zeros(6)`, see Issue #2698). + When the :attr:`record_types` attribute is present (e.g. Universe object + was created by loading a PDB file), ATOM_ and HETATM_ record type + keywords are written out accordingly. Otherwise, the ATOM_ record type + is the default output. + See Also -------- This class is identical to :class:`MultiPDBWriter` with the one @@ -528,6 +540,11 @@ class PDBWriter(base.WriterBase): "{chainID:1s}{resSeq:4d}{iCode:1s}" " {pos[0]:8.3f}{pos[1]:8.3f}{pos[2]:8.3f}{occupancy:6.2f}" "{tempFactor:6.2f} {segID:<4s}{element:>2s}\n"), + 'HETATM': ( + "HETATM{serial:5d} {name:<4s}{altLoc:<1s}{resName:<4s}" + "{chainID:1s}{resSeq:4d}{iCode:1s}" + " {pos[0]:8.3f}{pos[1]:8.3f}{pos[2]:8.3f}{occupancy:6.2f}" + "{tempFactor:6.2f} {segID:<4s}{element:>2s}\n"), 'REMARK': "REMARK {0}\n", 'COMPND': "COMPND {0}\n", 'HEADER': "HEADER {0}\n", @@ -608,10 +625,6 @@ def __init__(self, filename, bonds="conect", n_atoms=None, start=0, step=1, multi frame PDB file in which frames are written as MODEL_ ... ENDMDL_ records. If ``None``, then the class default is chosen. [``None``] - .. _CONECT: http://www.wwpdb.org/documentation/file-format-content/format32/sect10.html#CONECT - .. _MODEL: http://www.wwpdb.org/documentation/file-format-content/format32/sect9.html#MODEL - .. _ENDMDL: http://www.wwpdb.org/documentation/file-format-content/format32/sect9.html#ENDMDL - """ # n_atoms = None : dummy keyword argument # (not used, but Writer() always provides n_atoms as the second argument) @@ -668,9 +681,6 @@ def _write_pdb_header(self): unitary values (cubic box with sides of 1 Å) if unit cell dimensions are not set. - .. _COMPND: http://www.wwpdb.org/documentation/file-format-content/format33/sect2.html#COMPND - .. _REMARKS: http://www.wwpdb.org/documentation/file-format-content/format33/remarks.html - .. versionchanged: 1.0.0 Fix writing of PDB file without unit cell dimensions (Issue #2679). If cell dimensions are not found, unitary values (cubic box with @@ -981,19 +991,21 @@ def _write_timestep(self, ts, multiframe=False): :class:`PDBWriter` is in single frame mode and no MODEL_ records are written. - .. _MODEL: http://www.wwpdb.org/documentation/file-format-content/format32/sect9.html#MODEL - .. _ENDMDL: http://www.wwpdb.org/documentation/file-format-content/format32/sect9.html#ENDMDL - .. _NUMMDL: http://www.wwpdb.org/documentation/file-format-content/format32/sect2.html#NUMMDL - .. versionchanged:: 0.7.6 The *multiframe* keyword was added, which completely determines if - MODEL records are written. (Previously, this was decided based on the - underlying trajectory and only if ``len(traj) > 1`` would MODEL records - have been written.) + MODEL_ records are written. (Previously, this was decided based on + the underlying trajectory and only if ``len(traj) > 1`` would + MODEL records have been written.) .. versionchanged:: 1.0.0 - ChainID now comes from the last character of segid, as stated in the documentation. - An indexing issue meant it previously used the first charater (Issue #2224) + ChainID now comes from the last character of segid, as stated in + the documentation. An indexing issue meant it previously used the + first charater (Issue #2224) + + .. versionchanged:: 2.0.0 + When only :attr:`record_types` attribute is present, instead of + using ATOM_ for both ATOM_ and HETATM_, HETATM_ record + types are properly written out (Issue #1753). """ atoms = self.obj.atoms @@ -1028,6 +1040,7 @@ def get_attr(attrname, default): occupancies = get_attr('occupancies', 1.0) tempfactors = get_attr('tempfactors', 0.0) atomnames = get_attr('names', 'X') + record_types = get_attr('record_types', 'ATOM') for i, atom in enumerate(atoms): vals = {} @@ -1044,8 +1057,14 @@ def get_attr(attrname, default): vals['segID'] = segids[i][:4] vals['element'] = guess_atom_element(atomnames[i].strip())[:2] - # .. _ATOM: http://www.wwpdb.org/documentation/file-format-content/format32/sect9.html#ATOM - self.pdbfile.write(self.fmt['ATOM'].format(**vals)) + # record_type attribute, if exists, can be ATOM or HETATM + try: + self.pdbfile.write(self.fmt[record_types[i]].format(**vals)) + except KeyError: + errmsg = (f"Found {record_types[i]} for the record type, but " + f"only allowed types are ATOM or HETATM") + raise ValueError(errmsg) from None + if multiframe: self.ENDMDL() self.frames_written += 1 @@ -1053,8 +1072,6 @@ def get_attr(attrname, default): def HEADER(self, trajectory): """Write HEADER_ record. - .. _HEADER: http://www.wwpdb.org/documentation/file-format-content/format32/sect2.html#HEADER - .. versionchanged:: 0.20.0 Strip `trajectory.header` since it can be modified by the user and should be sanitized (Issue #2324) @@ -1067,23 +1084,16 @@ def HEADER(self, trajectory): def TITLE(self, *title): """Write TITLE_ record. - .. _TITLE: http://www.wwpdb.org/documentation/file-format-content/format32/sect2.html - """ line = " ".join(title) # TODO: should do continuation automatically self.pdbfile.write(self.fmt['TITLE'].format(line)) def REMARK(self, *remarks): - """Write generic REMARK_ record (without number). + """Write generic REMARKS_ record (without number). - Each string provided in *remarks* is written as a separate REMARK_ + Each string provided in *remarks* is written as a separate REMARKS_ record. - See also `REMARK (update)`_. - - .. _REMARK: http://www.wwpdb.org/documentation/file-format-content/format32/remarks1.html - .. _REMARK (update): http://www.wwpdb.org/documentation/file-format-content/format32/remarks2.html - """ for remark in remarks: self.pdbfile.write(self.fmt['REMARK'].format(remark)) @@ -1113,10 +1123,6 @@ def MODEL(self, modelnumber): standard (i.e., 4 digits). If frame numbers are larger than 9999, they will wrap around, i.e., 9998, 9999, 0, 1, 2, ... - - .. _MODEL: http://www.wwpdb.org/documentation/file-format-content/format32/sect9.html#MODEL - - .. versionchanged:: 0.19.0 Maximum model number is enforced. @@ -1131,8 +1137,6 @@ def END(self): method right before closing the file it is recommended to *not* call :meth:`~PDBWriter.END` explicitly. - .. _END: http://www.wwpdb.org/documentation/file-format-content/format32/sect11.html#END - """ if not self.has_END: # only write a single END record @@ -1142,16 +1146,12 @@ def END(self): def ENDMDL(self): """Write the ENDMDL_ record. - .. _ENDMDL: http://www.wwpdb.org/documentation/file-format-content/format32/sect9.html#ENDMDL - """ self.pdbfile.write(self.fmt['ENDMDL']) def CONECT(self, conect): """Write CONECT_ record. - .. _CONECT: http://www.wwpdb.org/documentation/file-format-content/format32/sect10.html#CONECT - """ conect = ["{0:5d}".format(entry + 1) for entry in conect] conect = "".join(conect) @@ -1161,7 +1161,7 @@ def CONECT(self, conect): class ExtendedPDBReader(PDBReader): """PDBReader that reads a PDB-formatted file with five-digit residue numbers. - This reader does not conform to the `PDB 3.2 standard`_ because it allows + This reader does not conform to the `PDB 3.3 standard`_ because it allows five-digit residue numbers that may take up columns 23 to 27 (inclusive) instead of being confined to 23-26 (with column 27 being reserved for the insertion code in the PDB standard). PDB files in this format are written @@ -1180,7 +1180,7 @@ class ExtendedPDBReader(PDBReader): class MultiPDBWriter(PDBWriter): - """PDB writer that implements a subset of the `PDB 3.2 standard`_ . + """PDB writer that implements a subset of the `PDB 3.3 standard`_ . PDB format as used by NAMD/CHARMM: 4-letter resnames and segID, altLoc is written. @@ -1190,9 +1190,9 @@ class MultiPDBWriter(PDBWriter): and ENDMDL_ records). - .. _MODEL: http://www.wwpdb.org/documentation/file-format-content/format32/sect9.html#MODEL - .. _ENDMDL: http://www.wwpdb.org/documentation/file-format-content/format32/sect9.html#ENDMDL - .. _CONECT: http://www.wwpdb.org/documentation/file-format-content/format32/sect10.html#CONECT + .. _MODEL: http://www.wwpdb.org/documentation/file-format-content/format33/sect9.html#MODEL + .. _ENDMDL: http://www.wwpdb.org/documentation/file-format-content/format33/sect9.html#ENDMDL + .. _CONECT: http://www.wwpdb.org/documentation/file-format-content/format33/sect10.html#CONECT See Also diff --git a/testsuite/MDAnalysisTests/coordinates/test_pdb.py b/testsuite/MDAnalysisTests/coordinates/test_pdb.py index c337d54dbba..9cd3fbc5989 100644 --- a/testsuite/MDAnalysisTests/coordinates/test_pdb.py +++ b/testsuite/MDAnalysisTests/coordinates/test_pdb.py @@ -20,12 +20,12 @@ # MDAnalysis: A Toolkit for the Analysis of Molecular Dynamics Simulations. # J. Comput. Chem. 32 (2011), 2319--2327, doi:10.1002/jcc.21787 # -import pytest -from io import StringIO import os +from io import StringIO import MDAnalysis as mda import numpy as np +import pytest from MDAnalysisTests import make_Universe from MDAnalysisTests.coordinates.base import _SingleFrameReader from MDAnalysisTests.coordinates.reference import (RefAdKSmall, @@ -36,7 +36,8 @@ INC_PDB, PDB_xlserial, ALIGN, ENT, PDB_cm, PDB_cm_gz, PDB_cm_bz2, PDB_mc, PDB_mc_gz, PDB_mc_bz2, - PDB_CRYOEM_BOX, MMTF_NOCRYST) + PDB_CRYOEM_BOX, MMTF_NOCRYST, + PDB_HOLE, mol2_molecule) from numpy.testing import (assert_equal, assert_array_almost_equal, assert_almost_equal) @@ -188,6 +189,14 @@ def universe2(self): def universe3(self): return mda.Universe(PDB) + @pytest.fixture + def universe4(self): + return mda.Universe(PDB_HOLE) + + @pytest.fixture + def universe5(self): + return mda.Universe(mol2_molecule) + @pytest.fixture(params=[ [PDB_CRYOEM_BOX, np.zeros(6)], [MMTF_NOCRYST, None] @@ -221,7 +230,6 @@ def u_no_resids(self): def u_no_names(self): return make_Universe(['resids', 'resnames'], trajectory=True) - def test_writer(self, universe, outfile): "Test writing from a single frame PDB file to a PDB file." "" universe.atoms.write(outfile) @@ -446,6 +454,65 @@ def test_stringio_outofrange(self, universe3): with mda.coordinates.PDB.PDBWriter(outstring) as writer: writer.write(u.atoms) + def test_hetatm_written(self, universe4, tmpdir, outfile): + """ + Checks that HETATM record types are written. + """ + + u = universe4 + u_hetatms = u.select_atoms("resname ETA and record_type HETATM") + assert_equal(len(u_hetatms), 8) + + u.atoms.write(outfile) + written = mda.Universe(outfile) + written_atoms = written.select_atoms("resname ETA and " + "record_type HETATM") + + assert len(u_hetatms) == len(written_atoms), \ + "mismatched HETATM number" + assert_almost_equal(u_hetatms.atoms.positions, + written_atoms.atoms.positions) + + def test_default_atom_record_type_written(self, universe5, tmpdir, + outfile): + """ + Checks that ATOM record types are written when there is no + record_type attribute. + """ + + u = universe5 + + expected_msg = ("Found no information for attr: " + "'record_types' Using default value of 'ATOM'") + + with pytest.warns(UserWarning, match=expected_msg): + u.atoms.write(outfile) + + written = mda.Universe(outfile) + assert len(u.atoms) == len(written.atoms), \ + "mismatched number of atoms" + + atms = written.select_atoms("record_type ATOM") + assert len(atms.atoms) == len(u.atoms), \ + "mismatched ATOM number" + + hetatms = written.select_atoms("record_type HETATM") + assert len(hetatms.atoms) == 0, "mismatched HETATM number" + + def test_abnormal_record_type(self, universe5, tmpdir, outfile): + """ + Checks whether KeyError is raised when record type is + neither ATOM or HETATM. + """ + u = universe5 + u.add_TopologyAttr('record_type', ['ABNORM']*len(u.atoms)) + + expected_msg = ("Found ABNORM for the record type, but only " + "allowed types are ATOM or HETATM") + + with pytest.raises(ValueError, match=expected_msg): + u.atoms.write(outfile) + class TestMultiPDBReader(object): @staticmethod