From e708aa760a4d872f6cc3691dbd6185da45909a89 Mon Sep 17 00:00:00 2001 From: Mieczyslaw Torchala Date: Thu, 30 Jul 2020 18:33:42 +0100 Subject: [PATCH 01/14] added HETATM fmt and conditional --- package/MDAnalysis/coordinates/PDB.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/package/MDAnalysis/coordinates/PDB.py b/package/MDAnalysis/coordinates/PDB.py index fa1fac453c3..1c48fba39bc 100644 --- a/package/MDAnalysis/coordinates/PDB.py +++ b/package/MDAnalysis/coordinates/PDB.py @@ -528,6 +528,11 @@ class PDBWriter(base.WriterBase): "{chainID:1s}{resSeq:4d}{iCode:1s}" " {pos[0]:8.3f}{pos[1]:8.3f}{pos[2]:8.3f}{occupancy:6.2f}" "{tempFactor:6.2f} {segID:<4s}{element:>2s}\n"), + 'HETATM': ( + "HETATM{serial:5d} {name:<4s}{altLoc:<1s}{resName:<4s}" + "{chainID:1s}{resSeq:4d}{iCode:1s}" + " {pos[0]:8.3f}{pos[1]:8.3f}{pos[2]:8.3f}{occupancy:6.2f}" + "{tempFactor:6.2f} {segID:<4s}{element:>2s}\n"), 'REMARK': "REMARK {0}\n", 'COMPND': "COMPND {0}\n", 'HEADER': "HEADER {0}\n", @@ -1044,8 +1049,11 @@ def get_attr(attrname, default): vals['segID'] = segids[i][:4] vals['element'] = guess_atom_element(atomnames[i].strip())[:2] - # .. _ATOM: http://www.wwpdb.org/documentation/file-format-content/format32/sect9.html#ATOM - self.pdbfile.write(self.fmt['ATOM'].format(**vals)) + if atom.record_type == 'ATOM': + # .. _ATOM: http://www.wwpdb.org/documentation/file-format-content/format32/sect9.html#ATOM + self.pdbfile.write(self.fmt['ATOM'].format(**vals)) + elif atom.record_type == 'HETATM': + self.pdbfile.write(self.fmt['HETATM'].format(**vals)) if multiframe: self.ENDMDL() self.frames_written += 1 From 884d613a11cce993c8adcdde6481313f5be7ef50 Mon Sep 17 00:00:00 2001 From: Mieczyslaw Torchala Date: Thu, 30 Jul 2020 19:35:57 +0100 Subject: [PATCH 02/14] fix no attribute case --- package/MDAnalysis/coordinates/PDB.py | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/package/MDAnalysis/coordinates/PDB.py b/package/MDAnalysis/coordinates/PDB.py index 1c48fba39bc..326e979527e 100644 --- a/package/MDAnalysis/coordinates/PDB.py +++ b/package/MDAnalysis/coordinates/PDB.py @@ -1034,6 +1034,7 @@ def get_attr(attrname, default): tempfactors = get_attr('tempfactors', 0.0) atomnames = get_attr('names', 'X') + no_record_type = False for i, atom in enumerate(atoms): vals = {} vals['serial'] = util.ltruncate_int(i + 1, 5) # check for overflow here? @@ -1049,11 +1050,21 @@ def get_attr(attrname, default): vals['segID'] = segids[i][:4] vals['element'] = guess_atom_element(atomnames[i].strip())[:2] - if atom.record_type == 'ATOM': + if hasattr(atom, 'record_type'): + if atom.record_type == 'HETATM': + # .. _HETATM: http://www.wwpdb.org/documentation/file-format-content/format32/sect9.html#HETATM + self.pdbfile.write(self.fmt['HETATM'].format(**vals)) + else: + # .. _ATOM: http://www.wwpdb.org/documentation/file-format-content/format32/sect9.html#ATOM + self.pdbfile.write(self.fmt['ATOM'].format(**vals)) + else: # .. _ATOM: http://www.wwpdb.org/documentation/file-format-content/format32/sect9.html#ATOM + no_record_type = True self.pdbfile.write(self.fmt['ATOM'].format(**vals)) - elif atom.record_type == 'HETATM': - self.pdbfile.write(self.fmt['HETATM'].format(**vals)) + + if no_record_type: + warnings.warn("PDBWriter: No record type found for an atom - HETATM may be written out as ATOM") + if multiframe: self.ENDMDL() self.frames_written += 1 From 46115cec1c1bb437d6ddd59e21ca92e83c55f1c8 Mon Sep 17 00:00:00 2001 From: Mieczyslaw Torchala Date: Sun, 2 Aug 2020 23:32:25 +0100 Subject: [PATCH 03/14] address reviewer's comments --- package/AUTHORS | 1 + package/CHANGELOG | 2 + package/MDAnalysis/coordinates/PDB.py | 33 +++++---- .../MDAnalysisTests/coordinates/test_pdb.py | 71 ++++++++++++++++++- 4 files changed, 89 insertions(+), 18 deletions(-) diff --git a/package/AUTHORS b/package/AUTHORS index 0d729be658a..76012d0df45 100644 --- a/package/AUTHORS +++ b/package/AUTHORS @@ -147,6 +147,7 @@ Chronological list of authors - Andrea Rizzi - William Glass - Marcello Sega + - Mieczyslaw Torchala External code ------------- diff --git a/package/CHANGELOG b/package/CHANGELOG index 6440e9cef5a..3f729659faa 100644 --- a/package/CHANGELOG +++ b/package/CHANGELOG @@ -19,6 +19,8 @@ The rules for this file: * 2.0.0 Fixes + * Instead of using ATOM for both ATOM and HETATM, HETATM record type + keyword is properly written out by PDB writer (Issue #1753, PR #2880) * Bond attribute is no longer set if PDB file contains no CONECT records (Issue #2832) * ResidueAttrs now have Atom as a target class by default; ICodes and diff --git a/package/MDAnalysis/coordinates/PDB.py b/package/MDAnalysis/coordinates/PDB.py index 326e979527e..8e291267bd3 100644 --- a/package/MDAnalysis/coordinates/PDB.py +++ b/package/MDAnalysis/coordinates/PDB.py @@ -475,6 +475,8 @@ class PDBWriter(base.WriterBase): .. _MODEL: http://www.wwpdb.org/documentation/file-format-content/format32/sect9.html#MODEL .. _ENDMDL: http://www.wwpdb.org/documentation/file-format-content/format32/sect9.html#ENDMDL .. _CONECT: http://www.wwpdb.org/documentation/file-format-content/format32/sect10.html#CONECT + .. _ATOM: http://www.wwpdb.org/documentation/file-format-content/format32/sect9.html#ATOM + .. _HETATM: http://www.wwpdb.org/documentation/file-format-content/format32/sect9.html#HETATM Note @@ -493,6 +495,10 @@ class PDBWriter(base.WriterBase): are not set (:code:`None` or :code:`np.zeros(6)`, see Issue #2698). + When atom's record_type attribute is present (i.e. Universe object was + created by loading a PDB file), ATOM and HETATM record type keywords + are written out accordingly. + See Also -------- This class is identical to :class:`MultiPDBWriter` with the one @@ -1000,6 +1006,10 @@ def _write_timestep(self, ts, multiframe=False): ChainID now comes from the last character of segid, as stated in the documentation. An indexing issue meant it previously used the first charater (Issue #2224) + .. versionchanged:: 2.0.0 + when only record_type attribute is present, instead of using ATOM for both ATOM + and HETATM, HETATM record type keyword is properly written out (Issue #1753) + """ atoms = self.obj.atoms pos = atoms.positions @@ -1033,8 +1043,8 @@ def get_attr(attrname, default): occupancies = get_attr('occupancies', 1.0) tempfactors = get_attr('tempfactors', 0.0) atomnames = get_attr('names', 'X') + record_types = get_attr('record_types', 'ATOM') - no_record_type = False for i, atom in enumerate(atoms): vals = {} vals['serial'] = util.ltruncate_int(i + 1, 5) # check for overflow here? @@ -1050,20 +1060,13 @@ def get_attr(attrname, default): vals['segID'] = segids[i][:4] vals['element'] = guess_atom_element(atomnames[i].strip())[:2] - if hasattr(atom, 'record_type'): - if atom.record_type == 'HETATM': - # .. _HETATM: http://www.wwpdb.org/documentation/file-format-content/format32/sect9.html#HETATM - self.pdbfile.write(self.fmt['HETATM'].format(**vals)) - else: - # .. _ATOM: http://www.wwpdb.org/documentation/file-format-content/format32/sect9.html#ATOM - self.pdbfile.write(self.fmt['ATOM'].format(**vals)) - else: - # .. _ATOM: http://www.wwpdb.org/documentation/file-format-content/format32/sect9.html#ATOM - no_record_type = True - self.pdbfile.write(self.fmt['ATOM'].format(**vals)) - - if no_record_type: - warnings.warn("PDBWriter: No record type found for an atom - HETATM may be written out as ATOM") + # record_type attribute, if exists, can be ATOM or HETATM + try: + self.pdbfile.write(self.fmt[record_types[i]].format(**vals)) + except KeyError: + warnings.warn("Found '{}' for record type, but allowed types " + "are ATOM or HETATM".format(record_types[i])) + raise ValueError if multiframe: self.ENDMDL() diff --git a/testsuite/MDAnalysisTests/coordinates/test_pdb.py b/testsuite/MDAnalysisTests/coordinates/test_pdb.py index c337d54dbba..96e6e0bdea5 100644 --- a/testsuite/MDAnalysisTests/coordinates/test_pdb.py +++ b/testsuite/MDAnalysisTests/coordinates/test_pdb.py @@ -20,12 +20,12 @@ # MDAnalysis: A Toolkit for the Analysis of Molecular Dynamics Simulations. # J. Comput. Chem. 32 (2011), 2319--2327, doi:10.1002/jcc.21787 # -import pytest -from io import StringIO import os +from io import StringIO import MDAnalysis as mda import numpy as np +import pytest from MDAnalysisTests import make_Universe from MDAnalysisTests.coordinates.base import _SingleFrameReader from MDAnalysisTests.coordinates.reference import (RefAdKSmall, @@ -36,7 +36,8 @@ INC_PDB, PDB_xlserial, ALIGN, ENT, PDB_cm, PDB_cm_gz, PDB_cm_bz2, PDB_mc, PDB_mc_gz, PDB_mc_bz2, - PDB_CRYOEM_BOX, MMTF_NOCRYST) + PDB_CRYOEM_BOX, MMTF_NOCRYST, + PDB_HOLE, mol2_molecule) from numpy.testing import (assert_equal, assert_array_almost_equal, assert_almost_equal) @@ -188,6 +189,14 @@ def universe2(self): def universe3(self): return mda.Universe(PDB) + @pytest.fixture + def universe4(self): + return mda.Universe(PDB_HOLE, PDB_HOLE) + + @pytest.fixture + def universe5(self): + return mda.Universe(mol2_molecule, mol2_molecule) + @pytest.fixture(params=[ [PDB_CRYOEM_BOX, np.zeros(6)], [MMTF_NOCRYST, None] @@ -446,6 +455,62 @@ def test_stringio_outofrange(self, universe3): with mda.coordinates.PDB.PDBWriter(outstring) as writer: writer.write(u.atoms) + def test_hetatm_written(self, universe4, tmpdir): + """ + Checks that HETATM record types are written. + """ + + u = universe4 + u_atoms = u.select_atoms("resname ETA and record_type HETATM") + assert_equal(len(u_atoms), 8) + + outfile = str(tmpdir.join('test-hetatm.pdb')) + u.atoms.write(outfile) + written = mda.Universe(outfile, outfile) + written_atoms = written.select_atoms("resname ETA and " + "record_type HETATM") + + assert_equal(len(u_atoms), len(written_atoms)) + + def test_default_atom_record_type_written(self, universe5, tmpdir): + """ + Checks that ATOM record types are written when there is no + record_type attribute. + """ + + u = universe5 + outfile = str(tmpdir.join('test-mol2-to-pdb.pdb')) + + expected_msg = "Found no information for attr: " \ + "'record_types' Using default value of 'ATOM'" + with pytest.warns(UserWarning, match=expected_msg): + u.atoms.write(outfile) + + written = mda.Universe(outfile, outfile) + assert_equal(len(u.atoms), len(written.atoms)) + + atms = written.select_atoms("record_type ATOM") + assert_equal(len(atms.atoms), len(u.atoms)) + + hetatms = written.select_atoms("record_type HETATM") + assert_equal(len(hetatms.atoms), 0) + + def test_abnormal_record_type(self, universe5, tmpdir): + """ + Checks whether KeyError is raised when record type is + neither ATOM or HETATM. + """ + u = universe5 + u.add_TopologyAttr('record_type', ['ABNORM']*len(u.atoms)) + outfile = str(tmpdir.join('test-abnormal-record_type.pdb')) + + expected_msg = "Found 'ABNORM' for record type, but allowed " \ + "types are ATOM or HETATM" + + with pytest.raises(ValueError): + with pytest.warns(UserWarning, match=expected_msg): + u.atoms.write(outfile) + class TestMultiPDBReader(object): @staticmethod From 9e646ecea94f543476b564a237f18665794f3b65 Mon Sep 17 00:00:00 2001 From: Mieczyslaw Torchala Date: Wed, 5 Aug 2020 20:01:41 +0100 Subject: [PATCH 04/14] changes as suggested by the reviewer --- package/CHANGELOG | 3 ++- package/MDAnalysis/coordinates/PDB.py | 29 +++++++++++++++------------ 2 files changed, 18 insertions(+), 14 deletions(-) diff --git a/package/CHANGELOG b/package/CHANGELOG index 3f729659faa..e094b9108c9 100644 --- a/package/CHANGELOG +++ b/package/CHANGELOG @@ -14,7 +14,8 @@ The rules for this file: ------------------------------------------------------------------------------ ??/??/?? tylerjereddy, richardjgowers, IAlibay, hmacdope, orbeckst, cbouy, - lilyminium, daveminh, jbarnoud, yuxuanzhuang, VOD555, ianmkenney + lilyminium, daveminh, jbarnoud, yuxuanzhuang, VOD555, ianmkenney, + mieczyslaw * 2.0.0 diff --git a/package/MDAnalysis/coordinates/PDB.py b/package/MDAnalysis/coordinates/PDB.py index 8e291267bd3..78caf526ceb 100644 --- a/package/MDAnalysis/coordinates/PDB.py +++ b/package/MDAnalysis/coordinates/PDB.py @@ -495,9 +495,10 @@ class PDBWriter(base.WriterBase): are not set (:code:`None` or :code:`np.zeros(6)`, see Issue #2698). - When atom's record_type attribute is present (i.e. Universe object was - created by loading a PDB file), ATOM and HETATM record type keywords - are written out accordingly. + When the record_types attribute is present (e.g. Universe object was + created by loading a PDB file), ATOM_ and HETATM_ record type keywords + are written out accordingly. Otherwise, the ATOM_ record type is the + default output. See Also -------- @@ -998,17 +999,19 @@ def _write_timestep(self, ts, multiframe=False): .. versionchanged:: 0.7.6 The *multiframe* keyword was added, which completely determines if - MODEL records are written. (Previously, this was decided based on the - underlying trajectory and only if ``len(traj) > 1`` would MODEL records - have been written.) + MODEL records are written. (Previously, this was decided based on + the underlying trajectory and only if ``len(traj) > 1`` would + MODEL records have been written.) .. versionchanged:: 1.0.0 - ChainID now comes from the last character of segid, as stated in the documentation. - An indexing issue meant it previously used the first charater (Issue #2224) + ChainID now comes from the last character of segid, as stated in + the documentation. An indexing issue meant it previously used the + first charater (Issue #2224) .. versionchanged:: 2.0.0 - when only record_type attribute is present, instead of using ATOM for both ATOM - and HETATM, HETATM record type keyword is properly written out (Issue #1753) + When only record_type attribute is present, instead of using ATOM + for both ATOM and HETATM, HETATM record types are properly written + out (Issue #1753). """ atoms = self.obj.atoms @@ -1064,9 +1067,9 @@ def get_attr(attrname, default): try: self.pdbfile.write(self.fmt[record_types[i]].format(**vals)) except KeyError: - warnings.warn("Found '{}' for record type, but allowed types " - "are ATOM or HETATM".format(record_types[i])) - raise ValueError + errmsg = (f"Found {record_types[i]} for the record type, but " + f"only allowed types are ATOM or HETATM") + raise ValueError(errmsg) from None if multiframe: self.ENDMDL() From 0705ae8bbdf3bac0fa06886392abf581cac5386a Mon Sep 17 00:00:00 2001 From: Mieczyslaw Torchala Date: Thu, 6 Aug 2020 14:41:50 +0100 Subject: [PATCH 05/14] additional comments addressed --- package/MDAnalysis/coordinates/PDB.py | 21 ++++---------- .../MDAnalysisTests/coordinates/test_pdb.py | 29 ++++++++++--------- 2 files changed, 21 insertions(+), 29 deletions(-) diff --git a/package/MDAnalysis/coordinates/PDB.py b/package/MDAnalysis/coordinates/PDB.py index 78caf526ceb..58ce892a319 100644 --- a/package/MDAnalysis/coordinates/PDB.py +++ b/package/MDAnalysis/coordinates/PDB.py @@ -477,7 +477,9 @@ class PDBWriter(base.WriterBase): .. _CONECT: http://www.wwpdb.org/documentation/file-format-content/format32/sect10.html#CONECT .. _ATOM: http://www.wwpdb.org/documentation/file-format-content/format32/sect9.html#ATOM .. _HETATM: http://www.wwpdb.org/documentation/file-format-content/format32/sect9.html#HETATM - + .. _NUMMDL: http://www.wwpdb.org/documentation/file-format-content/format32/sect2.html#NUMMDL + .. _COMPND: http://www.wwpdb.org/documentation/file-format-content/format33/sect2.html#COMPND + .. _REMARKS: http://www.wwpdb.org/documentation/file-format-content/format33/remarks.html Note ---- @@ -620,10 +622,6 @@ def __init__(self, filename, bonds="conect", n_atoms=None, start=0, step=1, multi frame PDB file in which frames are written as MODEL_ ... ENDMDL_ records. If ``None``, then the class default is chosen. [``None``] - .. _CONECT: http://www.wwpdb.org/documentation/file-format-content/format32/sect10.html#CONECT - .. _MODEL: http://www.wwpdb.org/documentation/file-format-content/format32/sect9.html#MODEL - .. _ENDMDL: http://www.wwpdb.org/documentation/file-format-content/format32/sect9.html#ENDMDL - """ # n_atoms = None : dummy keyword argument # (not used, but Writer() always provides n_atoms as the second argument) @@ -680,9 +678,6 @@ def _write_pdb_header(self): unitary values (cubic box with sides of 1 Å) if unit cell dimensions are not set. - .. _COMPND: http://www.wwpdb.org/documentation/file-format-content/format33/sect2.html#COMPND - .. _REMARKS: http://www.wwpdb.org/documentation/file-format-content/format33/remarks.html - .. versionchanged: 1.0.0 Fix writing of PDB file without unit cell dimensions (Issue #2679). If cell dimensions are not found, unitary values (cubic box with @@ -993,13 +988,9 @@ def _write_timestep(self, ts, multiframe=False): :class:`PDBWriter` is in single frame mode and no MODEL_ records are written. - .. _MODEL: http://www.wwpdb.org/documentation/file-format-content/format32/sect9.html#MODEL - .. _ENDMDL: http://www.wwpdb.org/documentation/file-format-content/format32/sect9.html#ENDMDL - .. _NUMMDL: http://www.wwpdb.org/documentation/file-format-content/format32/sect2.html#NUMMDL - .. versionchanged:: 0.7.6 The *multiframe* keyword was added, which completely determines if - MODEL records are written. (Previously, this was decided based on + MODEL_ records are written. (Previously, this was decided based on the underlying trajectory and only if ``len(traj) > 1`` would MODEL records have been written.) @@ -1009,8 +1000,8 @@ def _write_timestep(self, ts, multiframe=False): first charater (Issue #2224) .. versionchanged:: 2.0.0 - When only record_type attribute is present, instead of using ATOM - for both ATOM and HETATM, HETATM record types are properly written + When only record_type attribute is present, instead of using ATOM_ + for both ATOM_ and HETATM_, HETATM_ record types are properly written out (Issue #1753). """ diff --git a/testsuite/MDAnalysisTests/coordinates/test_pdb.py b/testsuite/MDAnalysisTests/coordinates/test_pdb.py index 96e6e0bdea5..3b27589b06a 100644 --- a/testsuite/MDAnalysisTests/coordinates/test_pdb.py +++ b/testsuite/MDAnalysisTests/coordinates/test_pdb.py @@ -191,11 +191,11 @@ def universe3(self): @pytest.fixture def universe4(self): - return mda.Universe(PDB_HOLE, PDB_HOLE) + return mda.Universe(PDB_HOLE) @pytest.fixture def universe5(self): - return mda.Universe(mol2_molecule, mol2_molecule) + return mda.Universe(mol2_molecule) @pytest.fixture(params=[ [PDB_CRYOEM_BOX, np.zeros(6)], @@ -461,16 +461,17 @@ def test_hetatm_written(self, universe4, tmpdir): """ u = universe4 - u_atoms = u.select_atoms("resname ETA and record_type HETATM") - assert_equal(len(u_atoms), 8) + u_hetatms = u.select_atoms("resname ETA and record_type HETATM") + assert_equal(len(u_hetatms), 8) outfile = str(tmpdir.join('test-hetatm.pdb')) u.atoms.write(outfile) - written = mda.Universe(outfile, outfile) + written = mda.Universe(outfile) written_atoms = written.select_atoms("resname ETA and " "record_type HETATM") - assert_equal(len(u_atoms), len(written_atoms)) + assert len(u_hetatms) == len(written_atoms), "mismatched HETATM number" + assert_almost_equal(u_hetatms.atoms.positions, written_atoms.atoms.positions) def test_default_atom_record_type_written(self, universe5, tmpdir): """ @@ -481,19 +482,19 @@ def test_default_atom_record_type_written(self, universe5, tmpdir): u = universe5 outfile = str(tmpdir.join('test-mol2-to-pdb.pdb')) - expected_msg = "Found no information for attr: " \ - "'record_types' Using default value of 'ATOM'" + expected_msg = ("Found no information for attr: " + "'record_types' Using default value of 'ATOM'") with pytest.warns(UserWarning, match=expected_msg): u.atoms.write(outfile) - written = mda.Universe(outfile, outfile) - assert_equal(len(u.atoms), len(written.atoms)) + written = mda.Universe(outfile) + assert len(u.atoms) == len(written.atoms), "mismatched number of atoms" atms = written.select_atoms("record_type ATOM") - assert_equal(len(atms.atoms), len(u.atoms)) + assert len(atms.atoms) == len(u.atoms), "mismatched ATOM number" hetatms = written.select_atoms("record_type HETATM") - assert_equal(len(hetatms.atoms), 0) + assert len(hetatms.atoms) == 0, "mismatched HETATM number" def test_abnormal_record_type(self, universe5, tmpdir): """ @@ -504,8 +505,8 @@ def test_abnormal_record_type(self, universe5, tmpdir): u.add_TopologyAttr('record_type', ['ABNORM']*len(u.atoms)) outfile = str(tmpdir.join('test-abnormal-record_type.pdb')) - expected_msg = "Found 'ABNORM' for record type, but allowed " \ - "types are ATOM or HETATM" + expected_msg = ("Found 'ABNORM' for record type, but allowed " + "types are ATOM or HETATM") with pytest.raises(ValueError): with pytest.warns(UserWarning, match=expected_msg): From 09278fcef07a2b8ba6c32e0302ea072a11e54cdd Mon Sep 17 00:00:00 2001 From: Mieczyslaw Torchala Date: Thu, 6 Aug 2020 15:00:55 +0100 Subject: [PATCH 06/14] use outfile fixture --- testsuite/MDAnalysisTests/coordinates/test_pdb.py | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/testsuite/MDAnalysisTests/coordinates/test_pdb.py b/testsuite/MDAnalysisTests/coordinates/test_pdb.py index 3b27589b06a..abfc8900cb8 100644 --- a/testsuite/MDAnalysisTests/coordinates/test_pdb.py +++ b/testsuite/MDAnalysisTests/coordinates/test_pdb.py @@ -230,7 +230,6 @@ def u_no_resids(self): def u_no_names(self): return make_Universe(['resids', 'resnames'], trajectory=True) - def test_writer(self, universe, outfile): "Test writing from a single frame PDB file to a PDB file." "" universe.atoms.write(outfile) @@ -455,7 +454,7 @@ def test_stringio_outofrange(self, universe3): with mda.coordinates.PDB.PDBWriter(outstring) as writer: writer.write(u.atoms) - def test_hetatm_written(self, universe4, tmpdir): + def test_hetatm_written(self, universe4, tmpdir, outfile): """ Checks that HETATM record types are written. """ @@ -464,7 +463,6 @@ def test_hetatm_written(self, universe4, tmpdir): u_hetatms = u.select_atoms("resname ETA and record_type HETATM") assert_equal(len(u_hetatms), 8) - outfile = str(tmpdir.join('test-hetatm.pdb')) u.atoms.write(outfile) written = mda.Universe(outfile) written_atoms = written.select_atoms("resname ETA and " @@ -473,17 +471,17 @@ def test_hetatm_written(self, universe4, tmpdir): assert len(u_hetatms) == len(written_atoms), "mismatched HETATM number" assert_almost_equal(u_hetatms.atoms.positions, written_atoms.atoms.positions) - def test_default_atom_record_type_written(self, universe5, tmpdir): + def test_default_atom_record_type_written(self, universe5, tmpdir, outfile): """ Checks that ATOM record types are written when there is no record_type attribute. """ u = universe5 - outfile = str(tmpdir.join('test-mol2-to-pdb.pdb')) expected_msg = ("Found no information for attr: " "'record_types' Using default value of 'ATOM'") + with pytest.warns(UserWarning, match=expected_msg): u.atoms.write(outfile) @@ -496,14 +494,13 @@ def test_default_atom_record_type_written(self, universe5, tmpdir): hetatms = written.select_atoms("record_type HETATM") assert len(hetatms.atoms) == 0, "mismatched HETATM number" - def test_abnormal_record_type(self, universe5, tmpdir): + def test_abnormal_record_type(self, universe5, tmpdir, outfile): """ Checks whether KeyError is raised when record type is neither ATOM or HETATM. """ u = universe5 u.add_TopologyAttr('record_type', ['ABNORM']*len(u.atoms)) - outfile = str(tmpdir.join('test-abnormal-record_type.pdb')) expected_msg = ("Found 'ABNORM' for record type, but allowed " "types are ATOM or HETATM") From 44f63c394abe6b26c336df5c3e8add4dd673fe1d Mon Sep 17 00:00:00 2001 From: Mieczyslaw Torchala Date: Thu, 6 Aug 2020 15:18:06 +0100 Subject: [PATCH 07/14] links sorted out --- package/MDAnalysis/coordinates/PDB.py | 37 ++++++++------------------- 1 file changed, 11 insertions(+), 26 deletions(-) diff --git a/package/MDAnalysis/coordinates/PDB.py b/package/MDAnalysis/coordinates/PDB.py index 58ce892a319..54fa0d44272 100644 --- a/package/MDAnalysis/coordinates/PDB.py +++ b/package/MDAnalysis/coordinates/PDB.py @@ -472,14 +472,18 @@ class PDBWriter(base.WriterBase): .. _`PDB 3.2 standard`: http://www.wwpdb.org/documentation/file-format-content/format32/v3.2.html - .. _MODEL: http://www.wwpdb.org/documentation/file-format-content/format32/sect9.html#MODEL - .. _ENDMDL: http://www.wwpdb.org/documentation/file-format-content/format32/sect9.html#ENDMDL - .. _CONECT: http://www.wwpdb.org/documentation/file-format-content/format32/sect10.html#CONECT .. _ATOM: http://www.wwpdb.org/documentation/file-format-content/format32/sect9.html#ATOM + .. _COMPND: http://www.wwpdb.org/documentation/file-format-content/format32/sect2.html#COMPND + .. _CONECT: http://www.wwpdb.org/documentation/file-format-content/format32/sect10.html#CONECT + .. _CRYST1: http://www.wwpdb.org/documentation/file-format-content/format32/sect8.html#CRYST1 + .. _END: http://www.wwpdb.org/documentation/file-format-content/format32/sect11.html#END + .. _ENDMDL: http://www.wwpdb.org/documentation/file-format-content/format32/sect9.html#ENDMDL + .. _HEADER: http://www.wwpdb.org/documentation/file-format-content/format32/sect2.html#HEADER .. _HETATM: http://www.wwpdb.org/documentation/file-format-content/format32/sect9.html#HETATM + .. _MODEL: http://www.wwpdb.org/documentation/file-format-content/format32/sect9.html#MODEL .. _NUMMDL: http://www.wwpdb.org/documentation/file-format-content/format32/sect2.html#NUMMDL - .. _COMPND: http://www.wwpdb.org/documentation/file-format-content/format33/sect2.html#COMPND - .. _REMARKS: http://www.wwpdb.org/documentation/file-format-content/format33/remarks.html + .. _REMARKS: http://www.wwpdb.org/documentation/file-format-content/format32/remarks.html + .. _TITLE: http://www.wwpdb.org/documentation/file-format-content/format32/sect2.html#TITLE Note ---- @@ -1069,8 +1073,6 @@ def get_attr(attrname, default): def HEADER(self, trajectory): """Write HEADER_ record. - .. _HEADER: http://www.wwpdb.org/documentation/file-format-content/format32/sect2.html#HEADER - .. versionchanged:: 0.20.0 Strip `trajectory.header` since it can be modified by the user and should be sanitized (Issue #2324) @@ -1083,23 +1085,16 @@ def HEADER(self, trajectory): def TITLE(self, *title): """Write TITLE_ record. - .. _TITLE: http://www.wwpdb.org/documentation/file-format-content/format32/sect2.html - """ line = " ".join(title) # TODO: should do continuation automatically self.pdbfile.write(self.fmt['TITLE'].format(line)) def REMARK(self, *remarks): - """Write generic REMARK_ record (without number). + """Write generic REMARKS_ record (without number). - Each string provided in *remarks* is written as a separate REMARK_ + Each string provided in *remarks* is written as a separate REMARKS_ record. - See also `REMARK (update)`_. - - .. _REMARK: http://www.wwpdb.org/documentation/file-format-content/format32/remarks1.html - .. _REMARK (update): http://www.wwpdb.org/documentation/file-format-content/format32/remarks2.html - """ for remark in remarks: self.pdbfile.write(self.fmt['REMARK'].format(remark)) @@ -1129,10 +1124,6 @@ def MODEL(self, modelnumber): standard (i.e., 4 digits). If frame numbers are larger than 9999, they will wrap around, i.e., 9998, 9999, 0, 1, 2, ... - - .. _MODEL: http://www.wwpdb.org/documentation/file-format-content/format32/sect9.html#MODEL - - .. versionchanged:: 0.19.0 Maximum model number is enforced. @@ -1147,8 +1138,6 @@ def END(self): method right before closing the file it is recommended to *not* call :meth:`~PDBWriter.END` explicitly. - .. _END: http://www.wwpdb.org/documentation/file-format-content/format32/sect11.html#END - """ if not self.has_END: # only write a single END record @@ -1158,16 +1147,12 @@ def END(self): def ENDMDL(self): """Write the ENDMDL_ record. - .. _ENDMDL: http://www.wwpdb.org/documentation/file-format-content/format32/sect9.html#ENDMDL - """ self.pdbfile.write(self.fmt['ENDMDL']) def CONECT(self, conect): """Write CONECT_ record. - .. _CONECT: http://www.wwpdb.org/documentation/file-format-content/format32/sect10.html#CONECT - """ conect = ["{0:5d}".format(entry + 1) for entry in conect] conect = "".join(conect) From 96fbfbcd86a3bcc8042f9e76af5e0e1ca0877be3 Mon Sep 17 00:00:00 2001 From: Mieczyslaw Torchala Date: Thu, 6 Aug 2020 15:39:21 +0100 Subject: [PATCH 08/14] remove duplicated _CRYST1 --- package/MDAnalysis/coordinates/PDB.py | 1 - 1 file changed, 1 deletion(-) diff --git a/package/MDAnalysis/coordinates/PDB.py b/package/MDAnalysis/coordinates/PDB.py index 54fa0d44272..9d655bcdee3 100644 --- a/package/MDAnalysis/coordinates/PDB.py +++ b/package/MDAnalysis/coordinates/PDB.py @@ -475,7 +475,6 @@ class PDBWriter(base.WriterBase): .. _ATOM: http://www.wwpdb.org/documentation/file-format-content/format32/sect9.html#ATOM .. _COMPND: http://www.wwpdb.org/documentation/file-format-content/format32/sect2.html#COMPND .. _CONECT: http://www.wwpdb.org/documentation/file-format-content/format32/sect10.html#CONECT - .. _CRYST1: http://www.wwpdb.org/documentation/file-format-content/format32/sect8.html#CRYST1 .. _END: http://www.wwpdb.org/documentation/file-format-content/format32/sect11.html#END .. _ENDMDL: http://www.wwpdb.org/documentation/file-format-content/format32/sect9.html#ENDMDL .. _HEADER: http://www.wwpdb.org/documentation/file-format-content/format32/sect2.html#HEADER From 31e58bbea538b7aff007b0a4e3ec6ac045928cf6 Mon Sep 17 00:00:00 2001 From: Mieczyslaw Torchala Date: Wed, 12 Aug 2020 15:21:50 +0100 Subject: [PATCH 09/14] address reviewer comments --- package/MDAnalysis/coordinates/PDB.py | 6 +++--- testsuite/MDAnalysisTests/coordinates/test_pdb.py | 15 +++++++++------ 2 files changed, 12 insertions(+), 9 deletions(-) diff --git a/package/MDAnalysis/coordinates/PDB.py b/package/MDAnalysis/coordinates/PDB.py index 9d655bcdee3..f51d2084dcc 100644 --- a/package/MDAnalysis/coordinates/PDB.py +++ b/package/MDAnalysis/coordinates/PDB.py @@ -1003,9 +1003,9 @@ def _write_timestep(self, ts, multiframe=False): first charater (Issue #2224) .. versionchanged:: 2.0.0 - When only record_type attribute is present, instead of using ATOM_ - for both ATOM_ and HETATM_, HETATM_ record types are properly written - out (Issue #1753). + When only record_type attribute is present, instead of + using ATOM_ for both ATOM_ and HETATM_, HETATM_ record + types are properly written out (Issue #1753). """ atoms = self.obj.atoms diff --git a/testsuite/MDAnalysisTests/coordinates/test_pdb.py b/testsuite/MDAnalysisTests/coordinates/test_pdb.py index abfc8900cb8..a64c87ec80c 100644 --- a/testsuite/MDAnalysisTests/coordinates/test_pdb.py +++ b/testsuite/MDAnalysisTests/coordinates/test_pdb.py @@ -468,8 +468,10 @@ def test_hetatm_written(self, universe4, tmpdir, outfile): written_atoms = written.select_atoms("resname ETA and " "record_type HETATM") - assert len(u_hetatms) == len(written_atoms), "mismatched HETATM number" - assert_almost_equal(u_hetatms.atoms.positions, written_atoms.atoms.positions) + assert len(u_hetatms) == len(written_atoms), \ + "mismatched HETATM number" + assert_almost_equal(u_hetatms.atoms.positions, + written_atoms.atoms.positions) def test_default_atom_record_type_written(self, universe5, tmpdir, outfile): """ @@ -486,10 +488,12 @@ def test_default_atom_record_type_written(self, universe5, tmpdir, outfile): u.atoms.write(outfile) written = mda.Universe(outfile) - assert len(u.atoms) == len(written.atoms), "mismatched number of atoms" + assert len(u.atoms) == len(written.atoms), \ + "mismatched number of atoms" atms = written.select_atoms("record_type ATOM") - assert len(atms.atoms) == len(u.atoms), "mismatched ATOM number" + assert len(atms.atoms) == len(u.atoms), \ + "mismatched ATOM number" hetatms = written.select_atoms("record_type HETATM") assert len(hetatms.atoms) == 0, "mismatched HETATM number" @@ -506,8 +510,7 @@ def test_abnormal_record_type(self, universe5, tmpdir, outfile): "types are ATOM or HETATM") with pytest.raises(ValueError): - with pytest.warns(UserWarning, match=expected_msg): - u.atoms.write(outfile) + u.atoms.write(outfile) class TestMultiPDBReader(object): From 908746f17bd6ed8daae79cf3995348c3281e4a34 Mon Sep 17 00:00:00 2001 From: Mieczyslaw Torchala Date: Wed, 12 Aug 2020 15:30:45 +0100 Subject: [PATCH 10/14] match exception message --- testsuite/MDAnalysisTests/coordinates/test_pdb.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/testsuite/MDAnalysisTests/coordinates/test_pdb.py b/testsuite/MDAnalysisTests/coordinates/test_pdb.py index a64c87ec80c..749ea56be83 100644 --- a/testsuite/MDAnalysisTests/coordinates/test_pdb.py +++ b/testsuite/MDAnalysisTests/coordinates/test_pdb.py @@ -506,10 +506,10 @@ def test_abnormal_record_type(self, universe5, tmpdir, outfile): u = universe5 u.add_TopologyAttr('record_type', ['ABNORM']*len(u.atoms)) - expected_msg = ("Found 'ABNORM' for record type, but allowed " - "types are ATOM or HETATM") + expected_msg = ("Found ABNORM for the record type, but only " + "allowed types are ATOM or HETATM") - with pytest.raises(ValueError): + with pytest.raises(ValueError, match=expected_msg): u.atoms.write(outfile) From d80665888b15157de570b5dbe31e3da30c354bde Mon Sep 17 00:00:00 2001 From: Mieczyslaw Torchala Date: Wed, 12 Aug 2020 15:32:00 +0100 Subject: [PATCH 11/14] line lenght fix --- testsuite/MDAnalysisTests/coordinates/test_pdb.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/testsuite/MDAnalysisTests/coordinates/test_pdb.py b/testsuite/MDAnalysisTests/coordinates/test_pdb.py index 749ea56be83..9cd3fbc5989 100644 --- a/testsuite/MDAnalysisTests/coordinates/test_pdb.py +++ b/testsuite/MDAnalysisTests/coordinates/test_pdb.py @@ -473,7 +473,8 @@ def test_hetatm_written(self, universe4, tmpdir, outfile): assert_almost_equal(u_hetatms.atoms.positions, written_atoms.atoms.positions) - def test_default_atom_record_type_written(self, universe5, tmpdir, outfile): + def test_default_atom_record_type_written(self, universe5, tmpdir, + outfile): """ Checks that ATOM record types are written when there is no record_type attribute. From 88802f99ceac3675950d26bd3c72e09e72430ee9 Mon Sep 17 00:00:00 2001 From: Mieczyslaw Torchala Date: Fri, 14 Aug 2020 00:13:02 +0100 Subject: [PATCH 12/14] change PDB version to 3.3 --- package/MDAnalysis/coordinates/PDB.py | 52 +++++++++++++-------------- 1 file changed, 26 insertions(+), 26 deletions(-) diff --git a/package/MDAnalysis/coordinates/PDB.py b/package/MDAnalysis/coordinates/PDB.py index f51d2084dcc..ae0ace7a73a 100644 --- a/package/MDAnalysis/coordinates/PDB.py +++ b/package/MDAnalysis/coordinates/PDB.py @@ -55,7 +55,7 @@ W.write(calphas) It is important to *always close the trajectory* when done because only at this -step is the final END_ record written, which is required by the `PDB 3.2 +step is the final END_ record written, which is required by the `PDB 3.3 standard`_. Using the writer as a context manager ensures that this always happens. @@ -65,7 +65,7 @@ A pure-Python implementation for PDB files commonly encountered in MD simulations comes under the names :class:`PDBReader` and :class:`PDBWriter`. It -only implements a subset of the `PDB 3.2 standard`_ and also allows some +only implements a subset of the `PDB 3.3 standard`_ and also allows some typical enhancements such as 4-letter resids (introduced by CHARMM/NAMD). The :class:`PDBReader` can read multi-frame PDB files and represents @@ -136,8 +136,8 @@ :inherited-members: -.. _`PDB 3.2 standard`: - http://www.wwpdb.org/documentation/file-format-content/format32/v3.2.html +.. _`PDB 3.3 standard`: + http://www.wwpdb.org/documentation/file-format-content/format33/v3.3.html """ from io import StringIO, BytesIO @@ -177,11 +177,11 @@ class PDBReader(base.ReaderBase): Reads multi-`MODEL`_ PDB files as trajectories. .. _PDB-formatted: - http://www.wwpdb.org/documentation/file-format-content/format32/v3.2.html + http://www.wwpdb.org/documentation/file-format-content/format33/v3.3.html .. _PDB coordinate section: - http://www.wwpdb.org/documentation/file-format-content/format32/sect9.html + http://www.wwpdb.org/documentation/file-format-content/format33/sect9.html .. _MODEL: - http://www.wwpdb.org/documentation/file-format-content/format32/sect9.html#MODEL + http://www.wwpdb.org/documentation/file-format-content/format33/sect9.html#MODEL ============= ============ =========== ============================================= COLUMNS DATA TYPE FIELD DEFINITION @@ -459,7 +459,7 @@ def close(self): class PDBWriter(base.WriterBase): - """PDB writer that implements a subset of the `PDB 3.2 standard`_ . + """PDB writer that implements a subset of the `PDB 3.3 standard`_ . PDB format as used by NAMD/CHARMM: 4-letter resnames and segID are allowed, altLoc is written. @@ -470,19 +470,19 @@ class PDBWriter(base.WriterBase): PDB "movie" (multi frame mode, *multiframe* = ``True``), consisting of multiple models (using the MODEL_ and ENDMDL_ records). - .. _`PDB 3.2 standard`: - http://www.wwpdb.org/documentation/file-format-content/format32/v3.2.html - .. _ATOM: http://www.wwpdb.org/documentation/file-format-content/format32/sect9.html#ATOM - .. _COMPND: http://www.wwpdb.org/documentation/file-format-content/format32/sect2.html#COMPND - .. _CONECT: http://www.wwpdb.org/documentation/file-format-content/format32/sect10.html#CONECT - .. _END: http://www.wwpdb.org/documentation/file-format-content/format32/sect11.html#END - .. _ENDMDL: http://www.wwpdb.org/documentation/file-format-content/format32/sect9.html#ENDMDL - .. _HEADER: http://www.wwpdb.org/documentation/file-format-content/format32/sect2.html#HEADER - .. _HETATM: http://www.wwpdb.org/documentation/file-format-content/format32/sect9.html#HETATM - .. _MODEL: http://www.wwpdb.org/documentation/file-format-content/format32/sect9.html#MODEL - .. _NUMMDL: http://www.wwpdb.org/documentation/file-format-content/format32/sect2.html#NUMMDL - .. _REMARKS: http://www.wwpdb.org/documentation/file-format-content/format32/remarks.html - .. _TITLE: http://www.wwpdb.org/documentation/file-format-content/format32/sect2.html#TITLE + .. _`PDB 3.3 standard`: + http://www.wwpdb.org/documentation/file-format-content/format33/v3.3.html + .. _ATOM: http://www.wwpdb.org/documentation/file-format-content/format33/sect9.html#ATOM + .. _COMPND: http://www.wwpdb.org/documentation/file-format-content/format33/sect2.html#COMPND + .. _CONECT: http://www.wwpdb.org/documentation/file-format-content/format33/sect10.html#CONECT + .. _END: http://www.wwpdb.org/documentation/file-format-content/format33/sect11.html#END + .. _ENDMDL: http://www.wwpdb.org/documentation/file-format-content/format33/sect9.html#ENDMDL + .. _HEADER: http://www.wwpdb.org/documentation/file-format-content/format33/sect2.html#HEADER + .. _HETATM: http://www.wwpdb.org/documentation/file-format-content/format33/sect9.html#HETATM + .. _MODEL: http://www.wwpdb.org/documentation/file-format-content/format33/sect9.html#MODEL + .. _NUMMDL: http://www.wwpdb.org/documentation/file-format-content/format33/sect2.html#NUMMDL + .. _REMARKS: http://www.wwpdb.org/documentation/file-format-content/format33/remarks.html + .. _TITLE: http://www.wwpdb.org/documentation/file-format-content/format33/sect2.html#TITLE Note ---- @@ -1161,7 +1161,7 @@ def CONECT(self, conect): class ExtendedPDBReader(PDBReader): """PDBReader that reads a PDB-formatted file with five-digit residue numbers. - This reader does not conform to the `PDB 3.2 standard`_ because it allows + This reader does not conform to the `PDB 3.3 standard`_ because it allows five-digit residue numbers that may take up columns 23 to 27 (inclusive) instead of being confined to 23-26 (with column 27 being reserved for the insertion code in the PDB standard). PDB files in this format are written @@ -1180,7 +1180,7 @@ class ExtendedPDBReader(PDBReader): class MultiPDBWriter(PDBWriter): - """PDB writer that implements a subset of the `PDB 3.2 standard`_ . + """PDB writer that implements a subset of the `PDB 3.3 standard`_ . PDB format as used by NAMD/CHARMM: 4-letter resnames and segID, altLoc is written. @@ -1190,9 +1190,9 @@ class MultiPDBWriter(PDBWriter): and ENDMDL_ records). - .. _MODEL: http://www.wwpdb.org/documentation/file-format-content/format32/sect9.html#MODEL - .. _ENDMDL: http://www.wwpdb.org/documentation/file-format-content/format32/sect9.html#ENDMDL - .. _CONECT: http://www.wwpdb.org/documentation/file-format-content/format32/sect10.html#CONECT + .. _MODEL: http://www.wwpdb.org/documentation/file-format-content/format33/sect9.html#MODEL + .. _ENDMDL: http://www.wwpdb.org/documentation/file-format-content/format33/sect9.html#ENDMDL + .. _CONECT: http://www.wwpdb.org/documentation/file-format-content/format33/sect10.html#CONECT See Also From 4b44ecbca650d9d060942d4873e59852cf20e064 Mon Sep 17 00:00:00 2001 From: Irfan Alibay Date: Fri, 14 Aug 2020 16:30:21 +0100 Subject: [PATCH 13/14] Issue 2906 --- package/CHANGELOG | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/package/CHANGELOG b/package/CHANGELOG index efdbfd0b54f..ec2a2212112 100644 --- a/package/CHANGELOG +++ b/package/CHANGELOG @@ -21,8 +21,8 @@ The rules for this file: Fixes * Instead of using ATOM for both ATOM and HETATM, HETATM record type - keyword is properly written out by PDB writer. Change referenced - PDB standard version to 3.3. (Issue #1753, PR #2880). + keyword is properly written out by PDB writer (Issue #1753, PR #2880). + * Change referenced PDB standard version to 3.3 (Issue #2906). * Fixed reading in masses and charges from a hoomdxml file (Issue #2888, PR #2889) * Bond attribute is no longer set if PDB file contains no CONECT records From 6890a31eb16ead4dd85fcca1fe563e57f5e0adcd Mon Sep 17 00:00:00 2001 From: Mieczyslaw Torchala Date: Fri, 14 Aug 2020 21:28:28 +0100 Subject: [PATCH 14/14] reviewer's request --- package/MDAnalysis/coordinates/PDB.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/package/MDAnalysis/coordinates/PDB.py b/package/MDAnalysis/coordinates/PDB.py index ae0ace7a73a..8b90c1bda9e 100644 --- a/package/MDAnalysis/coordinates/PDB.py +++ b/package/MDAnalysis/coordinates/PDB.py @@ -500,10 +500,10 @@ class PDBWriter(base.WriterBase): are not set (:code:`None` or :code:`np.zeros(6)`, see Issue #2698). - When the record_types attribute is present (e.g. Universe object was - created by loading a PDB file), ATOM_ and HETATM_ record type keywords - are written out accordingly. Otherwise, the ATOM_ record type is the - default output. + When the :attr:`record_types` attribute is present (e.g. Universe object + was created by loading a PDB file), ATOM_ and HETATM_ record type + keywords are written out accordingly. Otherwise, the ATOM_ record type + is the default output. See Also -------- @@ -1003,7 +1003,7 @@ def _write_timestep(self, ts, multiframe=False): first charater (Issue #2224) .. versionchanged:: 2.0.0 - When only record_type attribute is present, instead of + When only :attr:`record_types` attribute is present, instead of using ATOM_ for both ATOM_ and HETATM_, HETATM_ record types are properly written out (Issue #1753).