Skip to content

Commit

Permalink
Merge pull request #1762 from MDAnalysis/issue-1753-add_recordtype
Browse files Browse the repository at this point in the history
added reading of record types
  • Loading branch information
jbarnoud authored Jan 29, 2018
2 parents 10ee7ff + 4de452f commit d9ef4ec
Show file tree
Hide file tree
Showing 9 changed files with 144 additions and 8 deletions.
6 changes: 5 additions & 1 deletion package/CHANGELOG
Original file line number Diff line number Diff line change
Expand Up @@ -13,10 +13,14 @@ The rules for this file:
* release numbers follow "Semantic Versioning" http://semver.org

------------------------------------------------------------------------------
mm/dd/18
mm/dd/18 richardjgowers

* 0.17.1

Enhancements
* Added reading of record types (ATOM/HETATM) for PDB, PDBQT and PQR formats
(Issue #1753)

Fixes


Expand Down
32 changes: 32 additions & 0 deletions package/MDAnalysis/core/topologyattrs.py
Original file line number Diff line number Diff line change
Expand Up @@ -651,6 +651,38 @@ def _gen_initial_values(na, nr, ns):
return np.zeros(na)


class RecordTypes(AtomAttr):
"""For PDB-like formats, indicates if ATOM or HETATM
Defaults to 'ATOM'
"""
# internally encodes {True: 'ATOM', False: 'HETATM'}
attrname = 'record_types'
singular = 'record_type'
per_object = 'atom'

def __init__(self, values, guessed=False):
self.values = np.where(values == 'ATOM', True, False)
self._guessed = guessed

@staticmethod
def _gen_initial_values(na, nr, ns):
return np.array(['ATOM'] * na, dtype=object)

def get_atoms(self, atomgroup):
return np.where(self.values[atomgroup.ix], 'ATOM', 'HETATM')

@_check_length
def set_atoms(self, atomgroup, values):
self.values[atomgroup.ix] = np.where(values == 'ATOM', True, False)

def get_residues(self, rg):
return [self.get_atoms(r.atoms) for r in rg]

def get_segments(self, sg):
return [self.get_atoms(s.atoms) for s in sg]


class ChainIDs(AtomAttr):
"""ChainID per atom
Expand Down
11 changes: 8 additions & 3 deletions package/MDAnalysis/topology/PDBParser.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@

# -*- Mode: python; tab-width: 4; indent-tabs-mode:nil; coding:utf-8 -*-
# vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4
#
Expand Down Expand Up @@ -74,6 +73,7 @@
ICodes,
Masses,
Occupancies,
RecordTypes,
Resids,
Resnames,
Resnums,
Expand All @@ -96,6 +96,7 @@ class PDBParser(TopologyReaderBase):
- chainids
- bfactors
- occupancies
- record_types (ATOM/HETATM)
- resids
- resnames
- segids
Expand All @@ -108,8 +109,10 @@ class PDBParser(TopologyReaderBase):
:class:`MDAnalysis.coordinates.PDB.PDBReader`
.. versionadded:: 0.8
.. versionchanged:: 0.17.1
Added parsing of Record types
"""
format = ['PDB','ENT']
format = ['PDB', 'ENT']

def parse(self, **kwargs):
"""Parse atom information from PDB file
Expand All @@ -134,6 +137,7 @@ def _parseatoms(self):
"""Create the initial Topology object"""
resid_prev = 0 # resid looping hack

record_types = []
serials = []
names = []
altlocs = []
Expand All @@ -160,6 +164,7 @@ def _parseatoms(self):
if not line.startswith(('ATOM', 'HETATM')):
continue

record_types.append(line[:6].strip())
try:
serial = int(line[6:11])
except ValueError:
Expand Down Expand Up @@ -217,6 +222,7 @@ def _parseatoms(self):
(names, Atomnames, object),
(altlocs, AltLocs, object),
(chainids, ChainIDs, object),
(record_types, RecordTypes, object),
(serials, Atomids, np.int32),
(tempfactors, Tempfactors, np.float32),
(occupancies, Occupancies, np.float32),
Expand Down Expand Up @@ -343,4 +349,3 @@ def _parse_conect(conect):
bond_atoms = (int(conect[11 + i * 5: 16 + i * 5]) for i in
range(n_bond_atoms))
return atom_id, bond_atoms

8 changes: 8 additions & 0 deletions package/MDAnalysis/topology/PDBQTParser.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,7 @@
Charges,
Masses,
Occupancies,
RecordTypes,
Resids,
Resnums,
Resnames,
Expand All @@ -89,6 +90,7 @@ class PDBQTParser(TopologyReaderBase):
- resnames
- chainIDs (becomes segid)
- resids
- record_types (ATOM/HETATM)
- icodes
- occupancies
- tempfactors
Expand All @@ -97,6 +99,9 @@ class PDBQTParser(TopologyReaderBase):
Guesses the following:
- elements
- masses
.. versionchanged:: 0.17.1
Added parsing of Record types
"""
format = 'PDBQT'

Expand All @@ -107,6 +112,7 @@ def parse(self, **kwargs):
-------
MDAnalysis Topology object
"""
record_types = []
serials = []
names = []
altlocs = []
Expand All @@ -124,6 +130,7 @@ def parse(self, **kwargs):
line = line.strip()
if not line.startswith(('ATOM', 'HETATM')):
continue
record_types.append(line[:6].strip())
serials.append(int(line[6:11]))
names.append(line[12:16].strip())
altlocs.append(line[16:17].strip())
Expand All @@ -142,6 +149,7 @@ def parse(self, **kwargs):

attrs = []
for attrlist, Attr, dtype in (
(record_types, RecordTypes, object),
(serials, Atomids, np.int32),
(names, Atomnames, object),
(altlocs, AltLocs, object),
Expand Down
8 changes: 8 additions & 0 deletions package/MDAnalysis/topology/PQRParser.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,7 @@
ICodes,
Masses,
Radii,
RecordTypes,
Resids,
Resnums,
Resnames,
Expand All @@ -77,6 +78,7 @@ class PQRParser(TopologyReaderBase):
- Atomnames
- Charges
- Radii
- RecordTypes (ATOM/HETATM)
- Resids
- Resnames
- Segids
Expand All @@ -90,6 +92,9 @@ class PQRParser(TopologyReaderBase):
'SYSTEM' as the new segid).
.. versionchanged:: 0.16.1
Now reads insertion codes and splits into new residues around these
.. versionchanged:: 0.17.1
Added parsing of Record types
"""
format = 'PQR'

Expand All @@ -100,6 +105,7 @@ def parse(self, **kwargs):
-------
A MDAnalysis Topology object
"""
record_types = []
serials = []
names = []
resnames = []
Expand Down Expand Up @@ -131,6 +137,7 @@ def parse(self, **kwargs):
else:
icode = ''

record_types.append(recordName)
serials.append(serial)
names.append(name)
resnames.append(resName)
Expand All @@ -151,6 +158,7 @@ def parse(self, **kwargs):
attrs.append(Charges(np.array(charges, dtype=np.float32)))
attrs.append(Atomtypes(atomtypes, guessed=True))
attrs.append(Masses(masses, guessed=True))
attrs.append(RecordTypes(np.array(record_types, dtype=object)))
attrs.append(Radii(np.array(radii, dtype=np.float32)))

resids = np.array(resids, dtype=np.int32)
Expand Down
55 changes: 55 additions & 0 deletions testsuite/MDAnalysisTests/core/test_topologyattrs.py
Original file line number Diff line number Diff line change
Expand Up @@ -488,3 +488,58 @@ def test_no_deprecation(self, universe, instruction):
"""
with no_deprecated_call():
exec(instruction) #pylint: disable=W0122


class TestRecordTypes(object):
def test_record_types_default(self):
u = make_Universe()

u.add_TopologyAttr('record_type')

assert u.atoms[0].record_type == 'ATOM'
assert_equal(u.atoms[:10].record_types, 'ATOM')

@pytest.fixture()
def rectype_uni(self):
# standard 125/25/5 universe
u = make_Universe()
u.add_TopologyAttr('record_type')
# first 25 atoms are ATOM (first 5 residues, first segment)
# 25 to 50th are HETATM (res 5:10, second segment)
# all after are ATOM
u.atoms[:25].record_types = 'ATOM'
u.atoms[25:50].record_types = 'HETATM'
u.atoms[50:].record_types = 'ATOM'

return u

def test_encoding(self, rectype_uni):
ag = rectype_uni.atoms[:10]

ag[0].record_type = 'ATOM'
ag[1:4].record_types = 'HETATM'

assert ag[0].record_type == 'ATOM'
assert ag[1].record_type == 'HETATM'

def test_residue_record_types(self, rectype_uni):
rt = rectype_uni.residues.record_types

assert isinstance(rt, list)
assert len(rt) == 25

# check return type explicitly
# some versions of numpy allow bool to str comparison
assert not rt[0].dtype == bool
assert (rt[0] == 'ATOM').all()
assert (rt[5] == 'HETATM').all()

def test_segment_record_types(self, rectype_uni):
rt = rectype_uni.segments.record_types

assert isinstance(rt, list)
assert len(rt) == 5

assert not rt[0].dtype == bool
assert (rt[0] == 'ATOM').all()
assert (rt[1] == 'HETATM').all()
16 changes: 14 additions & 2 deletions testsuite/MDAnalysisTests/topology/test_pdb.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
from MDAnalysisTests.topology.base import ParserBase
from MDAnalysisTests.datafiles import (
PDB,
PDB_HOLE,
PDB_small,
PDB_conect,
PDB_conect2TER,
Expand All @@ -44,7 +45,7 @@ class TestPDBParser(ParserBase):
"""This one has neither chainids or segids"""
parser = mda.topology.PDBParser.PDBParser
ref_filename = PDB
expected_attrs = ['ids', 'names', 'resids', 'resnames']
expected_attrs = ['ids', 'names', 'record_types', 'resids', 'resnames']
guessed_attrs = ['types', 'masses']
expected_n_atoms = 47681
expected_n_residues = 11302
Expand All @@ -55,7 +56,8 @@ class TestPDBParserSegids(ParserBase):
"""Has segids"""
parser = mda.topology.PDBParser.PDBParser
ref_filename = PDB_small
expected_attrs = ['ids', 'names', 'resids', 'resnames', 'segids']
expected_attrs = ['ids', 'names', 'record_types', 'resids', 'resnames',
'segids']
guessed_attrs = ['types', 'masses']
expected_n_atoms = 3341
expected_n_residues = 214
Expand Down Expand Up @@ -148,3 +150,13 @@ def test_sameresid_diffresname():
for i, (resid, resname) in enumerate(zip(resids, resnames)):
assert top.resids.values[i] == resid
assert top.resnames.values[i] == resname


def test_PDB_record_types():
u = mda.Universe(PDB_HOLE)

assert u.atoms[0].record_type == 'ATOM'
assert u.atoms[132].record_type == 'HETATM'

assert_equal(u.atoms[10:20].record_types, 'ATOM')
assert_equal(u.atoms[271:].record_types, 'HETATM')
2 changes: 1 addition & 1 deletion testsuite/MDAnalysisTests/topology/test_pdbqt.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ class TestPDBQT(ParserBase):
ref_filename = PDBQT_input
expected_attrs = [
'ids', 'names', 'charges', 'types', 'altLocs', 'resids', 'resnames',
'segids'
'segids', 'record_types'
]
guessed_attrs = ['masses']
expected_n_atoms = 1805
Expand Down
14 changes: 13 additions & 1 deletion testsuite/MDAnalysisTests/topology/test_pqr.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,8 @@
#
from __future__ import absolute_import

from numpy.testing import assert_equal

import MDAnalysis as mda

from MDAnalysisTests.topology.base import ParserBase
Expand All @@ -33,7 +35,7 @@
class TestPQRParser(ParserBase):
parser = mda.topology.PQRParser.PQRParser
ref_filename = PQR
expected_attrs = ['ids', 'names', 'charges', 'radii',
expected_attrs = ['ids', 'names', 'charges', 'radii', 'record_types',
'resids', 'resnames', 'icodes',
'segids']
guessed_attrs = ['masses', 'types']
Expand All @@ -55,3 +57,13 @@ class TestPQRParser2(TestPQRParser):
ref_filename = PQR_icodes
expected_n_atoms = 5313
expected_n_residues = 474


def test_record_types():
u = mda.Universe(PQR_icodes)

assert u.atoms[4052].record_type == 'ATOM'
assert u.atoms[4053].record_type == 'HETATM'

assert_equal(u.atoms[:10].record_types, 'ATOM')
assert_equal(u.atoms[4060:4070].record_types, 'HETATM')

0 comments on commit d9ef4ec

Please sign in to comment.