From 9fde9d1ef85190d7a2210d0a12e4126b477b35e4 Mon Sep 17 00:00:00 2001 From: Henrik Jaeger Date: Wed, 7 Apr 2021 08:35:22 +0200 Subject: [PATCH 1/2] Parse atomtypes section of itp files (#3220) Fixes #3199 --- package/CHANGELOG | 2 + package/MDAnalysis/topology/ITPParser.py | 82 +++++++++++++++---- testsuite/MDAnalysisTests/data/atomtypes.itp | 30 +++++++ testsuite/MDAnalysisTests/datafiles.py | 3 + .../MDAnalysisTests/topology/test_itp.py | 45 ++++++++-- 5 files changed, 137 insertions(+), 25 deletions(-) create mode 100644 testsuite/MDAnalysisTests/data/atomtypes.itp diff --git a/package/CHANGELOG b/package/CHANGELOG index cd922bb8401..76351c969e2 100644 --- a/package/CHANGELOG +++ b/package/CHANGELOG @@ -91,6 +91,8 @@ Fixes * Fix syntax warning over comparison of literals using is (Issue #3066) Enhancements + * ITPParser now reads [ atomtypes ] sections in ITP files, used for charges + and masses not defined in the [ atoms ] sections * Add `set_dimensions` transformation class for setting constant box dimensions for all timesteps in trajectory (Issue #2691) * Added a ValueError raised when not given a gridcenter while diff --git a/package/MDAnalysis/topology/ITPParser.py b/package/MDAnalysis/topology/ITPParser.py index e2962632781..952ee866d69 100644 --- a/package/MDAnalysis/topology/ITPParser.py +++ b/package/MDAnalysis/topology/ITPParser.py @@ -35,9 +35,12 @@ 1 molecule of each defined ``moleculetype``. If a ``[ molecules ]`` section is present, ``infer_system`` is ignored. -If files are included with the `#include` directive, they will also be read. If they are not in the working directory, ITPParser will look for them in the ``include_dir`` directory. By default, this is set to ``include_dir="/usr/local/gromacs/share/gromacs/top/"``. -Variables can be defined with the `#define` directive in files, or by passing in -:ref:`keyword arguments `. +If files are included with the `#include` directive, they will also be read. +If they are not in the working directory, ITPParser will look for them in the +``include_dir`` directory. By default, this is set to +``include_dir="/usr/local/gromacs/share/gromacs/top/"``. +Variables can be defined with the `#define` directive in files, or by passing +in :ref:`keyword arguments `. Examples -------- @@ -151,11 +154,13 @@ ) from ..core.topology import Topology + class Chargegroups(AtomAttr): """The charge group for each Atom""" attrname = 'chargegroups' singular = 'chargegroup' + class GmxTopIterator: """ Iterate over the lines of a TOP/ITP file and its included files @@ -324,7 +329,7 @@ def parse_atoms(self, line): for lst in self.atom_order: try: lst.append(values.pop(0)) - except IndexError: # ran out of values + except IndexError: # ran out of values lst.append('') def parse_bonds(self, line): @@ -333,7 +338,7 @@ def parse_bonds(self, line): def parse_angles(self, line): self.add_param(line, self.angles, n_funct=3, - funct_values=(1, 2, 3, 4, 5, 6, 8, 10)) + funct_values=(1, 2, 3, 4, 5, 6, 8, 10)) def parse_dihedrals(self, line): dih = self.add_param(line, self.dihedrals, n_funct=4, @@ -374,7 +379,6 @@ def resolve_residue_attrs(self): self.resolved_residue_attrs = True - def shift_indices(self, atomid=0, resid=0, molnum=0, cgnr=0, n_res=0, n_atoms=0): """ Get attributes ready for adding onto a larger topology. @@ -419,9 +423,6 @@ def shift_indices(self, atomid=0, resid=0, molnum=0, cgnr=0, n_res=0, n_atoms=0) new_params.append(new) return atom_order, new_params, molnums, self.moltypes, residx - - - def add_param(self, line, container, n_funct=2, funct_values=[]): """Add defined GROMACS directive lines, only if the funct is in ``funct_values``""" @@ -444,7 +445,6 @@ def index_ids(self, values): return tuple(map(self.ids.index, values)) - class ITPParser(TopologyReaderBase): """Read topology information from a GROMACS ITP_ or TOP_ file. @@ -465,7 +465,6 @@ class ITPParser(TopologyReaderBase): - dihedrals - impropers - .. _ITP: http://manual.gromacs.org/current/reference-manual/topologies/topology-file-formats.html#molecule-itp-file .. _TOP: http://manual.gromacs.org/current/reference-manual/file-formats.html#top """ @@ -495,6 +494,7 @@ def parse(self, include_dir='/usr/local/gromacs/share/gromacs/top/', MDAnalysis *Topology* object """ + self.atomtypes = {} self.molecules = {} self._molecules = [] # for order self.current_mol = None @@ -508,7 +508,10 @@ def parse(self, include_dir='/usr/local/gromacs/share/gromacs/top/', if '[' in line and ']' in line: section = line.split('[')[1].split(']')[0].strip() - if section == 'moleculetype': + if section == 'atomtypes': + self.parser = self.parse_atomtypes + + elif section == 'moleculetype': self.parser = self.parse_moleculetype elif section == 'molecules': @@ -527,7 +530,33 @@ def parse(self, include_dir='/usr/local/gromacs/share/gromacs/top/', self.system_molecules = [x.name for x in self._molecules] self.build_system() - + + self.types = np.array(self.types) + self.charges = np.array(self.charges) + self.masses = np.array(self.masses) + + if not all(self.charges): + empty = self.charges == '' + self.charges[empty] = [ + ( + self.atomtypes.get(x)["charge"] + if x in self.atomtypes.keys() + else '' + ) + for x in self.types[empty] + ] + + if not all(self.masses): + empty = self.masses == '' + self.masses[empty] = [ + ( + self.atomtypes.get(x)["mass"] + if x in self.atomtypes.keys() + else '' + ) + for x in self.types[empty] + ] + attrs = [] # atom stuff for vals, Attr, dtype in ( @@ -539,12 +568,16 @@ def parse(self, include_dir='/usr/local/gromacs/share/gromacs/top/', ): if all(vals): attrs.append(Attr(np.array(vals, dtype=dtype))) - + if not all(self.masses): - masses = guessers.guess_masses(self.types) - attrs.append(Masses(masses, guessed=True)) + empty = self.masses == '' + self.masses[empty] = guessers.guess_masses( + guessers.guess_types(self.types)[empty]) + attrs.append(Masses(np.array(self.masses, dtype=np.float64), + guessed=True)) else: - attrs.append(Masses(np.array(self.masses, dtype=np.float64))) + attrs.append(Masses(np.array(self.masses, dtype=np.float64), + guessed=False)) # residue stuff resids = np.array(self.resids, dtype=np.int32) @@ -586,10 +619,23 @@ def parse(self, include_dir='/usr/local/gromacs/share/gromacs/top/', return top - def _pass(self, line): pass + def parse_atomtypes(self, line): + keys = ['type_bonded', 'atomic_number', 'mass', 'charge', 'p_type'] + fields = line.split() + if len(fields[5]) == 1 and fields[5].isalpha(): + values = fields[1:6] + elif len(fields[3]) == 1 and fields[3].isalpha(): + values = '', '', fields[1], fields[2], fields[3] + elif len(fields[4]) == 1 and fields[4].isalpha(): + if fields[1][0].isalpha(): + values = fields[1], '', fields[2], fields[3], fields[4] + else: + values = '', fields[1], fields[2], fields[3], fields[4] + self.atomtypes[fields[0]] = dict(zip(keys, values)) + def parse_moleculetype(self, line): name = line.split()[0] self.current_mol = self.molecules[name] = Molecule(name) diff --git a/testsuite/MDAnalysisTests/data/atomtypes.itp b/testsuite/MDAnalysisTests/data/atomtypes.itp new file mode 100644 index 00000000000..301ac1d9e06 --- /dev/null +++ b/testsuite/MDAnalysisTests/data/atomtypes.itp @@ -0,0 +1,30 @@ +[ defaults ] +; nbfunc comb-rule gen-pairs fudgeLJ fudgeQQ + 1 2 no 1.0 1.0 + + +[ atomtypes ] +;All possible variations of atomtypes lines +;type b_type at.num mass charge ptype sigma epsilon +A 10.086 2.000 A 0.356359487256 0.209200 +B BB 13 20.989 -3.000 A 0.349832094823 0.202300 +C 15 25.935 1.000 A 0.320190238903 0.206840 +D BD 12.129 -1.000 A 0.310128943840 0.201950 + + +[ moleculetype ] +; molname nrexcl +TEST 1 + +[ atoms ] +;nr type resnr residue atom cgnr charge mass +1 A 1 SURF A 1 4.000 8.000; +2 B 1 SURF B 2 1.1 ; +3 B 1 SURF B 3 ; +4 H 1 SURF H 4 1.000; + +[ system ] +Testing + +[ molecules ] +TEST 1 diff --git a/testsuite/MDAnalysisTests/datafiles.py b/testsuite/MDAnalysisTests/datafiles.py index 2d443df3b0f..d647f435169 100644 --- a/testsuite/MDAnalysisTests/datafiles.py +++ b/testsuite/MDAnalysisTests/datafiles.py @@ -187,6 +187,7 @@ "GRO_huge_box", # for testing gro parser with hige box sizes "ITP", # for GROMACS generated itps "ITP_nomass", # for ATB generated itps + "ITP_atomtypes", # atom definitions to check atomtyes section parsing "NAMDBIN", # for NAMD generated binary file "ITP_edited", # to check different directives are read properly "ITP_tip5p", # tip5p water from opls-aa, edited with additional keywords @@ -542,9 +543,11 @@ ITP = resource_filename(__name__, 'data/gromacs_ala10.itp') ITP_nomass = resource_filename(__name__, 'data/itp_nomass.itp') +ITP_atomtypes = resource_filename(__name__, 'data/atomtypes.itp') ITP_edited = resource_filename(__name__, 'data/edited_itp.itp') ITP_tip5p = resource_filename(__name__, "data/tip5p.itp") ITP_spce = resource_filename(__name__, 'data/spce.itp') + GMX_TOP = resource_filename(__name__, 'data/gromacs_ala10.top') GMX_DIR = resource_filename(__name__, 'data/gromacs/') GMX_TOP_BAD = resource_filename(__name__, 'data/bad_top.top') diff --git a/testsuite/MDAnalysisTests/topology/test_itp.py b/testsuite/MDAnalysisTests/topology/test_itp.py index b16fd38bb12..3f0607d8def 100644 --- a/testsuite/MDAnalysisTests/topology/test_itp.py +++ b/testsuite/MDAnalysisTests/topology/test_itp.py @@ -29,16 +29,18 @@ from MDAnalysisTests.topology.base import ParserBase from MDAnalysisTests.datafiles import ( ITP, # GROMACS itp - ITP_nomass, # from Automated Topology Builder + ITP_nomass, # from Automated Topology Builder + ITP_atomtypes, ITP_edited, ITP_tip5p, ITP_spce, GMX_TOP, GMX_DIR, GMX_TOP_BAD, - ITP_no_endif + ITP_no_endif, ) + class BaseITP(ParserBase): parser = mda.topology.ITPParser.ITPParser expected_attrs = ['ids', 'names', 'types', 'masses', @@ -55,7 +57,6 @@ class BaseITP(ParserBase): expected_n_dihedrals = 0 expected_n_impropers = 0 - @pytest.fixture def universe(self, filename): return mda.Universe(filename) @@ -84,7 +85,6 @@ class TestITP(BaseITP): expected_n_angles = 91 expected_n_dihedrals = 30 expected_n_impropers = 29 - def test_bonds_atom_counts(self, universe): assert len(universe.atoms[[0]].bonds) == 3 @@ -126,7 +126,6 @@ def test_dihedrals_values(self, top): def test_dihedrals_type(self, universe): assert universe.dihedrals[0].type == (1, 1) - def test_impropers_atom_counts(self, universe): assert len(universe.atoms[[0]].impropers) == 1 @@ -138,6 +137,7 @@ def test_impropers_values(self, top): def test_impropers_type(self, universe): assert universe.impropers[0].type == 2 + class TestITPNoMass(ParserBase): parser = mda.topology.ITPParser.ITPParser ref_filename = ITP_nomass @@ -159,6 +159,36 @@ def test_mass_guess(self, universe): assert universe.atoms[0].mass not in ('', None) +class TestITPAtomtypes(ParserBase): + parser = mda.topology.ITPParser.ITPParser + ref_filename = ITP_atomtypes + expected_attrs = ['ids', 'names', 'types', 'masses', + 'charges', 'chargegroups', + 'resids', 'resnames', + 'segids', 'moltypes', 'molnums', + 'bonds', 'angles', 'dihedrals', 'impropers'] + guessed_attrs = ['masses'] + expected_n_atoms = 4 + expected_n_residues = 1 + expected_n_segments = 1 + + @pytest.fixture + def universe(self, filename): + return mda.Universe(filename) + + def test_charge_parse(self, universe): + assert_almost_equal(universe.atoms[0].charge, 4) + assert_almost_equal(universe.atoms[1].charge, 1.1) + assert_almost_equal(universe.atoms[2].charge, -3.000) + assert_almost_equal(universe.atoms[3].charge, 1.) + + def test_mass_parse_or_guess(self, universe): + assert_almost_equal(universe.atoms[0].mass, 8.0) + assert_almost_equal(universe.atoms[1].mass, 20.98) + assert_almost_equal(universe.atoms[2].mass, 20.98) + assert_almost_equal(universe.atoms[3].mass, 1.008) + + class TestDifferentDirectivesITP(BaseITP): ref_filename = ITP_edited @@ -168,7 +198,6 @@ class TestDifferentDirectivesITP(BaseITP): expected_n_dihedrals = 28 expected_n_impropers = 29 - def test_no_extra_angles(self, top): for a in ((57, 59, 61), (60, 59, 61), (59, 61, 62)): assert a not in top.angles.values @@ -183,6 +212,7 @@ def test_dihedrals_atom_counts(self, universe): def test_dihedrals_identity(self, universe): assert universe.dihedrals[0].type == (1, 1) + class TestITPNoKeywords(BaseITP): """ Test reading ITP files *without* defined keywords. @@ -254,7 +284,6 @@ def test_defines(self, top): def test_kwargs_overrides_defines(self, top): assert_almost_equal(top.charges.values[2], 3) - class TestNestedIfs(BaseITP): """ @@ -282,6 +311,7 @@ def top(self, filename): def test_heavy_atom(self, universe): assert universe.atoms[5].mass > 40 + class TestReadTop(BaseITP): """ Test reading top files @@ -322,6 +352,7 @@ def test_sequential(self, universe): assert_equal(universe.residues.resindices, np.arange(self.expected_n_residues)) assert_equal(universe.atoms.chargegroups[-1], 63) + class TestErrors: parser = mda.topology.ITPParser.ITPParser From 3aebcb5726f75eac0b433640414de0b899f7e1fb Mon Sep 17 00:00:00 2001 From: Paarth Thadani Date: Wed, 7 Apr 2021 20:06:14 +0530 Subject: [PATCH 2/2] Removes deprecated sklearn arguments from encore (#3178) Fixes #2986 Co-authored-by: Lily Wang <31115101+lilyminium@users.noreply.github.com> --- package/AUTHORS | 1 + package/CHANGELOG | 6 +++-- .../encore/clustering/ClusteringMethod.py | 22 +++---------------- .../MDAnalysisTests/analysis/test_encore.py | 7 +++--- 4 files changed, 12 insertions(+), 24 deletions(-) diff --git a/package/AUTHORS b/package/AUTHORS index 012de8ce644..12318895610 100644 --- a/package/AUTHORS +++ b/package/AUTHORS @@ -161,6 +161,7 @@ Chronological list of authors - Dimitrios Papageorgiou - Hannah Pollak - Estefania Barreto-Ojeda + - Paarth Thadani External code ------------- diff --git a/package/CHANGELOG b/package/CHANGELOG index 76351c969e2..f2fd09f700a 100644 --- a/package/CHANGELOG +++ b/package/CHANGELOG @@ -17,13 +17,15 @@ The rules for this file: lilyminium, daveminh, jbarnoud, yuxuanzhuang, VOD555, ianmkenney, calcraven,xiki-tempula, mieczyslaw, manuel.nuno.melo, PicoCentauri, hanatok, rmeli, aditya-kamath, tirkarthi, LeonardoBarneschi, hejamu, - biogen98, orioncohen, z3y50n, hp115, ojeda-e + biogen98, orioncohen, z3y50n, hp115, ojeda-e, thadanipaarth * 2.0.0 Fixes + * Removed deprecated parameters `n_jobs` and `precompute_distances` of + sklearn.cluster.KMeans (Issue #2986) * Helix_analysis coverage raised to 100% and `from __future__ import` - removed (Issue #3209) + removed (Issue #3209) * Fixed 'sphzone', 'sphlayer', 'cyzone', and 'cylayer' to return empty if the zone/layer is empty, consistent with 'around' (Issue #2915) * A Universe created from an ROMol with no atoms returns now a Universe diff --git a/package/MDAnalysis/analysis/encore/clustering/ClusteringMethod.py b/package/MDAnalysis/analysis/encore/clustering/ClusteringMethod.py index 7e66458f264..b18d6a54350 100644 --- a/package/MDAnalysis/analysis/encore/clustering/ClusteringMethod.py +++ b/package/MDAnalysis/analysis/encore/clustering/ClusteringMethod.py @@ -195,6 +195,9 @@ def __init__(self, (default is 50). Parameter in the Affinity Propagation for clustering. + **kwargs : optional + Other keyword arguments are passed to :class:`sklearn.cluster.AffinityPropagation`. + """ self.ap = \ sklearn.cluster.AffinityPropagation( @@ -331,7 +334,6 @@ def __init__(self, verbose=False, random_state=None, copy_x=True, - n_jobs=1, **kwargs): """ Parameters @@ -360,14 +362,6 @@ def __init__(self, If a callable is passed, it should take arguments X, k and and a random state and return an initialization. - precompute_distances : {'auto', True, False} - Precompute distances (faster but takes more memory). - 'auto' : do not precompute distances if - n_samples * n_clusters > 12 million. This corresponds to about - 100MB overhead per job using double precision. - True : always precompute distances - False : never precompute distances - tol : float, optional (default 1e-4) The relative increment in the results before declaring convergence. @@ -388,25 +382,15 @@ def __init__(self, differences may be introduced by subtracting and then adding the data mean. - n_jobs : int - The number of jobs to use for the computation. This works by - computing each of the n_init runs in parallel. If -1 all CPUs - are used. If 1 is given, no parallel computing code is used at - all, which is useful for debugging. For n_jobs below -1, - (n_cpus + 1 + n_jobs) are used. Thus for n_jobs = -2, all CPUs - but one are used. - """ self.kmeans = sklearn.cluster.KMeans(n_clusters=n_clusters, max_iter=max_iter, n_init=n_init, init=init, - precompute_distances='auto', tol=tol, verbose=verbose, random_state=random_state, copy_x=copy_x, - n_jobs=n_jobs, **kwargs) def __call__(self, coordinates): diff --git a/testsuite/MDAnalysisTests/analysis/test_encore.py b/testsuite/MDAnalysisTests/analysis/test_encore.py index d7d8f9f5654..b579c02bdbb 100644 --- a/testsuite/MDAnalysisTests/analysis/test_encore.py +++ b/testsuite/MDAnalysisTests/analysis/test_encore.py @@ -466,7 +466,7 @@ def test_clustering_AffinityPropagationNative_direct(self, ens1): def test_clustering_AffinityPropagation_direct(self, ens1): pytest.importorskip('sklearn') - method = encore.AffinityPropagation() + method = encore.AffinityPropagation(random_state=0) distance_matrix = encore.get_distance_matrix(ens1) cluster_assignment = method(distance_matrix) expected_value = 7 @@ -497,7 +497,8 @@ def test_clustering_two_different_methods(self, ens1): pytest.importorskip('sklearn') cluster_collection = encore.cluster( [ens1], - method=[encore.AffinityPropagation(preference=-7.5), + method=[encore.AffinityPropagation(preference=-7.5, + random_state=0), encore.DBSCAN(min_samples=2)]) assert len(cluster_collection[0]) == len(cluster_collection[1]), \ "Unexpected result: {0}".format(cluster_collection) @@ -523,7 +524,7 @@ def test_sklearn_affinity_propagation(self, ens1): pytest.importorskip('sklearn') cc1 = encore.cluster([ens1]) cc2 = encore.cluster([ens1], - method=encore.AffinityPropagation()) + method=encore.AffinityPropagation(random_state=0)) assert len(cc1) == len(cc2), \ "Native and sklearn implementations of affinity "\ "propagation don't agree: mismatch in number of "\