Merge branch 'develop' into get_connections_develop

MDAnalysis · Apr 7, 2021 · 967ebf7 · 967ebf7
2 parents 759e9ee + 3aebcb5
commit 967ebf7
Show file tree

Hide file tree

Showing 8 changed files with 149 additions and 49 deletions.
diff --git a/package/AUTHORS b/package/AUTHORS
@@ -161,6 +161,7 @@ Chronological list of authors
   - Dimitrios Papageorgiou
   - Hannah Pollak
   - Estefania Barreto-Ojeda
+  - Paarth Thadani
 
 External code
 -------------

diff --git a/package/CHANGELOG b/package/CHANGELOG
@@ -17,13 +17,15 @@ The rules for this file:
          lilyminium, daveminh, jbarnoud, yuxuanzhuang, VOD555, ianmkenney,
          calcraven, xiki-tempula, mieczyslaw, manuel.nuno.melo, PicoCentauri,
          hanatok, rmeli, aditya-kamath, tirkarthi, LeonardoBarneschi, hejamu,
-         biogen98, orioncohen, z3y50n, hp115, ojeda-e
+         biogen98, orioncohen, z3y50n, hp115, ojeda-e, thadanipaarth
 
   * 2.0.0
 
 Fixes
+  * Removed deprecated parameters `n_jobs` and `precompute_distances` of
+    sklearn.cluster.KMeans (Issue #2986)
   * Helix_analysis coverage raised to 100% and `from __future__ import`
-    removed (Issue #3209) 
+    removed (Issue #3209)
   * Fixed 'sphzone', 'sphlayer', 'cyzone', and 'cylayer' to return empty if the
    zone/layer is empty, consistent with 'around' (Issue #2915)
   * A Universe created from an ROMol with no atoms now returns a Universe
@@ -94,6 +96,8 @@ Enhancements
   * Added intra_bonds, intra_angles, intra_dihedrals etc. to return only
     the connections involving atoms within the AtomGroup, instead of
     including atoms outside the AtomGroup (Issue #1264, #2821, PR #3200)
+  * ITPParser now reads [ atomtypes ] sections in ITP files, used for charges
+    and masses not defined in the [ atoms ] sections
   * Add `set_dimensions` transformation class for setting constant 
     box dimensions for all timesteps in trajectory (Issue #2691)
   * Added a ValueError raised when not given a gridcenter while

diff --git a/package/MDAnalysis/analysis/encore/clustering/ClusteringMethod.py b/package/MDAnalysis/analysis/encore/clustering/ClusteringMethod.py
@@ -195,6 +195,9 @@ def __init__(self,
                 (default is 50). Parameter in the Affinity Propagation for
                 clustering.
 
+            **kwargs : optional
+                Other keyword arguments are passed to :class:`sklearn.cluster.AffinityPropagation`.
+
             """
             self.ap = \
                 sklearn.cluster.AffinityPropagation(
@@ -331,7 +334,6 @@ def __init__(self,
                      verbose=False,
                      random_state=None,
                      copy_x=True,
-                     n_jobs=1,
                      **kwargs):
             """
             Parameters
@@ -360,14 +362,6 @@ def __init__(self,
                 If a callable is passed, it should take arguments X, k
                 and a random state and return an initialization.
 
-            precompute_distances : {'auto', True, False}
-                Precompute distances (faster but takes more memory).
-                'auto' : do not precompute distances if
-                n_samples * n_clusters > 12 million. This corresponds to about
-                100MB overhead per job using double precision.
-                True : always precompute distances
-                False : never precompute distances
-
             tol : float, optional (default 1e-4)
                 The relative increment in the results before declaring
                 convergence.
@@ -388,25 +382,15 @@ def __init__(self,
                 differences may be introduced by subtracting and then adding
                 the data mean.
 
-            n_jobs : int
-                The number of jobs to use for the computation. This works by
-                computing each of the n_init runs in parallel. If -1 all CPUs
-                are used. If 1 is given, no parallel computing code is used at
-                all, which is useful for debugging. For n_jobs below -1,
-                (n_cpus + 1 + n_jobs) are used. Thus for n_jobs = -2, all CPUs
-                but one are used.
-
             """
             self.kmeans = sklearn.cluster.KMeans(n_clusters=n_clusters,
                                                  max_iter=max_iter,
                                                  n_init=n_init,
                                                  init=init,
-                                                 precompute_distances='auto',
                                                  tol=tol,
                                                  verbose=verbose,
                                                  random_state=random_state,
                                                  copy_x=copy_x,
-                                                 n_jobs=n_jobs,
                                                  **kwargs)
 
         def __call__(self, coordinates):

diff --git a/package/MDAnalysis/topology/ITPParser.py b/package/MDAnalysis/topology/ITPParser.py
@@ -35,9 +35,12 @@
 1 molecule of each defined ``moleculetype``. 
 If a ``[ molecules ]`` section is present, ``infer_system`` is ignored.
 
-If files are included with the `#include` directive, they will also be read. If they are not in the working directory, ITPParser will look for them in the ``include_dir`` directory. By default, this is set to ``include_dir="/usr/local/gromacs/share/gromacs/top/"``.
-Variables can be defined with the `#define` directive in files, or by passing in 
-:ref:`keyword arguments <itp-define-kwargs>`.
+If files are included with the `#include` directive, they will also be read.
+If they are not in the working directory, ITPParser will look for them in the
+``include_dir`` directory. By default, this is set to
+``include_dir="/usr/local/gromacs/share/gromacs/top/"``.
+Variables can be defined with the `#define` directive in files, or by passing
+in :ref:`keyword arguments <itp-define-kwargs>`.
 
 Examples
 --------
@@ -151,11 +154,13 @@
 )
 from ..core.topology import Topology
 
+
 class Chargegroups(AtomAttr):
     """The charge group for each Atom"""
     attrname = 'chargegroups'
     singular = 'chargegroup'
 
+
 class GmxTopIterator:
     """
     Iterate over the lines of a TOP/ITP file and its included files
@@ -324,7 +329,7 @@ def parse_atoms(self, line):
         for lst in self.atom_order:
             try:
                 lst.append(values.pop(0))
-            except IndexError: # ran out of values
+            except IndexError:  # ran out of values
                 lst.append('')
 
     def parse_bonds(self, line):
@@ -333,7 +338,7 @@ def parse_bonds(self, line):
 
     def parse_angles(self, line):
         self.add_param(line, self.angles, n_funct=3, 
-                        funct_values=(1, 2, 3, 4, 5, 6, 8, 10))
+                       funct_values=(1, 2, 3, 4, 5, 6, 8, 10))
 
     def parse_dihedrals(self, line):
         dih = self.add_param(line, self.dihedrals, n_funct=4, 
@@ -374,7 +379,6 @@ def resolve_residue_attrs(self):
 
         self.resolved_residue_attrs = True
 
-
     def shift_indices(self, atomid=0, resid=0, molnum=0, cgnr=0, n_res=0, n_atoms=0):
         """
         Get attributes ready for adding onto a larger topology.
@@ -419,9 +423,6 @@ def shift_indices(self, atomid=0, resid=0, molnum=0, cgnr=0, n_res=0, n_atoms=0)
             new_params.append(new)
 
         return atom_order, new_params, molnums, self.moltypes, residx
-
-
-
 
     def add_param(self, line, container, n_funct=2, funct_values=[]):
         """Add defined GROMACS directive lines, only if the funct is in ``funct_values``"""
@@ -444,7 +445,6 @@ def index_ids(self, values):
         return tuple(map(self.ids.index, values))
 
 
-
 class ITPParser(TopologyReaderBase):
     """Read topology information from a GROMACS ITP_ or TOP_ file.
 
@@ -465,7 +465,6 @@ class ITPParser(TopologyReaderBase):
     - dihedrals
     - impropers
 
-
     .. _ITP: http://manual.gromacs.org/current/reference-manual/topologies/topology-file-formats.html#molecule-itp-file
     .. _TOP: http://manual.gromacs.org/current/reference-manual/file-formats.html#top
     """
@@ -495,6 +494,7 @@ def parse(self, include_dir='/usr/local/gromacs/share/gromacs/top/',
         MDAnalysis *Topology* object
         """
 
+        self.atomtypes = {}
         self.molecules = {}
         self._molecules = []  # for order
         self.current_mol = None
@@ -508,7 +508,10 @@ def parse(self, include_dir='/usr/local/gromacs/share/gromacs/top/',
                 if '[' in line and ']' in line:
                     section = line.split('[')[1].split(']')[0].strip()
 
-                    if section == 'moleculetype':
+                    if section == 'atomtypes':
+                        self.parser = self.parse_atomtypes
+
+                    elif section == 'moleculetype':
                         self.parser = self.parse_moleculetype
 
                     elif section == 'molecules':
@@ -527,7 +530,33 @@ def parse(self, include_dir='/usr/local/gromacs/share/gromacs/top/',
             self.system_molecules = [x.name for x in self._molecules]
 
         self.build_system()
-
+
+        self.types = np.array(self.types)
+        self.charges = np.array(self.charges)
+        self.masses = np.array(self.masses)
+
+        if not all(self.charges):
+            empty = self.charges == ''
+            self.charges[empty] = [
+                (
+                    self.atomtypes.get(x)["charge"]
+                    if x in self.atomtypes.keys()
+                    else ''
+                )
+                for x in self.types[empty]
+            ]
+
+        if not all(self.masses):
+            empty = self.masses == ''
+            self.masses[empty] = [
+                (
+                    self.atomtypes.get(x)["mass"]
+                    if x in self.atomtypes.keys()
+                    else ''
+                )
+                for x in self.types[empty]
+            ]
+
         attrs = []
         # atom stuff
         for vals, Attr, dtype in (
@@ -539,12 +568,16 @@ def parse(self, include_dir='/usr/local/gromacs/share/gromacs/top/',
         ):
             if all(vals):
                 attrs.append(Attr(np.array(vals, dtype=dtype)))
-        
+
         if not all(self.masses):
-            masses = guessers.guess_masses(self.types)
-            attrs.append(Masses(masses, guessed=True))
+            empty = self.masses == ''
+            self.masses[empty] = guessers.guess_masses(
+                guessers.guess_types(self.types)[empty])
+            attrs.append(Masses(np.array(self.masses, dtype=np.float64),
+                                guessed=True))
         else:
-            attrs.append(Masses(np.array(self.masses, dtype=np.float64)))
+            attrs.append(Masses(np.array(self.masses, dtype=np.float64),
+                                guessed=False))
 
         # residue stuff
         resids = np.array(self.resids, dtype=np.int32)
@@ -586,10 +619,23 @@ def parse(self, include_dir='/usr/local/gromacs/share/gromacs/top/',
 
         return top
 
-
     def _pass(self, line):
         pass
 
+    def parse_atomtypes(self, line):
+        keys = ['type_bonded', 'atomic_number', 'mass', 'charge', 'p_type']
+        fields = line.split()
+        if len(fields[5]) == 1 and fields[5].isalpha():
+            values = fields[1:6]
+        elif len(fields[3]) == 1 and fields[3].isalpha():
+            values = '', '', fields[1], fields[2], fields[3]
+        elif len(fields[4]) == 1 and fields[4].isalpha():
+            if fields[1][0].isalpha():
+                values = fields[1], '', fields[2], fields[3], fields[4]
+            else:
+                values = '', fields[1], fields[2], fields[3], fields[4]
+        self.atomtypes[fields[0]] = dict(zip(keys, values))
+
     def parse_moleculetype(self, line):
         name = line.split()[0]
         self.current_mol = self.molecules[name] = Molecule(name)

diff --git a/testsuite/MDAnalysisTests/analysis/test_encore.py b/testsuite/MDAnalysisTests/analysis/test_encore.py
@@ -466,7 +466,7 @@ def test_clustering_AffinityPropagationNative_direct(self, ens1):
 
     def test_clustering_AffinityPropagation_direct(self, ens1):
         pytest.importorskip('sklearn')
-        method = encore.AffinityPropagation()
+        method = encore.AffinityPropagation(random_state=0)
         distance_matrix = encore.get_distance_matrix(ens1)
         cluster_assignment = method(distance_matrix)
         expected_value = 7
@@ -497,7 +497,8 @@ def test_clustering_two_different_methods(self, ens1):
         pytest.importorskip('sklearn')
         cluster_collection = encore.cluster(
             [ens1],
-            method=[encore.AffinityPropagation(preference=-7.5),
+            method=[encore.AffinityPropagation(preference=-7.5,
+                    random_state=0),
                     encore.DBSCAN(min_samples=2)])
         assert len(cluster_collection[0]) == len(cluster_collection[1]), \
                      "Unexpected result: {0}".format(cluster_collection)
@@ -523,7 +524,7 @@ def test_sklearn_affinity_propagation(self, ens1):
         pytest.importorskip('sklearn')
         cc1 = encore.cluster([ens1])
         cc2 = encore.cluster([ens1],
-                             method=encore.AffinityPropagation())
+                             method=encore.AffinityPropagation(random_state=0))
         assert len(cc1) == len(cc2), \
                      "Native and sklearn implementations of affinity "\
                               "propagation don't agree: mismatch in number of "\

diff --git a/testsuite/MDAnalysisTests/data/atomtypes.itp b/testsuite/MDAnalysisTests/data/atomtypes.itp
@@ -0,0 +1,30 @@
+[ defaults ]
+; nbfunc        comb-rule       gen-pairs       fudgeLJ fudgeQQ
+  1             2               no              1.0     1.0
+
+
+[ atomtypes ]
+;All possible variations of atomtypes lines
+;type   b_type  at.num  mass    charge      ptype   sigma           epsilon
+A                       10.086  2.000       A       0.356359487256  0.209200
+B       BB      13      20.989  -3.000      A       0.349832094823  0.202300
+C               15      25.935  1.000       A       0.320190238903  0.206840
+D       BD              12.129  -1.000      A       0.310128943840  0.201950
+
+
+[ moleculetype ]
+; molname    nrexcl
+TEST    1
+
+[ atoms ]
+;nr      type  resnr residue  atom   cgnr    charge    mass
+1        A     1     SURF      A      1      4.000     8.000;
+2        B     1     SURF      B      2      1.1     ;
+3        B     1     SURF      B      3      ;
+4        H     1     SURF      H      4      1.000;
+
+[ system ]
+Testing
+
+[ molecules ]
+TEST     1
diff --git a/testsuite/MDAnalysisTests/datafiles.py b/testsuite/MDAnalysisTests/datafiles.py
@@ -187,6 +187,7 @@
     "GRO_huge_box", # for testing gro parser with hige box sizes
     "ITP", # for GROMACS generated itps
     "ITP_nomass", # for ATB generated itps
+    "ITP_atomtypes",  # atom definitions to check atomtyes section parsing
     "NAMDBIN", # for NAMD generated binary file
     "ITP_edited", # to check different directives are read properly
     "ITP_tip5p", # tip5p water from opls-aa, edited with additional keywords
@@ -542,9 +543,11 @@
 
 ITP = resource_filename(__name__, 'data/gromacs_ala10.itp')
 ITP_nomass = resource_filename(__name__, 'data/itp_nomass.itp')
+ITP_atomtypes = resource_filename(__name__, 'data/atomtypes.itp')
 ITP_edited = resource_filename(__name__, 'data/edited_itp.itp')
 ITP_tip5p = resource_filename(__name__, "data/tip5p.itp")
 ITP_spce = resource_filename(__name__, 'data/spce.itp')
+
 GMX_TOP = resource_filename(__name__, 'data/gromacs_ala10.top')
 GMX_DIR = resource_filename(__name__, 'data/gromacs/')
 GMX_TOP_BAD = resource_filename(__name__, 'data/bad_top.top')