From 9fde9d1ef85190d7a2210d0a12e4126b477b35e4 Mon Sep 17 00:00:00 2001
From: Henrik Jaeger <info@jaegerhenrik.de>
Date: Wed, 7 Apr 2021 08:35:22 +0200
Subject: [PATCH 1/2] Parse atomtypes section of itp files (#3220)

Fixes #3199
---
 package/CHANGELOG                             |  2 +
 package/MDAnalysis/topology/ITPParser.py      | 82 +++++++++++++++----
 testsuite/MDAnalysisTests/data/atomtypes.itp  | 30 +++++++
 testsuite/MDAnalysisTests/datafiles.py        |  3 +
 .../MDAnalysisTests/topology/test_itp.py      | 45 ++++++++--
 5 files changed, 137 insertions(+), 25 deletions(-)
 create mode 100644 testsuite/MDAnalysisTests/data/atomtypes.itp

diff --git a/package/CHANGELOG b/package/CHANGELOG
index cd922bb8401..76351c969e2 100644
--- a/package/CHANGELOG
+++ b/package/CHANGELOG
@@ -91,6 +91,8 @@ Fixes
   * Fix syntax warning over comparison of literals using is (Issue #3066)
 
 Enhancements
+  * ITPParser now reads [ atomtypes ] sections in ITP files, used for charges
+    and masses not defined in the [ atoms ] sections
   * Add `set_dimensions` transformation class for setting constant 
     box dimensions for all timesteps in trajectory (Issue #2691)
   * Added a ValueError raised when not given a gridcenter while
diff --git a/package/MDAnalysis/topology/ITPParser.py b/package/MDAnalysis/topology/ITPParser.py
index e2962632781..952ee866d69 100644
--- a/package/MDAnalysis/topology/ITPParser.py
+++ b/package/MDAnalysis/topology/ITPParser.py
@@ -35,9 +35,12 @@
 1 molecule of each defined ``moleculetype``. 
 If a ``[ molecules ]`` section is present, ``infer_system`` is ignored.
 
-If files are included with the `#include` directive, they will also be read. If they are not in the working directory, ITPParser will look for them in the ``include_dir`` directory. By default, this is set to ``include_dir="/usr/local/gromacs/share/gromacs/top/"``.
-Variables can be defined with the `#define` directive in files, or by passing in 
-:ref:`keyword arguments <itp-define-kwargs>`.
+If files are included with the `#include` directive, they will also be read.
+If they are not in the working directory, ITPParser will look for them in the
+``include_dir`` directory. By default, this is set to
+``include_dir="/usr/local/gromacs/share/gromacs/top/"``.
+Variables can be defined with the `#define` directive in files, or by passing
+in :ref:`keyword arguments <itp-define-kwargs>`.
 
 Examples
 --------
@@ -151,11 +154,13 @@
 )
 from ..core.topology import Topology
 
+
 class Chargegroups(AtomAttr):
     """The charge group for each Atom"""
     attrname = 'chargegroups'
     singular = 'chargegroup'
 
+
 class GmxTopIterator:
     """
     Iterate over the lines of a TOP/ITP file and its included files
@@ -324,7 +329,7 @@ def parse_atoms(self, line):
         for lst in self.atom_order:
             try:
                 lst.append(values.pop(0))
-            except IndexError: # ran out of values
+            except IndexError:  # ran out of values
                 lst.append('')
     
     def parse_bonds(self, line):
@@ -333,7 +338,7 @@ def parse_bonds(self, line):
     
     def parse_angles(self, line):
         self.add_param(line, self.angles, n_funct=3, 
-                        funct_values=(1, 2, 3, 4, 5, 6, 8, 10))
+                       funct_values=(1, 2, 3, 4, 5, 6, 8, 10))
     
     def parse_dihedrals(self, line):
         dih = self.add_param(line, self.dihedrals, n_funct=4, 
@@ -374,7 +379,6 @@ def resolve_residue_attrs(self):
 
         self.resolved_residue_attrs = True
 
-
     def shift_indices(self, atomid=0, resid=0, molnum=0, cgnr=0, n_res=0, n_atoms=0):
         """
         Get attributes ready for adding onto a larger topology.
@@ -419,9 +423,6 @@ def shift_indices(self, atomid=0, resid=0, molnum=0, cgnr=0, n_res=0, n_atoms=0)
             new_params.append(new)
 
         return atom_order, new_params, molnums, self.moltypes, residx
-        
-        
-        
 
     def add_param(self, line, container, n_funct=2, funct_values=[]):
         """Add defined GROMACS directive lines, only if the funct is in ``funct_values``"""
@@ -444,7 +445,6 @@ def index_ids(self, values):
         return tuple(map(self.ids.index, values))
 
 
-
 class ITPParser(TopologyReaderBase):
     """Read topology information from a GROMACS ITP_ or TOP_ file.
 
@@ -465,7 +465,6 @@ class ITPParser(TopologyReaderBase):
     - dihedrals
     - impropers
 
-
     .. _ITP: http://manual.gromacs.org/current/reference-manual/topologies/topology-file-formats.html#molecule-itp-file
     .. _TOP: http://manual.gromacs.org/current/reference-manual/file-formats.html#top
     """
@@ -495,6 +494,7 @@ def parse(self, include_dir='/usr/local/gromacs/share/gromacs/top/',
         MDAnalysis *Topology* object
         """
 
+        self.atomtypes = {}
         self.molecules = {}
         self._molecules = []  # for order
         self.current_mol = None
@@ -508,7 +508,10 @@ def parse(self, include_dir='/usr/local/gromacs/share/gromacs/top/',
                 if '[' in line and ']' in line:
                     section = line.split('[')[1].split(']')[0].strip()
 
-                    if section == 'moleculetype':
+                    if section == 'atomtypes':
+                        self.parser = self.parse_atomtypes
+
+                    elif section == 'moleculetype':
                         self.parser = self.parse_moleculetype
 
                     elif section == 'molecules':
@@ -527,7 +530,33 @@ def parse(self, include_dir='/usr/local/gromacs/share/gromacs/top/',
             self.system_molecules = [x.name for x in self._molecules]
 
         self.build_system()
-        
+
+        self.types = np.array(self.types)
+        self.charges = np.array(self.charges)
+        self.masses = np.array(self.masses)
+
+        if not all(self.charges):
+            empty = self.charges == ''
+            self.charges[empty] = [
+                (
+                    self.atomtypes.get(x)["charge"]
+                    if x in self.atomtypes.keys()
+                    else ''
+                )
+                for x in self.types[empty]
+            ]
+
+        if not all(self.masses):
+            empty = self.masses == ''
+            self.masses[empty] = [
+                (
+                    self.atomtypes.get(x)["mass"]
+                    if x in self.atomtypes.keys()
+                    else ''
+                )
+                for x in self.types[empty]
+            ]
+
         attrs = []
         # atom stuff
         for vals, Attr, dtype in (
@@ -539,12 +568,16 @@ def parse(self, include_dir='/usr/local/gromacs/share/gromacs/top/',
         ):
             if all(vals):
                 attrs.append(Attr(np.array(vals, dtype=dtype)))
-        
+
         if not all(self.masses):
-            masses = guessers.guess_masses(self.types)
-            attrs.append(Masses(masses, guessed=True))
+            empty = self.masses == ''
+            self.masses[empty] = guessers.guess_masses(
+                guessers.guess_types(self.types)[empty])
+            attrs.append(Masses(np.array(self.masses, dtype=np.float64),
+                                guessed=True))
         else:
-            attrs.append(Masses(np.array(self.masses, dtype=np.float64)))
+            attrs.append(Masses(np.array(self.masses, dtype=np.float64),
+                                guessed=False))
 
         # residue stuff
         resids = np.array(self.resids, dtype=np.int32)
@@ -586,10 +619,23 @@ def parse(self, include_dir='/usr/local/gromacs/share/gromacs/top/',
 
         return top
 
-
     def _pass(self, line):
         pass
 
+    def parse_atomtypes(self, line):
+        keys = ['type_bonded', 'atomic_number', 'mass', 'charge', 'p_type']
+        fields = line.split()
+        if len(fields[5]) == 1 and fields[5].isalpha():
+            values = fields[1:6]
+        elif len(fields[3]) == 1 and fields[3].isalpha():
+            values = '', '', fields[1], fields[2], fields[3]
+        elif len(fields[4]) == 1 and fields[4].isalpha():
+            if fields[1][0].isalpha():
+                values = fields[1], '', fields[2], fields[3], fields[4]
+            else:
+                values = '', fields[1], fields[2], fields[3], fields[4]
+        self.atomtypes[fields[0]] = dict(zip(keys, values))
+
     def parse_moleculetype(self, line):
         name = line.split()[0]
         self.current_mol = self.molecules[name] = Molecule(name)
diff --git a/testsuite/MDAnalysisTests/data/atomtypes.itp b/testsuite/MDAnalysisTests/data/atomtypes.itp
new file mode 100644
index 00000000000..301ac1d9e06
--- /dev/null
+++ b/testsuite/MDAnalysisTests/data/atomtypes.itp
@@ -0,0 +1,30 @@
+[ defaults ]
+; nbfunc        comb-rule       gen-pairs       fudgeLJ fudgeQQ
+  1             2               no              1.0     1.0
+
+
+[ atomtypes ]
+;All possible variations of atomtypes lines
+;type   b_type  at.num  mass    charge      ptype   sigma           epsilon
+A                       10.086  2.000       A       0.356359487256  0.209200
+B       BB      13      20.989  -3.000      A       0.349832094823  0.202300
+C               15      25.935  1.000       A       0.320190238903  0.206840
+D       BD              12.129  -1.000      A       0.310128943840  0.201950
+
+
+[ moleculetype ]
+; molname    nrexcl
+TEST    1
+
+[ atoms ]
+;nr      type  resnr residue  atom   cgnr    charge    mass
+1        A     1     SURF      A      1      4.000     8.000;
+2        B     1     SURF      B      2      1.1     ;
+3        B     1     SURF      B      3      ;
+4        H     1     SURF      H      4      1.000;
+
+[ system ]
+Testing
+
+[ molecules ]
+TEST     1
diff --git a/testsuite/MDAnalysisTests/datafiles.py b/testsuite/MDAnalysisTests/datafiles.py
index 2d443df3b0f..d647f435169 100644
--- a/testsuite/MDAnalysisTests/datafiles.py
+++ b/testsuite/MDAnalysisTests/datafiles.py
@@ -187,6 +187,7 @@
     "GRO_huge_box", # for testing gro parser with hige box sizes
     "ITP", # for GROMACS generated itps
     "ITP_nomass", # for ATB generated itps
+    "ITP_atomtypes",  # atom definitions to check atomtyes section parsing
     "NAMDBIN", # for NAMD generated binary file
     "ITP_edited", # to check different directives are read properly
     "ITP_tip5p", # tip5p water from opls-aa, edited with additional keywords
@@ -542,9 +543,11 @@
 
 ITP = resource_filename(__name__, 'data/gromacs_ala10.itp')
 ITP_nomass = resource_filename(__name__, 'data/itp_nomass.itp')
+ITP_atomtypes = resource_filename(__name__, 'data/atomtypes.itp')
 ITP_edited = resource_filename(__name__, 'data/edited_itp.itp')
 ITP_tip5p = resource_filename(__name__, "data/tip5p.itp")
 ITP_spce = resource_filename(__name__, 'data/spce.itp')
+
 GMX_TOP = resource_filename(__name__, 'data/gromacs_ala10.top')
 GMX_DIR = resource_filename(__name__, 'data/gromacs/')
 GMX_TOP_BAD = resource_filename(__name__, 'data/bad_top.top')
diff --git a/testsuite/MDAnalysisTests/topology/test_itp.py b/testsuite/MDAnalysisTests/topology/test_itp.py
index b16fd38bb12..3f0607d8def 100644
--- a/testsuite/MDAnalysisTests/topology/test_itp.py
+++ b/testsuite/MDAnalysisTests/topology/test_itp.py
@@ -29,16 +29,18 @@
 from MDAnalysisTests.topology.base import ParserBase
 from MDAnalysisTests.datafiles import (
     ITP,  # GROMACS itp
-    ITP_nomass, # from Automated Topology Builder
+    ITP_nomass,  # from Automated Topology Builder
+    ITP_atomtypes,
     ITP_edited,
     ITP_tip5p,
     ITP_spce,
     GMX_TOP,
     GMX_DIR,
     GMX_TOP_BAD,
-    ITP_no_endif
+    ITP_no_endif,
 )
 
+
 class BaseITP(ParserBase):
     parser = mda.topology.ITPParser.ITPParser
     expected_attrs = ['ids', 'names', 'types', 'masses',
@@ -55,7 +57,6 @@ class BaseITP(ParserBase):
     expected_n_dihedrals = 0
     expected_n_impropers = 0
 
-
     @pytest.fixture
     def universe(self, filename):
         return mda.Universe(filename)
@@ -84,7 +85,6 @@ class TestITP(BaseITP):
     expected_n_angles = 91
     expected_n_dihedrals = 30
     expected_n_impropers = 29
-
     
     def test_bonds_atom_counts(self, universe):
         assert len(universe.atoms[[0]].bonds) == 3
@@ -126,7 +126,6 @@ def test_dihedrals_values(self, top):
     def test_dihedrals_type(self, universe):
         assert universe.dihedrals[0].type == (1, 1)
 
-
     def test_impropers_atom_counts(self, universe):
         assert len(universe.atoms[[0]].impropers) == 1
 
@@ -138,6 +137,7 @@ def test_impropers_values(self, top):
     def test_impropers_type(self, universe):
         assert universe.impropers[0].type == 2
 
+
 class TestITPNoMass(ParserBase):
     parser = mda.topology.ITPParser.ITPParser
     ref_filename = ITP_nomass
@@ -159,6 +159,36 @@ def test_mass_guess(self, universe):
         assert universe.atoms[0].mass not in ('', None)
 
 
+class TestITPAtomtypes(ParserBase):
+    parser = mda.topology.ITPParser.ITPParser
+    ref_filename = ITP_atomtypes
+    expected_attrs = ['ids', 'names', 'types', 'masses',
+                      'charges', 'chargegroups',
+                      'resids', 'resnames',
+                      'segids', 'moltypes', 'molnums',
+                      'bonds', 'angles', 'dihedrals', 'impropers']
+    guessed_attrs = ['masses']
+    expected_n_atoms = 4
+    expected_n_residues = 1
+    expected_n_segments = 1
+
+    @pytest.fixture
+    def universe(self, filename):
+        return mda.Universe(filename)
+
+    def test_charge_parse(self, universe):
+        assert_almost_equal(universe.atoms[0].charge, 4)
+        assert_almost_equal(universe.atoms[1].charge, 1.1)
+        assert_almost_equal(universe.atoms[2].charge, -3.000)
+        assert_almost_equal(universe.atoms[3].charge, 1.)
+
+    def test_mass_parse_or_guess(self, universe):
+        assert_almost_equal(universe.atoms[0].mass, 8.0)
+        assert_almost_equal(universe.atoms[1].mass, 20.98)
+        assert_almost_equal(universe.atoms[2].mass, 20.98)
+        assert_almost_equal(universe.atoms[3].mass, 1.008)
+
+
 class TestDifferentDirectivesITP(BaseITP):
 
     ref_filename = ITP_edited
@@ -168,7 +198,6 @@ class TestDifferentDirectivesITP(BaseITP):
     expected_n_dihedrals = 28
     expected_n_impropers = 29
 
-
     def test_no_extra_angles(self, top):
         for a in ((57, 59, 61), (60, 59, 61), (59, 61, 62)):
             assert a not in top.angles.values
@@ -183,6 +212,7 @@ def test_dihedrals_atom_counts(self, universe):
     def test_dihedrals_identity(self, universe):
         assert universe.dihedrals[0].type == (1, 1)
 
+
 class TestITPNoKeywords(BaseITP):
     """
     Test reading ITP files *without* defined keywords.
@@ -254,7 +284,6 @@ def test_defines(self, top):
     def test_kwargs_overrides_defines(self, top):
         assert_almost_equal(top.charges.values[2], 3)
 
-    
 
 class TestNestedIfs(BaseITP):
     """
@@ -282,6 +311,7 @@ def top(self, filename):
     def test_heavy_atom(self, universe):
         assert universe.atoms[5].mass > 40
 
+
 class TestReadTop(BaseITP):
     """
     Test reading top files
@@ -322,6 +352,7 @@ def test_sequential(self, universe):
         assert_equal(universe.residues.resindices, np.arange(self.expected_n_residues))
         assert_equal(universe.atoms.chargegroups[-1], 63)
 
+
 class TestErrors:
 
     parser = mda.topology.ITPParser.ITPParser

From 3aebcb5726f75eac0b433640414de0b899f7e1fb Mon Sep 17 00:00:00 2001
From: Paarth Thadani <paarth.thadani15@gmail.com>
Date: Wed, 7 Apr 2021 20:06:14 +0530
Subject: [PATCH 2/2] Removes deprecated sklearn arguments from encore (#3178)

Fixes #2986

Co-authored-by: Lily Wang <31115101+lilyminium@users.noreply.github.com>
---
 package/AUTHORS                               |  1 +
 package/CHANGELOG                             |  6 +++--
 .../encore/clustering/ClusteringMethod.py     | 22 +++----------------
 .../MDAnalysisTests/analysis/test_encore.py   |  7 +++---
 4 files changed, 12 insertions(+), 24 deletions(-)

diff --git a/package/AUTHORS b/package/AUTHORS
index 012de8ce644..12318895610 100644
--- a/package/AUTHORS
+++ b/package/AUTHORS
@@ -161,6 +161,7 @@ Chronological list of authors
   - Dimitrios Papageorgiou
   - Hannah Pollak
   - Estefania Barreto-Ojeda
+  - Paarth Thadani
 
 External code
 -------------
diff --git a/package/CHANGELOG b/package/CHANGELOG
index 76351c969e2..f2fd09f700a 100644
--- a/package/CHANGELOG
+++ b/package/CHANGELOG
@@ -17,13 +17,15 @@ The rules for this file:
          lilyminium, daveminh, jbarnoud, yuxuanzhuang, VOD555, ianmkenney,
          calcraven，xiki-tempula, mieczyslaw, manuel.nuno.melo, PicoCentauri,
          hanatok, rmeli, aditya-kamath, tirkarthi, LeonardoBarneschi, hejamu,
-         biogen98, orioncohen, z3y50n, hp115, ojeda-e
+         biogen98, orioncohen, z3y50n, hp115, ojeda-e, thadanipaarth
 
   * 2.0.0
 
 Fixes
+  * Removed deprecated parameters `n_jobs` and `precompute_distances` of
+    sklearn.cluster.KMeans (Issue #2986)
   * Helix_analysis coverage raised to 100% and `from __future__ import`
-    removed (Issue #3209) 
+    removed (Issue #3209)
   * Fixed 'sphzone', 'sphlayer', 'cyzone', and 'cylayer' to return empty if the
    zone/layer is empty, consistent with 'around' (Issue #2915)
   * A Universe created from an ROMol with no atoms returns now a Universe 
diff --git a/package/MDAnalysis/analysis/encore/clustering/ClusteringMethod.py b/package/MDAnalysis/analysis/encore/clustering/ClusteringMethod.py
index 7e66458f264..b18d6a54350 100644
--- a/package/MDAnalysis/analysis/encore/clustering/ClusteringMethod.py
+++ b/package/MDAnalysis/analysis/encore/clustering/ClusteringMethod.py
@@ -195,6 +195,9 @@ def __init__(self,
                 (default is 50). Parameter in the Affinity Propagation for
                 clustering.
 
+            **kwargs : optional
+                Other keyword arguments are passed to :class:`sklearn.cluster.AffinityPropagation`.
+
             """
             self.ap = \
                 sklearn.cluster.AffinityPropagation(
@@ -331,7 +334,6 @@ def __init__(self,
                      verbose=False,
                      random_state=None,
                      copy_x=True,
-                     n_jobs=1,
                      **kwargs):
             """
             Parameters
@@ -360,14 +362,6 @@ def __init__(self,
                 If a callable is passed, it should take arguments X, k and
                 and a random state and return an initialization.
 
-            precompute_distances : {'auto', True, False}
-                Precompute distances (faster but takes more memory).
-                'auto' : do not precompute distances if
-                n_samples * n_clusters > 12 million. This corresponds to about
-                100MB overhead per job using double precision.
-                True : always precompute distances
-                False : never precompute distances
-
             tol : float, optional (default 1e-4)
                 The relative increment in the results before declaring
                 convergence.
@@ -388,25 +382,15 @@ def __init__(self,
                 differences may be introduced by subtracting and then adding
                 the data mean.
 
-            n_jobs : int
-                The number of jobs to use for the computation. This works by
-                computing each of the n_init runs in parallel. If -1 all CPUs
-                are used. If 1 is given, no parallel computing code is used at
-                all, which is useful for debugging. For n_jobs below -1,
-                (n_cpus + 1 + n_jobs) are used. Thus for n_jobs = -2, all CPUs
-                but one are used.
-
             """
             self.kmeans = sklearn.cluster.KMeans(n_clusters=n_clusters,
                                                  max_iter=max_iter,
                                                  n_init=n_init,
                                                  init=init,
-                                                 precompute_distances='auto',
                                                  tol=tol,
                                                  verbose=verbose,
                                                  random_state=random_state,
                                                  copy_x=copy_x,
-                                                 n_jobs=n_jobs,
                                                  **kwargs)
 
         def __call__(self, coordinates):
diff --git a/testsuite/MDAnalysisTests/analysis/test_encore.py b/testsuite/MDAnalysisTests/analysis/test_encore.py
index d7d8f9f5654..b579c02bdbb 100644
--- a/testsuite/MDAnalysisTests/analysis/test_encore.py
+++ b/testsuite/MDAnalysisTests/analysis/test_encore.py
@@ -466,7 +466,7 @@ def test_clustering_AffinityPropagationNative_direct(self, ens1):
 
     def test_clustering_AffinityPropagation_direct(self, ens1):
         pytest.importorskip('sklearn')
-        method = encore.AffinityPropagation()
+        method = encore.AffinityPropagation(random_state=0)
         distance_matrix = encore.get_distance_matrix(ens1)
         cluster_assignment = method(distance_matrix)
         expected_value = 7
@@ -497,7 +497,8 @@ def test_clustering_two_different_methods(self, ens1):
         pytest.importorskip('sklearn')
         cluster_collection = encore.cluster(
             [ens1],
-            method=[encore.AffinityPropagation(preference=-7.5),
+            method=[encore.AffinityPropagation(preference=-7.5,
+                    random_state=0),
                     encore.DBSCAN(min_samples=2)])
         assert len(cluster_collection[0]) == len(cluster_collection[1]), \
                      "Unexpected result: {0}".format(cluster_collection)
@@ -523,7 +524,7 @@ def test_sklearn_affinity_propagation(self, ens1):
         pytest.importorskip('sklearn')
         cc1 = encore.cluster([ens1])
         cc2 = encore.cluster([ens1],
-                             method=encore.AffinityPropagation())
+                             method=encore.AffinityPropagation(random_state=0))
         assert len(cc1) == len(cc2), \
                      "Native and sklearn implementations of affinity "\
                               "propagation don't agree: mismatch in number of "\