Skip to content

Commit

Permalink
RDKitReader and RDKitParser (#2707)
Browse files Browse the repository at this point in the history
  • Loading branch information
Cédric Bouysset authored Jun 19, 2020
1 parent 788f294 commit 50cd6e7
Show file tree
Hide file tree
Showing 18 changed files with 990 additions and 4 deletions.
2 changes: 1 addition & 1 deletion .appveyor.yml
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ cache:
environment:
global:
CONDA_CHANNELS: conda-forge
CONDA_DEPENDENCIES: pip setuptools wheel cython mock six biopython networkx joblib matplotlib scipy vs2015_runtime pytest mmtf-python GridDataFormats hypothesis pytest-cov codecov chemfiles tqdm tidynamics>=1.0.0
CONDA_DEPENDENCIES: pip setuptools wheel cython mock six biopython networkx joblib matplotlib scipy vs2015_runtime pytest mmtf-python GridDataFormats hypothesis pytest-cov codecov chemfiles tqdm tidynamics>=1.0.0 rdkit
PIP_DEPENDENCIES: gsd==1.9.3 duecredit parmed
DEBUG: "False"
MINGW_64: C:\mingw-w64\x86_64-6.3.0-posix-seh-rt_v5-rev1\mingw64\bin
Expand Down
2 changes: 1 addition & 1 deletion .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ env:
- SETUP_CMD="${PYTEST_FLAGS}"
- BUILD_CMD="pip install -e package/ && (cd testsuite/ && python setup.py build)"
- CONDA_MIN_DEPENDENCIES="mmtf-python mock six biopython networkx cython matplotlib scipy griddataformats hypothesis gsd codecov"
- CONDA_DEPENDENCIES="${CONDA_MIN_DEPENDENCIES} seaborn>=0.7.0 clustalw=2.1 netcdf4 scikit-learn joblib>=0.12 chemfiles tqdm>=4.43.0 tidynamics>=1.0.0"
- CONDA_DEPENDENCIES="${CONDA_MIN_DEPENDENCIES} seaborn>=0.7.0 clustalw=2.1 netcdf4 scikit-learn joblib>=0.12 chemfiles tqdm>=4.43.0 tidynamics>=1.0.0 rdkit"
- CONDA_CHANNELS='biobuilds conda-forge'
- CONDA_CHANNEL_PRIORITY=True
- PIP_DEPENDENCIES="duecredit parmed"
Expand Down
8 changes: 7 additions & 1 deletion package/CHANGELOG
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ The rules for this file:
* release numbers follow "Semantic Versioning" http://semver.org

------------------------------------------------------------------------------
??/??/?? richardjgowers, IAlibay, hmacdope, orbeckst
??/??/?? richardjgowers, IAlibay, hmacdope, orbeckst, cbouy

* 2.0.0

Expand All @@ -23,6 +23,12 @@ Fixes
* Testsuite no longer calls matplotlib.use('agg') (#2191)

Enhancements
* Added the RDKitParser which creates a `core.topology.Topology` object from
an `rdkit.Chem.rdchem.Mol` object, and the RDKitReader to read coordinates
from RDKit conformers (Issue #2468, PR #2707)
* Added the `Aromaticities` topology attribute, and the `aromatic` selection
token (Issue #2468, PR #2707)
* Added the `from_smiles` classmethod to the Universe (Issue #2468, PR #2707)
* Added computation of Mean Squared Displacements (#2438, PR #2619)

Changes
Expand Down
104 changes: 104 additions & 0 deletions package/MDAnalysis/coordinates/RDKit.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
# -*- Mode: python; tab-width: 4; indent-tabs-mode:nil; coding:utf-8 -*-
# vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4
#
# MDAnalysis --- https://www.mdanalysis.org
# Copyright (c) 2006-2017 The MDAnalysis Development Team and contributors
# (see the file AUTHORS for the full list of names)
#
# Released under the GNU Public Licence, v2 or any higher version
#
# Please cite your use of MDAnalysis in published work:
#
# R. J. Gowers, M. Linke, J. Barnoud, T. J. E. Reddy, M. N. Melo, S. L. Seyler,
# D. L. Dotson, J. Domanski, S. Buchoux, I. M. Kenney, and O. Beckstein.
# MDAnalysis: A Python package for the rapid analysis of molecular dynamics
# simulations. In S. Benthall and S. Rostrup editors, Proceedings of the 15th
# Python in Science Conference, pages 102-109, Austin, TX, 2016. SciPy.
# doi: 10.25080/majora-629e541a-00e
#
# N. Michaud-Agrawal, E. J. Denning, T. B. Woolf, and O. Beckstein.
# MDAnalysis: A Toolkit for the Analysis of Molecular Dynamics Simulations.
# J. Comput. Chem. 32 (2011), 2319--2327, doi:10.1002/jcc.21787
#

"""RDKit molecule --- :mod:`MDAnalysis.coordinates.RDKit`
================================================================
Read coordinate data from an `RDKit <https://www.rdkit.org/docs/source/rdkit.Chem.rdchem.html#rdkit.Chem.rdchem.Mol>`_ :class:`rdkit.Chem.rdchem.Mol` with :class:`RDKitReader`
into an MDAnalysis Universe. Convert it back to a :class:`rdkit.Chem.rdchem.Mol` with
:class:`RDKitConverter`.
Example
-------
>>> from rdkit import Chem
>>> import MDAnalysis as mda
>>> mol = Chem.MolFromMol2File("docking_poses.mol2", removeHs=False)
>>> u = mda.Universe(mol)
>>> u
<Universe with 42 atoms>
>>> u.trajectory
<RDKitReader with 10 frames of 42 atoms>
Classes
-------
.. autoclass:: RDKitReader
:members:
.. autoclass:: RDKitConverter
:members:
"""

import warnings

import numpy as np

from . import memory


class RDKitReader(memory.MemoryReader):
    """In-memory coordinate reader backed by an RDKit molecule.

    Every conformer stored on the molecule becomes one frame of the
    resulting trajectory.

    .. versionadded:: 2.0.0
    """
    format = 'RDKIT'

    # Lengths are declared in Angstrom; no time unit is defined
    units = {'time': None, 'length': 'Angstrom'}

    @staticmethod
    def _format_hint(thing):
        """Tell whether *thing* is an RDKit molecule this reader can load."""
        try:
            from rdkit import Chem
        except ImportError:
            # RDKit is unavailable, so *thing* is not treated as an
            # RDKit molecule
            return False
        return isinstance(thing, Chem.Mol)

    def __init__(self, filename, **kwargs):
        """Read coordinates from an RDKit molecule.

        Each conformer of the molecule is loaded as one frame. When the
        molecule carries no coordinates, a warning is emitted and a single
        frame filled with NaN is used instead.

        Parameters
        ----------
        filename : rdkit.Chem.rdchem.Mol
            RDKit molecule
        **kwargs
            Forwarded to the parent reader's constructor
        """
        atom_count = filename.GetNumAtoms()
        conformer_positions = [
            conformer.GetPositions() for conformer in filename.GetConformers()
        ]
        coordinates = np.array(conformer_positions, dtype=np.float32)
        if not coordinates.size:
            warnings.warn("No coordinates found in the RDKit molecule")
            # placeholder frame: one frame, every position set to NaN
            coordinates = np.full((1, atom_count, 3), np.nan,
                                  dtype=np.float32)
        super(RDKitReader, self).__init__(coordinates, order='fac', **kwargs)
1 change: 1 addition & 0 deletions package/MDAnalysis/coordinates/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -738,3 +738,4 @@ class can choose an appropriate reader automatically.
from . import null
from . import NAMDBIN
from . import FHIAIMS
from . import RDKit
15 changes: 15 additions & 0 deletions package/MDAnalysis/core/selection.py
Original file line number Diff line number Diff line change
Expand Up @@ -568,6 +568,21 @@ class AltlocSelection(StringSelection):
field = 'altLocs'


class AromaticSelection(Selection):
    """Selection of the atoms flagged as aromatic.

    The flags are read from the `aromaticities` attribute of the group,
    which is made available through RDKit.
    """
    token = 'aromatic'
    field = 'aromaticities'

    def __init__(self, parser, tokens):
        """Nothing to set up: neither *parser* nor *tokens* is used."""

    def apply(self, group):
        """Return the unique atoms of *group* indexed by its aromaticities."""
        aromatic_mask = group.aromaticities
        return group[aromatic_mask].unique


class ResidSelection(Selection):
"""Select atoms based on numerical fields
Expand Down
12 changes: 12 additions & 0 deletions package/MDAnalysis/core/topologyattrs.py
Original file line number Diff line number Diff line change
Expand Up @@ -1672,6 +1672,18 @@ def _gen_initial_values(na, nr, ns):
return np.zeros(na)


class Aromaticities(AtomAttr):
    """Per-atom aromaticity flags (RDKit)"""
    attrname = "aromaticities"
    singular = "aromaticity"
    per_object = "atom"
    dtype = bool

    @staticmethod
    def _gen_initial_values(na, nr, ns):
        """Return the initial values: ``False`` for each of the *na* atoms.

        *nr* and *ns* are accepted but not used.
        """
        return np.full(na, False, dtype=bool)


class ResidueAttr(TopologyAttr):
attrname = 'residueattrs'
singular = 'residueattr'
Expand Down
81 changes: 81 additions & 0 deletions package/MDAnalysis/core/universe.py
Original file line number Diff line number Diff line change
Expand Up @@ -1285,6 +1285,87 @@ def _fragdict(self):
fragdict[a.ix] = fraginfo(i, f)

return fragdict

@classmethod
def from_smiles(cls, smiles, sanitize=True, addHs=True,
                generate_coordinates=True, numConfs=1,
                rdkit_kwargs=None, **kwargs):
    """Create a Universe from a SMILES string with rdkit

    Parameters
    ----------
    smiles : str
        SMILES string
    sanitize : bool (optional, default True)
        Toggle the sanitization of the molecule
    addHs : bool (optional, default True)
        Add all necessary hydrogens to the molecule
    generate_coordinates : bool (optional, default True)
        Generate 3D coordinates using RDKit's `AllChem.EmbedMultipleConfs`
        function. Requires adding hydrogens with the `addHs` parameter
    numConfs : int (optional, default 1)
        Number of frames to generate coordinates for. Ignored if
        `generate_coordinates=False`
    rdkit_kwargs : dict (optional)
        Other arguments passed to the RDKit `EmbedMultipleConfs` function.
        The dictionary is copied before use and is never modified. If it
        contains a ``numConfs`` key, that value takes precedence over the
        `numConfs` parameter.
    kwargs : dict
        Parameters passed on Universe creation

    Returns
    -------
    :class:`~MDAnalysis.core.Universe`

    Raises
    ------
    ImportError
        If RDKit cannot be imported
    SyntaxError
        If the SMILES string cannot be parsed, or if `numConfs` is not a
        strictly positive ``int``
    ValueError
        If `generate_coordinates` is requested with ``addHs=False``

    Examples
    --------
    To create a Universe with 10 conformers of ethanol:

    >>> u = mda.Universe.from_smiles('CCO', numConfs=10)
    >>> u
    <Universe with 9 atoms>
    >>> u.trajectory
    <RDKitReader with 10 frames of 9 atoms>

    To use a different conformer generation algorithm, like ETKDGv3:

    >>> from rdkit.Chem import AllChem
    >>> u = mda.Universe.from_smiles('CCO', rdkit_kwargs=dict(
    ...     params=AllChem.ETKDGv3()))
    >>> u.trajectory
    <RDKitReader with 1 frames of 9 atoms>

    .. versionadded:: 2.0.0
    """
    try:
        from rdkit import Chem
        from rdkit.Chem import AllChem
    except ImportError as e:
        raise ImportError(
            "Creating a Universe from a SMILES string requires RDKit but "
            "it does not appear to be installed") from e

    mol = Chem.MolFromSmiles(smiles, sanitize=sanitize)
    if mol is None:
        raise SyntaxError('Error while parsing SMILES {0}'.format(smiles))
    if addHs:
        mol = Chem.AddHs(mol)
    if generate_coordinates:
        if not addHs:
            raise ValueError("Generating coordinates requires adding "
                             "hydrogens with `addHs=True`")

        # Work on a private copy: popping "numConfs" must not alter the
        # dictionary owned by the caller (nor a shared default object).
        embed_kwargs = dict(rdkit_kwargs) if rdkit_kwargs else {}
        numConfs = embed_kwargs.pop("numConfs", numConfs)
        # exact type check: bool and non-int numeric types are rejected
        if not (type(numConfs) is int and numConfs > 0):
            raise SyntaxError("numConfs must be a non-zero positive "
                              "integer instead of {0}".format(numConfs))
        AllChem.EmbedMultipleConfs(mol, numConfs, **embed_kwargs)

    return cls(mol, **kwargs)


# TODO: what is the point of this function???
Expand Down
Loading

0 comments on commit 50cd6e7

Please sign in to comment.