diff --git a/.github/workflows/actions.yml b/.github/workflows/actions.yml index b59fae8..e4ff9bd 100644 --- a/.github/workflows/actions.yml +++ b/.github/workflows/actions.yml @@ -6,7 +6,7 @@ jobs: runs-on: ubuntu-latest steps: - name: Checkout repository - uses: actions/checkout@v3 + uses: actions/checkout@v4 - name: Apply formatting uses: github/super-linter@v4 @@ -29,13 +29,13 @@ jobs: steps: - name: Checkout repository - uses: actions/checkout@v3 + uses: actions/checkout@v4 - name: Setup environment - uses: conda-incubator/setup-miniconda@v2 + uses: conda-incubator/setup-miniconda@v3 with: miniforge-variant: Mambaforge - miniforge-version: latest + miniforge-version: 4.9.2-4 use-mamba: true environment-file: envs/linux.yml use-only-tar-bz2: true @@ -59,13 +59,13 @@ jobs: steps: - name: Checkout repository - uses: actions/checkout@v3 + uses: actions/checkout@v4 - name: Setup environment - uses: conda-incubator/setup-miniconda@v2 + uses: conda-incubator/setup-miniconda@v3 with: miniforge-variant: Mambaforge - miniforge-version: latest + miniforge-version: 4.9.2-4 use-mamba: true environment-file: envs/osx.yml use-only-tar-bz2: true @@ -91,13 +91,13 @@ jobs: steps: - name: Checkout repository - uses: actions/checkout@v3 + uses: actions/checkout@v4 - name: Setup environment - uses: conda-incubator/setup-miniconda@v2 + uses: conda-incubator/setup-miniconda@v3 with: miniforge-variant: Mambaforge - miniforge-version: latest + miniforge-version: 4.9.2-4 use-mamba: true python-version: 3.9 channels: conda-forge,bioconda diff --git a/conda-recipe/meta.yaml b/conda-recipe/meta.yaml index 49e3e27..ee75ce6 100644 --- a/conda-recipe/meta.yaml +++ b/conda-recipe/meta.yaml @@ -33,7 +33,7 @@ requirements: - openmpi - pandas >=1.1.4 - python =3.9 - - rdkit <=2022.09.1 + - rdkit >=2023.09.1 - snakemake >=6.3.0 - statsmodels >=0.11.1 - xtb >=6.5.1 diff --git a/envs/linux.yml b/envs/linux.yml index b1e508b..d981b2f 100644 --- a/envs/linux.yml +++ b/envs/linux.yml @@ -14,7 +14,7 @@ dependencies: - openmpi - pandas >=1.1.4 - python =3.9 - - rdkit <=2022.09.1 + - rdkit >=2023.09.1 - snakemake >=6.3.0 - statsmodels >=0.11.1 - xtb >=6.5.1 diff --git a/envs/osx.yml b/envs/osx.yml index a388eb1..38098da 100644 --- a/envs/osx.yml +++ b/envs/osx.yml @@ -12,7 +12,7 @@ dependencies: - openmpi - pandas >=1.1.4 - python =3.9 - - rdkit <=2022.09.1 + - rdkit >=2023.09.1 - snakemake >=6.3.0 - statsmodels >=0.11.1 - xtb >=6.5.1 diff --git a/isicle/adducts.py b/isicle/adducts.py index f210e6f..fc4f324 100644 --- a/isicle/adducts.py +++ b/isicle/adducts.py @@ -303,9 +303,8 @@ def set_geometry(self, geom): def set_charge(self): """ """ - if self.geom.__dict__.get("charge") is None: - self.geom.__dict__.update(charge=self.geom.get_formal_charge()) + self.geom.__dict__.update(charge=self.geom.get_charge()) def _set_ions(self, ion_path=None, ion_list=None): """ """ @@ -482,7 +481,7 @@ def _forcefield_selector(self, forcefield, mw): def _update_geometry_charge(self, geom): """ """ - geom.__dict__.update(charge=geom.get_formal_charge()) + geom.__dict__.update(charge=geom.get_charge()) def _add_ion( self, init_mol, base_atom_idx, ion_atomic_num, sanitize, forcefield, ff_iter @@ -1110,11 +1109,9 @@ def set_charge(self): if self.geom.__dict__.get("charge") is None: if self.geom.load.get("filetype") == ".xyz": # self.geom.__dict__.update(charge=charge) - raise ValueError( - "Must first run geom.set_formal_charge for an xyz structure" - ) + raise ValueError("Must first run geom.set_charge for an xyz structure") else: - self.geom.__dict__.update(charge=self.geom.get_formal_charge()) + self.geom.__dict__.update(charge=self.geom.get_charge()) def _set_ions(self, ion_path=None, ion_list=None): cations, anions, complex = _parse_ions(ion_path=ion_path, ion_list=ion_list) diff --git a/isicle/conformers.py b/isicle/conformers.py index 7cd6b0e..82c5b0b 100644 --- a/isicle/conformers.py +++ b/isicle/conformers.py @@ -1,8 +1,7 @@ -import pickle -from os.path import join import numpy as np import pandas as pd from statsmodels.stats.weightstats import DescrStatsW +from os.path import join from isicle import io from isicle.geometry import Geometry, XYZGeometry @@ -25,9 +24,8 @@ def _function_selector(func): ------- func Conformer reduction function. - """ - + # Mapping between names and functions func_map = { "boltzmann": boltzmann, "simple": simple_average, @@ -35,9 +33,11 @@ def _function_selector(func): "threshold": energy_threshold, } + # Check for function by name if func.lower() in func_map: return func_map[func.lower()] else: + # Not a named/implemented function raise ValueError("{} not a supported reduction function.".format(func)) @@ -47,19 +47,19 @@ def _energy_based(func): Parameters ---------- - func : func + func : function Conformer reduction function. Returns ------- bool True if energy based, otherwise False. - """ - + # Check if among energy-based functions if func in [boltzmann, lowest_energy, energy_threshold]: return True + # Not energy based return False @@ -74,7 +74,7 @@ def reduce(value, func="boltzmann", **kwargs): func : str Alias for function selection (one of "boltzmann", "simple", "lowest", or "threshold"). - **kwargs + kwargs Additional keyword arguments passed to `func`. Returns @@ -83,7 +83,7 @@ def reduce(value, func="boltzmann", **kwargs): Result of reduction operation. """ - + # Select function f = _function_selector(func) # Energy-based method @@ -114,32 +114,45 @@ def boltzmann(value, energy, index=None, atom=None): ------- :obj:`~pandas.DataFrame` Result of reduction operation. - """ + # Placeholder for index if index is None: index = np.full_like(value, -1) + + # Placeholder for atom if atom is None: atom = np.full_like(value, -1) + # Initialize data frame df = pd.DataFrame.from_dict( {"value": value, "energy": energy, "index": index, "atom": atom} ) + # Result container res = [] + + # Iterate over unique indices for name, group in df.groupby(["index", "atom"]): + # Compute relative delta G g = group["energy"] * 627.503 mn = g.min() relG = g - mn + + # Compute Boltzmann weighting factors b = np.exp(-relG / 0.5924847535) w = (b / b.sum()) * len(b) + # Compute weighted statistics ws = DescrStatsW(group["value"], weights=w, ddof=0) + # Append to container res.append([name[0], name[1], ws.mean, ws.std, len(group.index)]) + # Initialize data frame res = pd.DataFrame(res, columns=["index", "atom", "mean", "std", "n"]) + # Drop index if not supplied if np.all(index == -1): return res.drop(columns=["index", "atom"]).iloc[0] @@ -163,21 +176,27 @@ def simple_average(value, index=None, atom=None): ------- :obj:`~pandas.DataFrame` Result of reduction operation. - """ + # Placeholder for index if index is None: index = np.full_like(value, -1) + + # Placeholder for atom if atom is None: atom = np.full_like(value, -1) + # Initialize data frame df = pd.DataFrame.from_dict({"value": value, "index": index, "atom": atom}) + # Average per unique index res = df.groupby(["index", "atom"], as_index=False).agg( {"value": ["mean", "std", "count"]} ) + # Rename columns res.columns = ["index", "atom", "mean", "std", "n"] + # Drop indices if not supplied if np.all(index == -1): return res.drop(columns=["index", "atom"]).iloc[0] @@ -203,20 +222,24 @@ def lowest_energy(value, energy, index=None, atom=None): ------- :obj:`~pandas.DataFrame` Result of reduction operation. - """ + # Placeholder for index if index is None: index = np.full_like(value, -1) + + # Placeholder for atom if atom is None: atom = np.full_like(value, -1) + # Initialize data frame df = pd.DataFrame.from_dict( {"value": value, "energy": energy, "index": index, "atom": atom} ) - + # Take minimum energy per unique index res = df.loc[df.groupby(["index", "atom"])["energy"].idxmin()] + # Drop indices if not supplied if np.all(index == -1): return res.drop(columns=["index", "atom"]).iloc[0] @@ -243,25 +266,33 @@ def energy_threshold(value, energy, threshold=5, index=None, atom=None): ------- :obj:`~pandas.DataFrame` Result of reduction operation. - """ + # Placeholder for index if index is None: index = np.full_like(value, -1) + + # Placeholder for atom if atom is None: atom = np.full_like(value, -1) + # Initialize data frame df = pd.DataFrame.from_dict( {"value": value, "energy": energy, "index": index, "atom": atom} ) + # Filter by energy df = df.loc[df["energy"] <= threshold, :] + # Aggregate res = df.groupby(["index", "atom"], as_index=False).agg( {"value": ["mean", "std", "count"]} ) + + # Rename columns res.columns = ["index", "atom", "mean", "std", "n"] + # Drop indices if not supplied if index is None: return res.drop(columns=["index", "atom"]).iloc[0] @@ -291,16 +322,20 @@ def transform( ------- :obj: `~pandas.DataFrame` Result of transformation operation. - """ + # Placeholder for index if index is None: index = np.full_like(value, -1) + + # Placeholder for atom if atom is None: atom = np.full_like(value, -1) + # Initialize data frame df = pd.DataFrame.from_dict({"value": value, "index": index, "atom": atom}) + # Process with per-atom values if isinstance(m, dict): res = pd.DataFrame() for idx in m: @@ -308,6 +343,8 @@ def transform( part["new_value"] = part["value"].apply(lambda x: m[idx] * x + b[idx]) res = pd.concat([res, part]) + + # Process with global values else: res = df.copy() res["new_value"] = res["value"].apply(lambda x: m * x + b) @@ -328,76 +365,15 @@ def build_conformational_ensemble(geometries): ------- :obj:`~isicle.conformers.ConformationalEnsemble` Conformational ensemble. - """ return ConformationalEnsemble(geometries) -def load_pickle(path): - """ - Load pickled file. - - Parameters - ---------- - path : str - Path to pickle. - - Returns - ------- - :obj:`~isicle.conformers.ConformationalEnsemble` - Previously pickled :obj:`~isicle.conformers.ConformationalEnsemble` - instance. - - Raises - ------ - IOError - If file cannot be read. - TypeError - If file is not a supported class instance. - - """ - - # Load file - with open(path, "rb") as f: - try: - ensemble = pickle.load(f) - except pickle.UnpicklingError: - raise IOError("Could not read file as pickle: {}".format(path)) - - # Check for valid class type - if isinstance(ensemble, ConformationalEnsemble): - return ensemble - - # Failure. This is not a ConformationalEnsemble instance - raise TypeError("Unsupported format: {}.".format(ensemble.__class__)) - - -def load(path): - """ - Load file. - - Parameters - ---------- - path : str - Path to file. - - Returns - ------- - :obj:`~isicle.conformers.ConformationalEnsemble` - Previously pickled :obj:`~isicle.conformers.ConformationalEnsemble` - instance. - - """ - - return load_pickle(path) - - class ConformationalEnsemble(TypedList): """ Collection of :obj:`~isicle.geometry.Geometry`, or related subclass, instances. - """ def __init__(self, *args): @@ -452,7 +428,7 @@ def reduce(self, attr, func="boltzmann", **kwargs): func : str Alias for function selection (one of "boltzmann", "simple", "lowest", or "threshold"). - **kwargs + kwargs Additional keyword arguments passed to `func`. Returns @@ -462,6 +438,7 @@ def reduce(self, attr, func="boltzmann", **kwargs): """ + # Select reduction function f = _function_selector(func) # Check for primary attribute @@ -518,7 +495,6 @@ def reduce(self, attr, func="boltzmann", **kwargs): # Execute other method return f(value, index=index, atom=atom, **kwargs) - # TODO: automatically unpack multiple returned objects def _apply_method(self, method, **kwargs): """ Process conformational ensemble members according to supplied method. @@ -527,7 +503,7 @@ def _apply_method(self, method, **kwargs): ---------- method : str Method by which ensemble members will be processed. - **kwargs + kwargs Keyword arguments passed to `method`. Returns @@ -555,7 +531,6 @@ def _apply_method(self, method, **kwargs): except: return result - # TODO: automatically unpack multiple returned objects def _apply_function(self, func, **kwargs): """ Process conformational ensemble members according to supplied function. @@ -564,7 +539,7 @@ def _apply_function(self, func, **kwargs): ---------- func : function Function by which ensemble members will be processed. - **kwargs + kwargs Keyword arguments passed to `func`. Returns @@ -585,7 +560,6 @@ def _apply_function(self, func, **kwargs): except: return result - # TODO: automatically unpack multiple returned objects def apply(self, func=None, method=None, **kwargs): """ Process conformational ensemble members according to supplied function @@ -597,7 +571,7 @@ def apply(self, func=None, method=None, **kwargs): Function by which ensemble members will be processed. method : str Method by which ensemble members will be processed. - **kwargs + kwargs Keyword arguments passed to `method`. Returns @@ -611,41 +585,17 @@ def apply(self, func=None, method=None, **kwargs): If neither `func` nor `method` is supplied. """ + + # Apply function if func is not None: return self._apply_function(func, **kwargs) + # Apply method if method is not None: return self._apply_method(method, **kwargs) raise ValueError("Must supply `func` or `method`.") - def save_pickle(self, path): - """ - Pickle this class instance. - - Parameters - ---------- - path : str - Path to save pickle object to. - - """ - - with open(path, "wb") as f: - pickle.dump(self, f) - - def save(self, path): - """ - Save this class instance. - - Parameters - ---------- - path : str - Path to save object to. - - """ - - io.save(path, self) - def get_structures(self): """ Extract all structures from containing object as a conformational ensemble. diff --git a/isicle/geometry.py b/isicle/geometry.py index 57df6f8..b2168fa 100644 --- a/isicle/geometry.py +++ b/isicle/geometry.py @@ -1,123 +1,65 @@ import os -import isicle import numpy as np -import pandas as pd -from isicle.interfaces import GeometryInterface, XYZGeometryInterface +from openbabel import pybel from rdkit import Chem from rdkit.Chem.MolStandardize import rdMolStandardize from rdkit.Chem.SaltRemover import SaltRemover -from openbabel import pybel + +import isicle +from isicle.interfaces import GeometryInterface, XYZGeometryInterface + class XYZGeometry(XYZGeometryInterface): """ Molecule information, specialized for XYZ files (bonding info not provided). - It is not recommended to manipulate or retrieve - attributes of this class without using class functions. - - Attributes - ---------- - xyz: list of str - Lines of XYZ block used to represent this compound's structure - global_properties : dict - Dictionary of properties calculated for this structure. Always at least - contains the "from" key, which reports the last operation performed on - this compound (e.g. load, dft_optimize). To generate compound - properties, use get_* and *_optimize functions. + It is not recommended to manipulate or retrieve attributes of this class + without using class functions. """ _defaults = ["xyz"] _default_value = None def __init__(self, **kwargs): - self.__dict__.update(dict.fromkeys(self._defaults, self._default_value)) - self.__dict__.update(kwargs) - - def _upgrade_to_Geometry(self, mol): """ - Upgrade this class to the Geometry class using a provided mol object. - Hard copies everything but the xyz structure. + Initialize :obj:`~isicle.geometry.XYZGeometry` instance. Parameters ---------- - mol : RDKit Mol object - Structure to use in new Geometry object - - Returns - ------- - Geometry - Instance containing the new mol representation. + kwargs + Keyword arguments to initialize instance attributes. """ - if mol is None: - mol = self.mol - # Create dict to load in - d = self.__dict__.copy() - d["mol"] = mol - d.pop("xyz") - - return Geometry(**d) + self.__dict__.update(dict.fromkeys(self._defaults, self._default_value)) + self.__dict__.update(kwargs) - def _update_structure( - self, inplace, mol=None, xyz=None, xyz_filename=None, event=None - ): + def get_basename(self): """ - Return object with given structure. If inplace and a xyz structure is - provided, manipulates the current instance. Otherwise, a new - XYZGeometry or Geometry object is created with hard copied attributes. - Only one of mol, xyz, or xyz_filename is to be provided. If multiple - are provided, they are prioritized in the following order: (1) mol, - (2) xyz_filename, (3) xyz. If none of these are provided, an error is - raised. - - Parameters - ---------- - inplace : boolean - If true, update this instance with the new structure. Otherwise, - create a new XYZGeometry or Geometry instance and populate it with - the structure. - mol : RDKit Mol object (optional) - Structure with 3D abilities. - xyz_filename: str (optional) - Path to file to load in xyz from - xyz: list of str (optional) - Lines from xyz block to use as structure representation. + Returns a copy of this object's basename (original filename). Returns ------- - XYZGeometry or Geometry - Updated structure instance. Geometry is returned if mol was - provided, otherwise XYZGeometry is returned. + str + Basename of original filepath. """ - if mol is not None: - # Upgrade to the Geometry class - geom = self._upgrade_to_Geometry(mol) - - else: - if inplace: # Modify this object - geom = self - else: # Make a new object - geom = self.__copy__() - - # Ensure exactly one of xyz or a filename is provided - # Add as structure of the new XYZGeometry - if xyz_filename is not None: - geom.xyz = load_xyz(xyz_filename) - elif xyz is not None: - geom.xyz = xyz - - return geom - - def get_basename(self): - """Returns a copy of this object's basename (original filename).""" return self.basename def add___dict__(self, d, override=False): - """Accepts a dictionary of values and adds any non-conflicting - information to the attribute dictionary""" + """ + Accepts a dictionary of values and adds any non-conflicting + information to the attribute dictionary. + + Parameters + ---------- + d : dict + Attribute dictionary. + override : bool + Singal whether to override existing attributes. + + """ if not override: # Remove keys that are already present in attribute dictionary @@ -125,32 +67,52 @@ def add___dict__(self, d, override=False): # Add to attributes self.__dict__.update(d) - return - def dft(self, program="NWChem", template=None, **kwargs): + def dft(self, backend="NWChem", **kwargs): """ - Optimize geometry from XYZ, using stated functional and basis set. - Additional inputs can be grid size, optimization criteria level, + Perform density functional theory calculations according to supplied task list + and configuration parameters for specified backend. + + Parameters + ---------- + backend : str + Alias for backend selection (NWChem, ORCA). + kwargs + Keyword arguments supplied to selected program. See `run` method of relevant + wrapper for configuration parameters, e.g. :meth:`~isicle.qm.NWChemWrapper.run`. + + Returns + ------- + :obj:`~isicle.qm.{program}Wrapper` + Wrapper object containing relevant outputs from the simulation. + """ - geom = isicle.qm.dft( - self.__copy__(), program=program, template=template, **kwargs - ) - return geom + return isicle.qm.dft(self, backend=backend, **kwargs) def md(self, program="xtb", **kwargs): """ - Optimize geometry or generate conformers or adducts from XYZ using stated forcefield. - Additional inputs can be energy window, optimization criteria level, charge, or ion. + Optimize geometry or generate conformers or adducts using stated forcefield. + + Parameters + ---------- + kwargs + Keyword arguments supplied to selected program. See `run` method of relevant + wrapper for configuration parameters, e.g. :meth:`~isicle.md.XTBWrapper.run`. + + Returns + ------- + :obj:`~isicle.md.{program}Wrapper` + Wrapper object containing relevant outputs from the simulation. + """ - geom = isicle.md.md(self.__copy__(), program=program, **kwargs) - return geom + return isicle.md.md(self, program=program, **kwargs) - def ionize(self, ion_path=None, ion_list=None, save=False, path=None, **kwargs): + def ionize(self, ion_path=None, ion_list=None, **kwargs): """ - Calls xtb CREST to ionize xyz geometry, using specified list of ions and method of ionization. - WARNING: must run isicle.geometry.XYZGeometry.set_formal_charge prior to this. + Calls xtb CREST to ionize XYZ geometry, using specified list of ions and method of ionization. + WARNING: must run isicle.geometry.XYZGeometry.set_charge prior to this. Parameters ---------- @@ -161,42 +123,57 @@ def ionize(self, ion_path=None, ion_list=None, save=False, path=None, **kwargs): List of strings of adducts to be considered. Must be specifed in syntax `Atom+` or `Atom-`, eg. `H+`, `Na+`, `H-Na+` Either ion_path or ion_list must be specified - save : bool - Saves wrapper object to .pkl in specified path directory - path : str (optional) - Directory to write output files - **kwargs : - see :meth: `~isicle.adducts.CRESTIonizationWrapper.submit` + kwargs + See :meth: `~isicle.adducts.CRESTIonizationWrapper.submit` for more options Returns ------- Dictionary of adducts, `{:[]}` + """ + if self.__dict__.get("charge") is None: raise ValueError( - "Must first run isicle.geometry.XYZGeometry.set_formal_charge for an xyz structure" + "Must first run isicle.geometry.XYZGeometry.set_charge for an XYZ structure" ) iw = isicle.adducts.ionize("crest").run( self.__copy__(), ion_path=ion_path, ion_list=ion_list, **kwargs ) - if save == True and path is not None: - iw.save(path) - return iw - def set_formal_charge(self, charge): - """Specify and set the known formal charge of the xyz molecule to __dict__""" + def set_charge(self, charge): + """ + Specify and set the known formal charge of the XYZ molecule. + + Parameters + ---------- + charge : int + Nominal charge of the molecule. + + """ + + # Attempt cast to int try: charge = int(charge) except: - raise TypeError("Charge specifed must be str or int") + raise TypeError("Charge specifed must be str or int.") + + # Update attribute self.__dict__.update(charge=charge) - return self.charge def get_natoms(self): - """Calculate total number of atoms.""" + """ + Calculate total number of atoms. + + Returns + ------- + int + Number of atoms. + + """ + self.__dict__["natoms"] = int(self.xyz[0].strip()) return self.__dict__["natoms"] @@ -213,35 +190,93 @@ def get_atom_indices(self, atoms=["C", "H"]): ------- list of int Atom indices. + """ + + # Index container idx = [] + # Enumerate rows for i in range(2, len(self.xyz)): + # Extract atom atom = self.xyz[i].split(" ")[0] + + # Check if of interest if atom in atoms: + # Append index idx.append(i - 2) + return idx def get___dict__(self): - """Return a copy of this object's attribute dictionary""" + """ + Return a copy of this object's attributes as a dictionary. + + Returns + ------- + dict + Instance attributes. + + """ + return self.__dict__.copy() - def __copy__(self): - """Return hard copy of this class instance.""" - # TODO: manage what should be passed, rather than all? + def __copy__(self, **kwargs): + """ + Copy this class instance. + + Parameters + ---------- + kwargs + Keyword arguments to update copy. + + Returns + ------- + :obj:`~isicle.geometry.XYZGeometry` + Molecule representation. + + """ + + # Copy attributes d = self.__dict__.copy() - return type(self)(**d) + + # Safely copy mol if present + if "mol" in d: + d["mol"] = self.to_mol() + + # Build self instance from scratch + instance = type(self)(**d) + + # Update from kwargs + instance.__dict__.update(kwargs) + + return instance def to_xyzblock(self): - """Get XYZ text for this structure.""" + """ + Get XYZ text for this structure. + + Returns + ------- + str + XYZ text. + + """ + return "\n".join(self.xyz) def to_mol(self): + """ + Get structure representation in :obj:`~rdkit.Chem.rdchem.Mol` format. + + :obj:`~rdkit.Chem.rdchem.Mol` + RDKit Mol instance. + + """ + self.temp_dir = isicle.utils.mkdtemp() - geomfile = os.path.join(self.temp_dir, - '{}.{}'.format(self.basename, - "xyz")) + geomfile = os.path.join(self.temp_dir, "{}.{}".format(self.basename, "xyz")) isicle.io.save(geomfile, self) mols = list(pybel.readfile("xyz", geomfile)) @@ -252,16 +287,9 @@ def to_mol(self): class Geometry(XYZGeometry, GeometryInterface): """ - Molecule information, specialized for 3D representations. - It is not recommended to manipulate or retrieve - attributes of this class without using class functions. - - Attributes - ---------- = - mol : RDKit Mol object - Current structure. - __dict__ : dict - Dictionary of properties calculated for this structure. + Molecule information, specialized for 3D representations. It is not + recommended to manipulate or retrieve attributes of this class without + using class functions. """ _defaults = ["mol"] @@ -271,103 +299,63 @@ def __init__(self, **kwargs): self.__dict__.update(dict.fromkeys(self._defaults, self._default_value)) self.__dict__.update(kwargs) - - def _downgrade_to_XYZGeometry(self, xyz): + def _is_embedded(self): """ - Downgrade this class to the XYZGeometry class using a provided xyz. - Hard copies everything but the mol structure. - - Parameters - ---------- - xyz : list of str - Lines of xyz block to use for new object. + Check if molecule has embedded 3D coordinates. Returns ------- - XYZGeometry - Instance containing the new xyz representation. + bool + Indication of 3D coordinate presence. """ - # Create dict to load in - d = self.__dict__.copy() - d["xyz"] = xyz - - return XYZGeometry(**d) + try: + self.mol.GetConformers() + return True + except: + return False - def _update_structure( - self, inplace, mol=None, xyz=None, xyz_filename=None): + def addHs(self): """ - Return object with given structure. If inplace and a mol structure is - provided, manipulates the current instance. Otherwise, a new - XYZGeometry or Geometry object is created with hard copied attributes. - Only one of mol, xyz, or xyz_filename is to be provided. If multiple - are provided, they are prioritized in the following order: (1) mol, - (2) xyz_filename, (3) xyz. If none of these are provided, an error is - raised. - - Parameters - ---------- - inplace : boolean - If true, update this instance with the new structure. Otherwise, - create a new XYZGeometry or Geometry instance and populate it with - the structure. - mol : RDKit Mol object (optional) - Structure with 3D abilities. - xyz_filename: str (optional) - Path to file to load in xyz from - xyz: list of str (optional) - Lines from xyz block to use as structure representation. + Add implicit hydrogens to molecule. Returns ------- - XYZGeometry or Geometry - Updated structure instance. Geometry is returned if mol was - provided, otherwise XYZGeometry is returned. + :obj:`~isicle.geometry.Geometry` + Molecule representation. """ - if mol is None: - - if xyz_filename is not None: - # Load in file first - xyz = load_xyz(xyz_filename) - - if xyz is not None: - # Downgrade to XYZGeometry class - geom = self._downgrade_to_XYZGeometry(xyz) - - else: - if inplace: # Modify this object - geom = self - else: # Make a new object - geom = self.__copy__() - geom.mol = mol + # Get copy of mol object + mol = self.to_mol() - # Add event that led to this change in structure - # If event is None, nothing will happen + # Add Hs with coordinates + mol = Chem.AddHs(mol, addCoords=True) - return geom + # Return new geometry instance + return self.__copy__(mol=mol) - def initial_optimize( - self, embed=False, forcefield="UFF", ff_iter=200, inplace=False - ): + def initial_optimize(self, embed=False, forcefield="UFF", ff_iter=200): """ - Params - ------ + Initial molecule optimization by basic force field to establish rough + 3D coordinates. Further optimization (molecular dynamics, density + functional theory) recommended. + + Parameters + ---------- embed: bool - Try embedding molecule with eigenvalues of distance matrix - Except failure seeds with random coordinates + Attempt molecule embedding with eigenvalues of distance matrix. + Failure results in seeding with random coordinates. forcefield: str - Specify "UFF" for Universal Force Field optimization by RDKit - Specify "MMFF" or "MMFF94" for Merck Molecular Force Field 94 - Specify "MMFF94s" for the MMFF94 s variant - inplace: bool - If geom.mol is modified inplace + Specify "UFF" for universal force field, "MMFF" or "MMFF94" for + Merck molecular force field 94, or "MMFF94s" for the MMFF94 s variant. + Returns ------- - Geometry - Updated structure instance + :obj:`~isicle.geometry.Geometry` + Molecule representation. + """ def _forcefield_selector(forcefield, mw): @@ -392,25 +380,39 @@ def _forcefield_selector(forcefield, mw): "RDKit only supports UFF, MMFF, MMFF94, MMFF94s as forcefields." ) + # Get rdkit mol mol = self.to_mol() - if embed == True: + + # Embed molecule 3D coordinates + if embed is True: + # Attempt embedding try: Chem.AllChem.EmbedMolecule(mol) + + # Use random coordinates except: Chem.AllChem.EmbedMolecule(mol, useRandomCoords=True) + + # Optimize according to supplied forcefield if forcefield is not None: - if "mmff94s" in forcefield.lower(): - _forcefield_selector(forcefield, mol)( - mol, mmffVariant=forcefield, maxIters=ff_iter - ) - else: - _forcefield_selector(forcefield, mol)(mol, maxIters=ff_iter) + # Check if embedded + if self._is_embedded(): + # Forcefield selection + if "mmff94s" in forcefield.lower(): + _forcefield_selector(forcefield, mol)( + mol, mmffVariant=forcefield, maxIters=ff_iter + ) + else: + _forcefield_selector(forcefield, mol)(mol, maxIters=ff_iter) - geom = self._update_structure(inplace, mol=mol) + # Not embedded + else: + raise ValueError("Molecule must have embedded 3D coordinates.") - return geom + # Return copy with updated mol + return self.__copy__(mol=mol) - def desalt(self, salts=None, inplace=False): + def desalt(self, salts=None): """ Desalts RDKit mol object using Chem.SaltRemover module. @@ -418,22 +420,18 @@ def desalt(self, salts=None, inplace=False): ---------- salts : str (optional) Salt type to remove. Ex: 'Cl', 'Br', '[Na+]'. Default: None. - inplace : boolean (optional) - If true, update this instance with the new structure. Otherwise, - create a new Geometry instance and populate it with the structure. - Default: False. Returns ------- - Geometry - Molecule with given salt(S) removed. + :obj:`~isicle.geometry.Geometry` + Moleculerepresentation. """ # TODO: should this raise an error instead? # If no salts given, skip desalting if salts is None: - return self._update_structure(inplace, mol=self.to_mol()) + return self.__copy__() remover = SaltRemover(defnFormat="smiles", defnData=salts) # defnData="[Cl,Br,Na]" *sample definition of salts to be removed* @@ -446,26 +444,16 @@ def desalt(self, salts=None, inplace=False): # atomno = res.GetNumAtoms # if relevant to future use, returns atom count post desalting - geom = self._update_structure(inplace, mol=mol) - # TODO: add any properties from this operation to global_props? - - return geom + return self.__copy__(mol=mol) - def neutralize(self, inplace=False): + def neutralize(self): """ Neutralizes RDKit mol object using neutralization reactions. - Parameters - ---------- - inplace : boolean (optional) - If true, update this instance with the new structure. Otherwise, - create a new Geometry instance and populate it with the structure. - Default: False. - Returns ------- - Geometry - Neutralized form of the molecule. + :obj:`~isicle.geometry.Geometry` + Molecule representation. """ @@ -495,6 +483,8 @@ def _initialize_neutralisation_reactions(): reactions = _initialize_neutralisation_reactions() mol = self.to_mol() + + # TODO: `replaced` is unused replaced = False for i, (reactant, product) in enumerate(reactions): while mol.HasSubstructMatch(reactant): @@ -502,12 +492,9 @@ def _initialize_neutralisation_reactions(): rms = Chem.AllChem.ReplaceSubstructs(mol, reactant, product) mol = rms[0] - geom = self._update_structure(inplace, mol=mol) - # TODO: add any properties from this operation to global_props? + return self.__copy__(mol=mol) - return geom - - def tautomerize(self, return_all=False, inplace=False): + def tautomerize(self, return_all=False): """ Generate tautomers according to RDKit TautomerEnumerator() method. @@ -516,14 +503,10 @@ def tautomerize(self, return_all=False, inplace=False): return_all : boolean (optional) If true, return all tautomers generated. Otherwise, only return the most common. Default=False - inplace : boolean (optional) - If true, update this instance with the new structure. Otherwise, - create a new Geometry instance and populate it with the structure. - Default: False. Returns ------- - Geometry or list(Geometry) + :obj:`~isicle.geometry.Geometry` or list of :obj:`~isicle.geometry.Geometry` Tautomer(s) of starting structure. """ @@ -543,74 +526,69 @@ def tautomerize(self, return_all=False, inplace=False): if return_all: new_geoms = [] for r in res: - geom = self._update_structure(False, mol=r, event="tautomerize") + geom = self.__copy__(mol=r) new_geoms.append(geom) return new_geoms - geom = self._update_structure(inplace, mol=res[0]) - # TODO: add any properties from this operation to global_props? - - return geom + return self.__copy__(mol=res[0]) - def kekulize(self, inplace=False): + def kekulize(self): """ - `Kekulizes the molecule if possible. Otherwise the molecule is not modified` + Kekulizes the molecule if possible. Otherwise the molecule is not modified. This is recommended for aromatic molecules undergoing explicit ionization. Aromatic bonds are explicitly defined and aromatic flags are cleared. + """ + mol = Chem.KekulizeIfPossible(self.to_mol(), clearAromaticFlags=True) - geom = self._update_structure(inplace, mol=mol, event="kekulize") - return geom - - def ionize( - self, - ion_path=None, - ion_list=None, - method="explicit", - save=False, - path=None, - **kwargs, - ): + return self.__copy__(mol=mol) + + def ionize(self, ion_path=None, ion_list=None, method="explicit", **kwargs): """ Ionize geometry, using specified list of ions and method of ionization. Parameters ---------- ion_path : str - Filepath to text file containing ions with charge (eg. `H+`) to be considered - Either ion_path or ion_list must be specified + Filepath to text file containing ions with charge (eg. `H+`) to be + considered. Either ion_path or ion_list must be specified. ion_list : list - List of strings of adducts to be considered. - Must be specifed in syntax `Atom+` or `Atom-`, eg. `H+`, `Na+`, `H-Na+` - Either ion_path or ion_list must be specified + List of strings of adducts to be considered. Must be specifed in + syntax `Atom+` or `Atom-`, eg. `H+`, `Na+`, `H-Na+`. Either ion_path + or ion_list must be specified method : str Method of ionization to be used, 'explicit' or 'crest' is accepted - save : bool - Saves wrapper object to .pkl in specified path directory - path : str (optional) - Directory to write output files ensembl : bool (optional) Returns instead a list of adduct geometries - **kwargs: + kwargs: see :meth: `~isicle.adducts.ExplicitIonizationWrapper.submit` or `~isicle.adducts.CRESTIonizationWrapper.submit` for more options Returns ------- - Dictionary of adducts, `{:[]}` + :obj:`~isicle.adducts.IonizationWrapper` + Ionization result. + """ + iw = isicle.adducts.ionize(method).run( self.__copy__(), ion_path=ion_path, ion_list=ion_list, **kwargs ) - if save == True and path is not None: - iw.save(path) - return iw def get_natoms(self): - """Calculate total number of atoms.""" + """ + Calculate total number of atoms. + + Returns + ------- + int + Number of atoms. + + """ + natoms = Chem.Mol.GetNumAtoms(self.to_mol()) self.__dict__["natoms"] = natoms return self.__dict__["natoms"] @@ -632,6 +610,7 @@ def get_atom_indices( ------- list of int Atom indices. + """ atoms = [lookup[x] for x in atoms] @@ -643,7 +622,16 @@ def get_atom_indices( return idx def get_total_partial_charge(self): - """Sum the partial charge across all atoms.""" + """ + Sum the partial charge across all atoms. + + Returns + ------- + float + Total partial charge. + + """ + mol = self.to_mol() Chem.AllChem.ComputeGasteigerCharges(mol) contribs = [ @@ -652,80 +640,153 @@ def get_total_partial_charge(self): ] return np.nansum(contribs) - def get_formal_charge(self): - """Calculate formal charge of the molecule.""" - mol = self.to_mol() - return Chem.rdmolops.GetFormalCharge(mol) + def get_charge(self): + """ + Get formal charge of the molecule. + + Returns + ------- + int + Formal charge. + + """ + + return Chem.rdmolops.GetFormalCharge(self.to_mol()) + + def set_charge(self, charge=None): + """ + Set formal charge of the molecule. + + Parameters + ---------- + charge : int + Formal charge of molecule. + + """ - def set_formal_charge(self): - """Set formal charge of the molecule to __dict__""" - self.__dict__.update(charge=self.get_formal_charge()) - return self.charge + if charge is None: + charge = self.get_charge() - def __copy__(self): - """Return hard copy of this class instance.""" - # TODO: manage what should be passed, rather than all? + self.__dict__.update(charge=int(charge)) + + def __copy__(self, **kwargs): + """ + Return hard copy of this class instance. + + Parameters + ---------- + kwargs + Keyword arguments to update copy. + + Returns + ------- + :obj:`~isicle.geometry.Geometry` + Molecule representation. + + """ + + # Copy attributes d = self.__dict__.copy() - d["mol"] = self.to_mol() - return type(self)(**d) + + # Safely copy mol if present + if "mol" in d: + d["mol"] = self.to_mol() + + # Build self instance from scratch + instance = type(self)(**d) + + # Update from kwargs + instance.__dict__.update(kwargs) + + return instance def to_mol(self): """ - Returns RDKit Mol object for this Geometry. + Returns :obj:`~rdkit.Chem.rdchem.Mol` instance for this Geometry. Returns ------- - RDKit Mol object - Copy of structure + :obj:`~rdkit.Chem.rdchem.Mol` + RDKit Mol instance. """ return self.mol.__copy__() def to_smiles(self): - """Get SMILES for this structure.""" + """ + Get SMILES for this structure. + + Returns + ------- + str + SMILES string. + + """ + return Chem.MolToSmiles(self.to_mol()) def to_inchi(self): - """Get InChI for this structure.""" + """ + Get InChI for this structure. + + Returns + ------- + str + InChI string. + + """ + return Chem.MolToInchi(self.to_mol()) def to_smarts(self): - """Get SMARTS for this structure.""" + """ + Get SMARTS for this structure. + + Returns + ------- + str + SMARTS string. + + """ + return Chem.MolToSmarts(self.to_mol()) def to_xyzblock(self): - """Get XYZ text for this structure.""" + """ + Get XYZ text for this structure. + + Returns + ------- + str + XYZ representation as string. + + """ + return Chem.MolToXYZBlock(self.to_mol()) def to_pdbblock(self): - """Get PDB text for this structure""" - return Chem.MolToPDBBlock(self.to_mol()) + """ + Get PDB text for this structure. - def to_molblock(self): - """Get PDB text for this structure""" - return Chem.MolToMolBlock(self.to_mol()) + Returns + ------- + str + PDB representation as string. + """ -class MDOptimizedGeometry(Geometry): - """ - Builds off of the 3D representation, with additional methods specific to a - representation with MD optimized 3D coordinates. Any methods that would - result in a more defined representation (e.g. DFT optimized) should yield - the appropriate subclass. - """ + return Chem.MolToPDBBlock(self.to_mol()) - def __init__(self): - # Initialize the base class - super().__init__() + def to_molblock(self): + """ + Get :obj:`~rdkit.Chem.rdchem.Mol` text for this structure. + Returns + ------- + str + :obj:`~rdkit.Chem.rdchem.Mol` representation as string. -class DFTOptimizedGeometry(Geometry): - """ - Builds off of the 3D representation, with additional methods specific to a - representation with DFT optimized 3D coordinates. - """ + """ - def __init__(self): - # Initialize the base class - super().__init__() + return Chem.MolToMolBlock(self.to_mol()) diff --git a/isicle/interfaces.py b/isicle/interfaces.py index 377a747..709420a 100644 --- a/isicle/interfaces.py +++ b/isicle/interfaces.py @@ -2,10 +2,10 @@ class FileParserInterface(metaclass=abc.ABCMeta): - ''' + """ Abstract base class for file parser interface. All file parsers conform to this definition. - ''' + """ @classmethod def __subclasshook__(cls, subclass): @@ -19,17 +19,17 @@ def __subclasshook__(cls, subclass): @abc.abstractmethod def load(self, path: str): - '''Load in the data file''' + """Load in the data file""" raise NotImplementedError @abc.abstractmethod def parse(self): - '''Extract relevant information from data''' + """Extract relevant information from data""" raise NotImplementedError @abc.abstractmethod def save(self, path: str): - '''Write parsed object to file''' + """Write parsed object to file""" raise NotImplementedError @@ -51,47 +51,41 @@ def __subclasshook__(cls, subclass): and callable(subclass.__copy__) and hasattr(subclass, 'to_xyzblock') and callable(subclass.to_xyzblock) - and hasattr(subclass, 'save_xyz') - and callable(subclass.save_xyz) - and hasattr(subclass, 'save_pickle') - and callable(subclass.save_pickle) - and hasattr(subclass, 'save') - and callable(subclass.save) or NotImplemented) @abc.abstractmethod def dft(self): - '''Optimize geometry using density function theory (DFT) methods.''' + """Optimize geometry using density function theory (DFT) methods.""" raise NotImplementedError @abc.abstractmethod def md(self): - '''Optimize geometry using molecule dynamics methods (MD).''' + """Optimize geometry using molecule dynamics methods (MD).""" raise NotImplementedError @abc.abstractmethod def get_natoms(self): - '''Count number of atoms''' + """Count number of atoms""" raise NotImplementedError @abc.abstractmethod def get_atom_indices(self): - '''Extract indices of each atom from the internal geometry.''' + """Extract indices of each atom from the internal geometry.""" raise NotImplementedError @abc.abstractmethod def get___dict__(self): - '''Return a copy of this object's attributes dictionary''' + """Return a copy of this object's attributes dictionary""" raise NotImplementedError @abc.abstractmethod def __copy__(self): - '''Return hard copy of this class instance.''' + """Return hard copy of this class instance.""" raise NotImplementedError @abc.abstractmethod def to_xyzblock(self): - '''Get XYZ text for this structure.''' + """Get XYZ text for this structure.""" raise NotImplementedError @@ -113,35 +107,34 @@ def __subclasshook__(cls, subclass): @abc.abstractmethod def to_mol(self, path: str): - '''Returns RDKit Mol object for this Geometry.''' + """Returns RDKit Mol object for this Geometry.""" raise NotImplementedError @abc.abstractmethod def get_total_partial_charge(self): - '''Determine total partial charge of molecule''' + """Determine total partial charge of molecule""" raise NotImplementedError @abc.abstractmethod def to_smiles(self, path: str): - '''Return SMILES representation''' + """Return SMILES representation""" raise NotImplementedError @abc.abstractmethod def to_inchi(self, path: str): - '''Return InChI representation''' + """Return InChI representation""" raise NotImplementedError @abc.abstractmethod def to_smarts(self, path: str): - '''Return SMARTS representation''' + """Return SMARTS representation""" raise NotImplementedError class WrapperInterface(metaclass=abc.ABCMeta): - ''' - Abstract base class for wrapper interface. All QM - wrappers conform to this definition. - ''' + """ + Abstract base class for wrapper interfaces. + """ @classmethod def __subclasshook__(cls, subclass): @@ -159,64 +152,25 @@ def __subclasshook__(cls, subclass): @abc.abstractmethod def set_geometry(self): - '''Load the input geometry file''' + """Assign the input geometry.""" raise NotImplementedError @abc.abstractmethod def configure(self): - '''Configure the run.''' + """Configure the run.""" raise NotImplementedError @abc.abstractmethod def submit(self): - '''Configure the run.''' - raise NotImplementedError - - @abc.abstractmethod - def run(self): - '''Execute/submit the run.''' - raise NotImplementedError - - @abc.abstractmethod - def finish(self, path: str): - '''Finalize, parse, return result object.''' - raise NotImplementedError - - -class MDWrapperInterface(metaclass=abc.ABCMeta): - ''' - Abstract base class for molecular dynamics wrapper interface. All QM - wrappers conform to this definition. - ''' - - @classmethod - def __subclasshook__(cls, subclass): - return (hasattr(subclass, 'set_geometry') - and callable(subclass.set_geometry) - and hasattr(subclass, 'job_type') - and callable(subclass.job_type) - and hasattr(subclass, 'run') - and callable(subclass.run) - and hasattr(subclass, 'finish') - and callable(subclass.finish) - or NotImplemented) - - @abc.abstractmethod - def set_geometry(self): - '''Load the input geometry file''' - raise NotImplementedError - - @abc.abstractmethod - def job_type(self): - '''Make list of jobs.''' + """Configure the run.""" raise NotImplementedError @abc.abstractmethod def run(self): - '''Execute/submit the run.''' + """Execute/submit the run.""" raise NotImplementedError @abc.abstractmethod def finish(self, path: str): - '''Finalize, parse, return result object.''' + """Finalize, parse, return result object.""" raise NotImplementedError diff --git a/isicle/io.py b/isicle/io.py index 3f290a1..fd0cb45 100644 --- a/isicle/io.py +++ b/isicle/io.py @@ -303,6 +303,7 @@ def load_smiles(path, force=False, basename=None): Molecule representation. """ + extension = os.path.splitext(path)[-1].lower() if "smi" in extension: return _load_line_notation( @@ -331,6 +332,7 @@ def load_inchi(path, force=False, basename=None): Molecule representation. """ + if "inchi=" in path.lower(): return _load_line_notation( path, func=Chem.MolFromInchi, force=force, string=True, basename=basename @@ -385,11 +387,11 @@ def load_joblib(path): def _check_mol_obj(mol_obj): """ """ - try: - Chem.MolToMolBlock(mol_obj) - except ValueError: - print("Invalid RDKit mol object") - raise + + if isinstance(mol_obj, Chem.Mol): + return + else: + raise IOError("Not a valid RDKit Mol object passed.") def load_mol_obj(mol_obj, basename=None): @@ -445,6 +447,7 @@ def load(path, force=False, basename=None): Molecule representation. """ + if (type(path)) == str: path = path.strip() extension = os.path.splitext(path)[-1].lower() diff --git a/isicle/md.py b/isicle/md.py index ab865d7..0ee3bf6 100644 --- a/isicle/md.py +++ b/isicle/md.py @@ -4,6 +4,7 @@ from rdkit.Chem import AllChem from rdkit.Chem import rdForceFieldHelpers from rdkit.Chem import ChemicalForceFields +from rdkit.Chem import rdDistGeom import isicle from isicle.geometry import Geometry, XYZGeometry @@ -30,7 +31,7 @@ def _program_selector(program): ------- program Wrapped functionality of the selected program. Must implement - :class:`~isicle.interfaces.MDWrapperInterface`. + :class:`~isicle.interfaces.WrapperInterface`. """ @@ -482,47 +483,241 @@ def get_structure(self): class RDKitWrapper(Geometry, WrapperInterface): """ - Wrapper for rdkit functionality. + Wrapper for RDKit functionality. Implements :class:`~isicle.interfaces.WrapperInterface` to ensure required methods are exposed. Attributes ---------- - temp_dir : str - Path to temporary directory used for simulation. - task_map : dict - Alias mapper for supported molecular dynamic presets. Includes - "optimize", "conformer", "nmr", "protonate", "deprotonate", and "tautomer". geom : :obj:`isicle.geometry.Geometry` Internal molecule representation. - fmt : str - File extension indicator. - job_list : str - List of commands for simulation. - + method: str + Method of RDKit conformer generation specified. + numConfs: int + The number of conformers to generate. """ - _defaults = ["geom"] + _defaults = ["geom", "method", "numConfs"] _default_value = None def __init__(self, **kwargs): self.__dict__.update(dict.fromkeys(self._defaults, self._default_value)) self.__dict__.update(**kwargs) - def set_geometry(self): - return + def set_geometry(self, geom): + """ + Set :obj:`~isicle.geometry.Geometry` instance for simulation. - def configure(self): - return + Parameters + ---------- + geom : :obj:`~isicle.geometry.Geometry` + Molecule representation. - def submit(self): - return + """ - def run(self): - return + # Assign geometry + self.geom = geom + self.basename = self.geom.basename + + def configure(self, method: str = "distance", numConfs: int = 10, **kwargs): + """ + Set conformer generation parameters. + Parameters + ---------- + method: str + `distance` for distance geometry method (default) + `etkdg` for ETKDG method + `etkdgv2` for ETKDG method + `etkdgv3` for ETKDG method + `sretkdgv3` for ETKDG method + numConfs: int + the number of conformers to generate + **kwargs: + Keyword arguments to configure the simulation + See :meth:`~isicle.md.RDKitWrapper._configure_distance_geometry` + """ + lookup = { + "distance": self._configure_distance_geometry, + "etdg": self._configure_etdg, + "etkdg": self._configure_etkdg1, + "etkdgv1": self._configure_etkdg1, + "etkdgv2": self._configure_etkdg2, + "etkdgv3": self._configure_etkdg3, + "sretkdgv3": self._configure_etkdg3_variant, + } + method = str(method) + try: + lookup[method.lower()](**kwargs) + except KeyError: + raise + self.method = method.lower() + try: + numConfs = int(numConfs) + except ValueError: + raise + self.numConfs = numConfs + + def _configure_distance_geometry( + self, + pruneRmsThresh: float = -1.0, + forceTol: float = 0.001, + randomSeed: int = -1, + ): + """ + Set parameters for distance geometry based conformer generation. + Parameters + ---------- + pruneRmsThresh: float + Greedy pruning mainting conformers are apart based upon RMSD of heavy atoms. + Default: no pruning, -1.0 + forceTol: float + Tolerance to be used in force-field minimizations + randomSeed: int + provide a seed for the random number generator + `-1` causes RNG to not be seeded + """ + self.pruneRmsThresh = pruneRmsThresh + self.forceTol = forceTol + self.randomSeed = randomSeed + + def _configure_etdg(self): + """ + Set parameters for ETDG conformer generation. + """ + self.params = rdDistGeom.ETDG() + + def _configure_etkdg1(self): + """ + Set parameters for ETKDG conformer generation, based on work by Riniker and Landrum. + Version 1: RDKit default + """ + self.params = rdDistGeom.ETKDG() + + def _configure_etkdg2(self): + """ + Set parameters for ETKDG conformer generation, based on work by Riniker and Landrum. + Version 2: (default) 2016 release, updated torsion angle potentials + """ + self.params = rdDistGeom.ETKDGv2() + + def _configure_etkdg3(self): + """ + Set parameters for ETKDG conformer generation, based on work by Riniker and Landrum. + Version 3: Updated sampling small rings AND macrocycles + """ + self.params = rdDistGeom.ETKDGv3() + + def _configure_etkdg3_variant(self): + """ + Set parameters for ETKDG conformer generation, based on work by Riniker and Landrum. + Version 3 variant: Updated sampling for small rings, NOT macrocycles + """ + self.params = rdDistGeom.srETKDGv3() + + def submit(self): + """ + Execute conformer generation with user-specifed method, parameters. + """ + if self.method == "distance": + rdDistGeom.EmbedMultipleConfs( + self.geom.mol, + numConfs=self.numConfs, + randomSeed=self.randomSeed, + pruneRmsThresh=self.pruneRmsThresh, + forceTol=self.forceTol, + ) + elif "etkdg" in self.method or "etdg" in self.method: + rdDistGeom.EmbedMultipleConfs( + self.geom.mol, numConfs=self.numConfs, params=self.params + ) + else: + raise ValueError( + "Failure to run RDKit MD, method and/or variant not recognized" + ) def finish(self): - return + """ + Parse RDKit conformers generated. + """ + confCount = self.geom.mol.GetNumConformers() + conformers = [ + isicle.load(Chem.Mol(self.geom.mol, confId=i), basename=self.basename) + for i in range(confCount) + ] + + self.geom = conformers + for conf, label in zip(self.geom, range(confCount)): + conf.__dict__.update(conformerID=label) + + def run(self, geom, **kwargs): + """ + Generate conformers using RKDit and supplied parameters. + + Parameters + ---------- + geom : :obj:`~isicle.geometry.Geometry` + Molecule representation. + **kwargs + Keyword arguments to configure the simulation. + See :meth:`~isicle.md.RDKitWrapper.configure`. + + Returns + ------- + :obj:`~isicle.md.RDKitWrapper` + Wrapper object containing relevant outputs from the simulation. + + """ + + # New instance + self = RDKitWrapper() + + # Set geometry + self.set_geometry(geom) + + # Configure + self.configure(**kwargs) + + # Run QM simulation + self.submit() + + # Finish/clean up + self.finish() + + return self + + def get_structures(self): + """ + Extract all structures from containing object as a conformational ensemble. + + Returns + ------- + :obj:`~isicle.conformers.ConformationalEnsemble` + Conformational ensemble. + + """ + if isinstance(self.geom, isicle.conformers.ConformationalEnsemble): + return self.geom + + raise TypeError( + "Object does not contain multiple structures. Use `get_structure` instead." + ) + + def get_structure(self): + """ + Extract structure from containing object. + + Returns + ------- + :obj:`~isicle.geometry.XYZGeometry` + Structure instance. + + """ + if isinstance(self.geom, isicle.conformers.ConformationalEnsemble): + raise TypeError( + "Object contains multiple structures. Use `get_structures` instead." + ) + + return self.geom class TINKERWrapper(Geometry, WrapperInterface): diff --git a/isicle/mobility.py b/isicle/mobility.py index ec8d9bc..55a2e45 100644 --- a/isicle/mobility.py +++ b/isicle/mobility.py @@ -27,7 +27,7 @@ def __init__(self): pass def set_geometry(self, geom): - ''' + """ Set :obj:`~isicle.geometry.Geometry` instance for simulation. Parameters @@ -35,7 +35,7 @@ def set_geometry(self, geom): geom : :obj:`~isicle.geometry.Geometry` Molecule representation. - ''' + """ # Assign geometry self.geom = geom @@ -44,7 +44,7 @@ def set_geometry(self, geom): self.save_geometry() def save_geometry(self): - ''' + """ Save internal :obj:`~isicle.geometry.Geometry` representation to file. Raises @@ -52,7 +52,7 @@ def save_geometry(self): TypeError If geometry loaded from .xyz is saved to another format. - ''' + """ # Temp directory self.temp_dir = isicle.utils.mkdtemp() diff --git a/isicle/parse.py b/isicle/parse.py index 6a8b890..7a0be92 100644 --- a/isicle/parse.py +++ b/isicle/parse.py @@ -1,16 +1,336 @@ -from isicle.interfaces import FileParserInterface -import pandas as pd -from os.path import splitext import glob import os import pickle +import re +from os.path import splitext + import numpy as np +import pandas as pd from openbabel import pybel + import isicle +from isicle.interfaces import FileParserInterface + + +class ORCAParser(FileParserInterface): + """Extract information from an ORCA simulation output files.""" + + def __init__(self, data=None): + self.data = data + + self.result = {} + + def load(self, path): + self.data = isicle.io.load_pickle(path) + + def _find_output_by_header(self, header): + # Fat regex + pattern = ( + r"(-{2,})\n\s{0,}(" + + re.escape(header) + + ")\s{0,}\n-{2,}\n([\s\S]*?)(?=-{2,}\n[^\n]*\n-{2,}\n|$)" + ) + + # Search ORCA output file + matches = re.findall(pattern, self.data["out"]) + + # Return "body" of each match + return [x[2].strip() for x in matches] + + def _parse_protocol(self): + return self.data["inp"] + + def _parse_geom(self): + return self.data["xyz"] + + def _parse_energy(self): + # Split text + lines = self.data["property"].split("\n") + + # Search for energy values + elines = [x for x in lines if "Total DFT Energy" in x] + + # Energy values not found + if len(elines) == 0: + return None + + # Map float over values + evals = [float(x.split()[-1].strip()) for x in elines] + + # Return last energy value + return evals[-1] + + def _parse_frequency(self): + # Define columns + columns = ["wavenumber", "eps", "intensity", "TX", "TY", "TZ"] + + # Split sections by delimiter + blocks = self.data["hess"].split("$") + + # Search for frequency values + freq_block = [x for x in blocks if x.startswith("ir_spectrum")] + + # Frequency values not found + if len(freq_block) == 0: + return None + + # Grab last frequency block + # Doubtful if more than one, but previous results in list + freq_block = freq_block[-1] + + # Split block into lines + lines = freq_block.split("\n") + + # Map float over values + vals = np.array( + [ + list(map(float, x.split())) + for x in lines + if len(x.split()) == len(columns) + ] + ) + + # Zip columns and values + return dict(zip(columns, vals.T)) + + def _parse_timing(self): + # Grab only last few lines + lines = self.data["out"].split("\n")[-100:] + + # Find start of timing section + parts = [] + start_idx = None + for i, line in enumerate(lines): + if line.startswith("Timings for individual modules"): + start_idx = i + 2 + + # Strip out extraneous info + parts.append( + [x.strip() for x in line.split(" ") if x and x.strip() != "..."] + ) + + # Timing not found + if start_idx is None: + return None + + # Split out timing section + tlines = lines[start_idx:] + tparts = parts[start_idx:] + + # Individual timings + timings = [x for x in tparts if any([" sec " in y for y in x])] + timings = {x[0].strip("..."): float(x[1].split()[0]) for x in timings} + + # Boolean indication of success + success = len([x for x in tlines if "ORCA TERMINATED NORMALLY" in x]) > 0 + timings["success"] = success + + # Total time + total_time = [x for x in tlines if "TOTAL RUN TIME" in x] + + if len(total_time) > 0: + total_time = total_time[-1].split(":")[-1].strip() + times = list(map(int, total_time.split()[::2])) + units = total_time.split()[1::2] + else: + total_time = None + + timings["Total run time"] = dict(zip(units, times)) + + return timings + + def _parse_shielding(self): + # Filter comments + property = [ + x.strip() + for x in self.data["property"].split("\n") + if not x.startswith("#") + ] + property = "\n".join(property) + + # Split sections by delimiter + blocks = property.split("$ ") + + # Search for shielding values + shielding_block = [x for x in blocks if x.startswith("EPRNMR_OrbitalShielding")] + + # Shielding values not found + if len(shielding_block) == 0: + return None + + # Grab last shielding block + # Doubtful if more than one, but previous results in list + shielding_block = shielding_block[-1] + + # Define a pattern for extracting relevant information + pattern = re.compile( + r"Nucleus: (\d+) (\w+)\n(Shielding tensor.*?P\(iso\) \s*[-+]?\d*\.\d+)", + re.DOTALL, + ) + + # Match against pattern + matches = pattern.findall(shielding_block) + + # Result container + shielding = {} + + # Enumerate matches + for match in matches: + # Per-nucleus info + nucleus_index = match[0] + nucleus_name = match[1] + nucleus_data = match[2] + + # Extracting values using regex + tensors = re.findall(r"(-?\d+\.\d+|-?\d+.\d+e[+-]\d+)", nucleus_data) + tensors = [float(val) for val in tensors] + + # Creating arrays from extracted values + shielding_tensor = np.array(tensors[:9]).reshape(3, 3) + p_tensor_eigenvectors = np.array(tensors[9:18]).reshape(3, 3) + p_eigenvalues = np.array(tensors[18:21]) + p_iso = float(tensors[21]) + + # Constructing the dictionary with nuclei index and name + shielding[f"{nucleus_index}{nucleus_name}"] = { + "shielding tensor": shielding_tensor, + "P tensor eigenvectors": p_tensor_eigenvectors, + "P eigenvalues": p_eigenvalues, + "P(iso)": p_iso, + } + + # Add shielding summary + shielding["shielding_summary"] = self._parse_shielding_summary() + + return shielding + + def _parse_orbital_energies(self): + header = "ORBITAL ENERGIES" + text = self._find_output_by_header(header) + + # Orbital energies not found + if len(text) == 0: + return None + + # Get last relevant output + text = text[-1].split("\n") + + # Parse table + text = [x.strip() for x in text if x.strip() != "" and "*" not in x] + columns = text[0].split() + body = [x.split() for x in text[1:]] + + # Construct data frame + df = pd.DataFrame(body, columns=columns, dtype=float) + + # Map correct types + df["NO"] = df["NO"].astype(int) + + # Drop unoccupied orbitals? + return df + + def _parse_spin(self): + header = "SUMMARY OF ISOTROPIC COUPLING CONSTANTS (Hz)" + text = self._find_output_by_header(header) + + # Spin couplings not found + if len(text) == 0: + return None + + # Get last relevant output + text = text[-1].split("\n") + + # Parse table + text = [x.strip() for x in text if x.strip() != "" and "*" not in x] + columns = [x.replace(" ", "") for x in re.split("\s{2,}", text[0])] + body = [re.split("\s{2,}", x)[1:] for x in text[1:-1]] + + # Construct data frame + return pd.DataFrame(body, dtype=float, columns=columns, index=columns) + + def _parse_shielding_summary(self): + header = "CHEMICAL SHIELDING SUMMARY (ppm)" + text = self._find_output_by_header(header) + + # Shielding values not found + if len(text) == 0: + return None + + # Get last relevant output + text = text[-1].split("\n") + + # Parse table + text = [x.strip() for x in text if x.strip() != ""] + + # Find stop index + stop_idx = -1 + for i, row in enumerate(text): + if all([x == "-" for x in row]): + stop_idx = i + break + + # Split columns and body + columns = text[0].split() + body = [x.split() for x in text[2:stop_idx]] + + # Construct data frame + df = pd.DataFrame(body, columns=columns) + + # Map correct types + for col, dtype in zip(df.columns, (int, str, float, float)): + df[col] = df[col].astype(dtype) + return df + + def _parse_thermo(self): + # In hessian file + header = "THERMOCHEMISTRY_Energies" + + def _parse_molden(self): + return None + + def _parse_charge(self): + return None + + def _parse_connectivity(self): + return None + + def parse(self): + result = { + "protocol": self._parse_protocol(), + "geom": self._parse_geom(), + "total_dft_energy": self._parse_energy(), + "orbital_energies": self._parse_orbital_energies(), + "shielding": self._parse_shielding(), + "spin": self._parse_spin(), + "frequency": self._parse_frequency(), + "molden": self._parse_molden(), + "charge": self._parse_charge(), + "timing": self._parse_timing(), + "connectivity": self._parse_connectivity(), + } + + # Pop success from timing + if result["timing"] is not None: + result["success"] = result["timing"].pop("success") + else: + result["success"] = False + + # Filter empty fields + result = {k: v for k, v in result.items() if v is not None} + + # Store attribute + self.result = result + + return result + + def save(self, path): + isicle.io.save_pickle(path, self.result) class NWChemParser(FileParserInterface): - """Extract text from an NWChem simulation output file.""" + """ + Extract text from an NWChem simulation output file. + """ def __init__(self): self.contents = None @@ -18,13 +338,18 @@ def __init__(self): self.path = None def load(self, path: str): - """Load in the data file""" + """ + Load in the data file + """ with open(path, "r") as f: self.contents = f.readlines() self.path = path return self.contents def _parse_geometry(self): + """ + Add docstring + """ search = os.path.dirname(self.path) geoms = sorted(glob.glob(os.path.join(search, "*.xyz"))) @@ -34,6 +359,9 @@ def _parse_geometry(self): raise Exception def _parse_energy(self): + """ + Add docstring + """ # TO DO: Add Initial energy and final energy if different # Init @@ -48,6 +376,9 @@ def _parse_energy(self): return energy def _parse_shielding(self): + """ + Add docstring + """ # Init ready = False shield_idxs = [] @@ -78,6 +409,9 @@ def _parse_shielding(self): raise Exception def _parse_spin(self): + """ + Add docstring + """ # TO DO: Add g-factors # Declaring couplings @@ -117,6 +451,9 @@ def _parse_spin(self): } def _parse_frequency(self): + """ + Add docstring + """ # TO DO: Add freq intensities # TO DO: Add rotational/translational/vibrational Cv and entropy freq = None @@ -185,6 +522,9 @@ def _parse_frequency(self): raise Exception def _parse_charge(self): + """ + Add docstring + """ # TO DO: Parse molecular charge and atomic charges # TO DO: Add type of charge # TO DO: Multiple instances of charge analysis seen (two Mulliken and one Lowdin, difference?) @@ -234,6 +574,9 @@ def _parse_charge(self): raise Exception def _parse_timing(self): + """ + Add docstring + """ # Init indices = [] preoptTime = 0 @@ -284,6 +627,9 @@ def _parse_timing(self): } def _parse_molden(self): + """ + Add docstring + """ search = splitext(self.path)[0] m = glob.glob(search + "*.molden") @@ -293,7 +639,9 @@ def _parse_molden(self): return m[0] def _parse_protocol(self): - """Parse out dft protocol""" + """ + Parse out dft protocol + """ functional = [] basis_set = [] solvation = [] @@ -338,6 +686,9 @@ def _parse_protocol(self): } def _parse_connectivity(self): + """ + Add docstring + """ coor_substr = "internuclear distances" # Extracting Atoms & Coordinates @@ -360,12 +711,13 @@ def _parse_connectivity(self): def parse(self): """ - Extract relevant information from NWChem output + Extract relevant information from NWChem output. Parameters ---------- to_parse : list of str geometry, energy, shielding, spin, frequency, molden, charge, timing + """ # Check that the file is valid first @@ -437,21 +789,30 @@ def save(self, path): class ImpactParser(FileParserInterface): - """Extract text from an Impact mobility calculation output file.""" + """ + Extract text from an Impact mobility calculation output file. + """ def __init__(self): + """ + Add docstring + """ self.contents = None self.result = None def load(self, path: str): - """Load in the data file""" + """ + Load in the data file + """ with open(path, "rb") as f: self.contents = f.readlines() return self.contents def parse(self): - """Extract relevant information from data""" + """ + Extract relevant information from data + """ # Check CCS results == 1 count = 0 @@ -488,27 +849,38 @@ def parse(self): return result # TODO: return CCS? def save(self, path: str, sep="\t"): - """Write parsed object to file""" + """ + Write parsed object to file + """ pd.DataFrame(self.result).to_csv(path, sep=sep, index=False) return class MobcalParser(FileParserInterface): - """Extract text from a MOBCAL mobility calculation output file.""" + """ + Extract text from a MOBCAL mobility calculation output file. + """ def __init__(self): + """ + Add docstring + """ self.contents = None self.result = {} def load(self, path: str): - """Load in the data file""" + """ + Load in the data file + """ with open(path, "r") as f: self.contents = f.readlines() return self.contents def parse(self): - """Extract relevant information from data""" + """ + Extract relevant information from data + """ done = False for line in self.contents: # if "average (second order) TM mobility" in line: @@ -525,41 +897,63 @@ def parse(self): return self.result def save(self, path: str, sep="\t"): - """Write parsed object to file""" + """ + Write parsed object to file + """ pd.DataFrame(self.result).to_csv(path, sep=sep, index=False) return class SanderParser(FileParserInterface): - """Extract text from an Sander simulated annealing simulation output file.""" + """ + Extract text from an Sander simulated annealing simulation output file. + """ def load(self, path: str): - """Load in the data file""" + """ + Load in the data file + """ raise NotImplementedError def parse(self): - """Extract relevant information from data""" + """ + Extract relevant information from data + """ raise NotImplementedError def save(self, path: str): - """Write parsed object to file""" + """ + Write parsed object to file + """ raise NotImplementedError class XTBParser(FileParserInterface): + """ + Add docstring + """ + def __init__(self): + """ + Add docstring + """ self.contents = None self.result = None self.path = None def load(self, path: str): - """Load in the data file""" + """ + Load in the data file + """ with open(path, "r") as f: self.contents = f.readlines() self.path = path # return self.contents def _crest_energy(self): + """ + Add docstring + """ relative_energy = [] total_energy = [] population = [] @@ -588,6 +982,10 @@ def _crest_energy(self): } def _crest_timing(self): + """ + Add docstring + """ + def grab_time(line): line = line.replace(" ", "") line = line.split(":") @@ -626,6 +1024,9 @@ def grab_time(line): } def _isomer_energy(self): + """ + Add docstring + """ complete = False relative_energies = [] total_energies = [] @@ -647,6 +1048,10 @@ def _isomer_energy(self): return {"relative energy": relative_energies, "total energy": total_energies} def _isomer_timing(self): + """ + Add docstring + """ + def grab_time(line): line = line.replace(" ", "") line = line.split(":") @@ -670,6 +1075,9 @@ def grab_time(line): } def _opt_energy(self): + """ + Add docstring + """ for line in self.contents: if "TOTAL ENERGY" in line: energy = line.split()[3] + " Hartrees" @@ -677,6 +1085,10 @@ def _opt_energy(self): return {"Total energy": energy} def _opt_timing(self): + """ + Add docstring + """ + def grab_time(line): line = line.replace(" ", "") line = line.split(":") @@ -707,6 +1119,9 @@ def grab_time(line): } def _parse_energy(self): + """ + Add docstring + """ if self.parse_crest == True: return self._crest_energy() if self.parse_opt == True: @@ -715,6 +1130,9 @@ def _parse_energy(self): return self._isomer_energy() def _parse_timing(self): + """ + Add docstring + """ if self.parse_crest == True: return self._crest_timing() if self.parse_opt == True: @@ -723,6 +1141,9 @@ def _parse_timing(self): return self._isomer_timing() def _parse_protocol(self): + """ + Add docstring + """ protocol = None for line in self.contents: @@ -755,14 +1176,21 @@ def _parse_xyz(self): return isicle.conformers.ConformationalEnsemble(x) def parse(self): - """Extract relevant information from data""" + """ + Extract relevant information from data + """ # Check that the file is valid first if len(self.contents) == 0: raise RuntimeError("No contents to parse: {}".format(self.path)) - if "terminated normally" not in self.contents[-1]: - if "ratio" not in self.contents[-2]: - raise RuntimeError("XTB job failed: {}".format(self.path)) + + last_lines = "".join(self.contents[-10:]) + if ( + ("terminat" not in last_lines) + & ("normal" not in last_lines) + & ("ratio" not in last_lines) + ): + raise RuntimeError("XTB job failed: {}".format(self.path)) self.parse_crest = False self.parse_opt = False @@ -770,11 +1198,8 @@ def parse(self): # Initialize result object to store info result = {} + result["protocol"] = self._parse_protocol() - try: - result["protocol"] = self._parse_protocol() - except: - pass try: result["timing"] = self._parse_timing() except: @@ -788,13 +1213,12 @@ def parse(self): # Parse geometry from assoc. XYZ file try: if self.path.endswith("xyz"): - if "geometry" in to_parse: - try: - self.xyz_path = self.path - result["geom"] = self._parse_xyz() + try: + self.xyz_path = self.path + result["geom"] = self._parse_xyz() - except: - pass + except: + pass if self.path.endswith("out") or self.path.endswith("log"): # try geometry parsing @@ -835,24 +1259,39 @@ def parse(self): return result def save(self, path): + """ + Add docstring + """ with open(path, "wb") as f: pickle.dump(self, f) return class TINKERParser(FileParserInterface): + """ + Add docstring + """ + def __init__(self): + """ + Add docstring + """ self.contents = None self.result = None self.path = None def load(self, path: str): - """Load in the data file""" + """ + Load in the data file + """ with open(path, "r") as f: self.contents = f.readlines() self.path = path def _parse_energy(self): + """ + Add docstring + """ inp = self.contents if len(inp) < 13: quit() @@ -869,7 +1308,12 @@ def _parse_energy(self): return energies def _parse_conformers(self): + """ + Add docstring + """ + def parse_atom_symbol(AtomNum): + # TODO: modify lookup to use resources/atomic_masses.tsv Lookup = [ "H", "He", @@ -970,434 +1414,8 @@ def parse_atom_symbol(AtomNum): conffile.close() conformers = [] atoms = [] - atomtypes = [ - "CR", - "C=C", - "CSP", - "C=O", - "C=N", - "CGD", - "C=O", - "C=O", - "CON", - "COO", - "COO", - "COO", - "C=O", - "C=S", - "C=S", - "CSO", - "CS=", - "CSS", - "C=P", - "CSP", - "=C=", - "HC", - "HSI", - "OR", - "OC=", - "OC=", - "OC=", - "OC=", - "ONO", - "ON=", - "OSO", - "OSO", - "OSO", - "OS=", - "-OS", - "OPO", - "OPO", - "OPO", - "-OP", - "-O-", - "O=C", - "O=C", - "O=C", - "O=C", - "O=N", - "O=S", - "O=S", - "NR", - "N=C", - "N=N", - "NC=", - "NC=", - "NN=", - "NN=", - "F", - "CL", - "BR", - "I", - "S", - "S=C", - "S=O", - ">S=", - "SO2", - "SO2", - "SO3", - "SO4", - "=SO", - "SNO", - "SI", - "CR4", - "HOR", - "HO", - "HOM", - "CR3", - "HNR", - "H3N", - "HPY", - "HNO", - "HNM", - "HN", - "HOC", - "HOP", - "PO4", - "PO3", - "PO2", - "PO", - "PTE", - "P", - "HN=", - "HN=", - "HNC", - "HNC", - "HNC", - "HNC", - "HNN", - "HNN", - "HNS", - "HNP", - "HNC", - "HSP", - "HOC", - "HOC", - "CE4", - "HOH", - "O2C", - "OXN", - "O2N", - "O2N", - "O3N", - "O-S", - "O2S", - "O3S", - "O4S", - "OSM", - "OP", - "O2P", - "O3P", - "O4P", - "O4C", - "HOS", - "NR+", - "OM", - "OM2", - "HNR", - "HIM", - "HPD", - "HNN", - "HNC", - "HGD", - "HN5", - "CB", - "NPY", - "NPY", - "NC=", - "NC=", - "NC=", - "NC%", - "CO2", - "CS2", - "NSP", - "NSO", - "NSO", - "NPO", - "NPO", - "NC%", - "STH", - "NO2", - "NO3", - "N=O", - "NAZ", - "NSO", - "O+", - "HO+", - "O=+", - "HO=", - "=N=", - "N+=", - "N+=", - "NCN", - "NGD", - "CGD", - "CNN", - "NPD", - "OFU", - "C%", - "NR%", - "NM", - "C5A", - "C5B", - "N5A", - "N5B", - "N2O", - "N3O", - "NPO", - "OH2", - "HS", - "HS=", - "HP", - "S-P", - "S2C", - "SM", - "SSM", - "SO2", - "SSO", - "=S=", - "-P=", - "N5M", - "CLO", - "C5", - "N5", - "CIM", - "NIM", - "N5A", - "N5B", - "N5+", - "N5A", - "N5B", - "N5O", - "FE+", - "FE+", - "F-", - "CL-", - "BR-", - "LI+", - "NA+", - "K+", - "ZIN", - "ZN+", - "CA+", - "CU+", - "CU+", - "MG+", - ] - anums = [ - 6, - 6, - 6, - 6, - 6, - 6, - 6, - 6, - 6, - 6, - 6, - 6, - 6, - 6, - 6, - 6, - 6, - 6, - 6, - 6, - 6, - 1, - 1, - 8, - 8, - 8, - 8, - 8, - 8, - 8, - 8, - 8, - 8, - 8, - 8, - 8, - 8, - 8, - 8, - 8, - 8, - 8, - 8, - 8, - 8, - 8, - 8, - 7, - 7, - 7, - 7, - 7, - 7, - 7, - 9, - 17, - 35, - 53, - 16, - 16, - 16, - 16, - 16, - 16, - 16, - 16, - 16, - 16, - 14, - 6, - 1, - 1, - 1, - 6, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 15, - 15, - 15, - 15, - 15, - 15, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 6, - 1, - 8, - 8, - 8, - 8, - 8, - 8, - 8, - 8, - 8, - 8, - 8, - 8, - 8, - 8, - 8, - 1, - 7, - 8, - 8, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 6, - 7, - 7, - 7, - 7, - 7, - 7, - 6, - 6, - 7, - 7, - 7, - 7, - 7, - 7, - 16, - 7, - 7, - 7, - 7, - 7, - 8, - 1, - 8, - 1, - 7, - 7, - 7, - 7, - 7, - 6, - 6, - 7, - 8, - 6, - 7, - 7, - 6, - 6, - 7, - 7, - 7, - 7, - 7, - 8, - 1, - 1, - 1, - 16, - 16, - 16, - 16, - 16, - 16, - 16, - 15, - 7, - 17, - 6, - 7, - 6, - 7, - 7, - 7, - 7, - 7, - 7, - 7, - 26, - 26, - 9, - 17, - 35, - 3, - 11, - 19, - 30, - 30, - 20, - 29, - 29, - 12, - ] + atomtypes = isicle.utils.tinker_lookup()["atomtypes"].to_list() + anums = isicle.utils.tinker_lookup()["anums"].to_list() atypes = [x[:3] for x in atomtypes] # Parse data from arc file, extract coordinates and atom type @@ -1444,7 +1462,9 @@ def parse_atom_symbol(AtomNum): return isicle.conformers.ConformationalEnsemble(x) def parse(self): - """Extract relevant information from data""" + """ + Extract relevant information from data + """ # Check that the file is valid first if len(self.contents) == 0: @@ -1471,4 +1491,7 @@ def parse(self): return result def save(self): + """ + Add docstring + """ return diff --git a/isicle/qm.py b/isicle/qm.py index 71059bd..aef2526 100644 --- a/isicle/qm.py +++ b/isicle/qm.py @@ -1,105 +1,93 @@ +import glob import os import subprocess -from itertools import combinations, cycle +from itertools import cycle from string import Template import isicle -from isicle.geometry import XYZGeometry from isicle.interfaces import WrapperInterface from isicle.parse import NWChemParser from isicle.utils import safelist -def _program_selector(program): - ''' - Selects a supported quantum mechanical program for associated simulation. - Currently only NWChem has been implemented. +def _backend_selector(backend): + """ + Selects a supported quantum mechanical backend for associated simulation. + Currently only NWChem and ORCA have been implemented. Parameters ---------- - program : str - Alias for program selection (e.g. NWChem). + backend : str + Alias for backend selection (e.g. NWChem, ORCA). Returns ------- - program - Wrapped functionality of the selected program. Must implement + backend + Wrapped functionality of the selected backend. Must implement :class:`~isicle.interfaces.WrapperInterface`. - ''' + """ - program_map = {'nwchem': NWChemWrapper} + backend_map = {'nwchem': NWChemWrapper, + 'orca': ORCAWrapper} - if program.lower() in program_map.keys(): - return program_map[program.lower()]() + if backend.lower() in backend_map.keys(): + return backend_map[backend.lower()]() else: - raise ValueError(('{} not a supported quantum mechanical program.') - .format(program)) + raise ValueError(('{} not a supported quantum mechanical backend.') + .format(backend)) -def dft(geom, program='NWChem', template=None, **kwargs): - ''' - Optimize geometry via density functional theory using supplied functional - and basis set. +def dft(geom, backend='NWChem', **kwargs): + """ + Perform density functional theory calculations according to supplied task list + and configuration parameters. Parameters ---------- geom : :obj:`~isicle.geometry.Geometry` Molecule representation. - program : str - Alias for program selection (NWChem). - template : str - Path to optional template to bypass default configuration process. - **kwargs - Keyword arguments to configure the simulation. - See :meth:`~isicle.qm.NWChemWrapper.configure`. + backend : str + Alias for backend selection (NWChem, ORCA). + kwargs + Keyword arguments passed to selected backend. Returns ------- :obj:`~isicle.qm.QMWrapper` - Instance of selected QMWrapper. + Wrapper object containing relevant outputs from the simulation. - ''' + """ - # Select program - return _program_selector(program).run(geom, template=template, **kwargs) + # Select backend + return _backend_selector(backend).run(geom, **kwargs) -class NWChemWrapper(XYZGeometry, WrapperInterface): - ''' +class NWChemWrapper(WrapperInterface): + """ Wrapper for NWChem functionality. - Implements :class:`~isicle.interfaces.WrapperInterface` to ensure - required methods are exposed. - Attributes ---------- temp_dir : str Path to temporary directory used for simulation. task_map : dict Alias mapper for supported quantum mechanical presets. Thses include - "optimze", "shielding", and "spin". + "optimze", "energy", "frequency", "shielding", and "spin". + task_order : dict + Indicates order of tasks in `task_map`. geom : :obj:`~isicle.geometry.Geometry` Internal molecule representation. - fmt : str - File extension indicator. config : str Configuration information for simulation. - ''' - _defaults = ['geom'] - _default_value = None + """ - def __init__(self, **kwargs): - ''' + def __init__(self): + """ Initialize :obj:`~isicle.qm.NWChemWrapper` instance. - Establishes aliases for preconfigured tasks. - - ''' - self.__dict__.update(dict.fromkeys( - self._defaults, self._default_value)) - self.__dict__.update(**kwargs) + """ self.task_map = {'optimize': self._configure_optimize, 'energy': self._configure_energy, @@ -117,7 +105,7 @@ def __init__(self, **kwargs): self.temp_dir = isicle.utils.mkdtemp() def set_geometry(self, geom): - ''' + """ Set :obj:`~isicle.geometry.Geometry` instance for simulation. Parameters @@ -125,39 +113,37 @@ def set_geometry(self, geom): geom : :obj:`~isicle.geometry.Geometry` Molecule representation. - ''' + """ # Assign geometry - self.geom = geom + self.geom = geom.__copy__() self.basename = self.geom.basename # Save - self.save_geometry() + self._save_geometry() - def save_geometry(self, fmt='xyz'): - ''' + def _save_geometry(self): + """ Save internal :obj:`~isicle.geometry.Geometry` representation to file. Parameters ---------- fmt : str - Filetype used by xtb. Must be "xyz", "smi", ".inchi", ".mol", ".xyz", - ".pdb", ".pkl". + File format. + + """ - ''' # Path operations - self.fmt = fmt.lower() geomfile = os.path.join(self.temp_dir, - '{}.{}'.format(self.geom.basename, - self.fmt.lower())) + '{}.xyz'.format(self.geom.basename)) - # All other formats + # Save isicle.io.save(geomfile, self.geom) self.geom.path = geomfile def _configure_header(self, scratch_dir=None, mem_global=1600, mem_heap=100, mem_stack=600): - ''' + """ Generate header block of NWChem configuration. Parameters @@ -176,7 +162,7 @@ def _configure_header(self, scratch_dir=None, mem_global=1600, str Header block of NWChem configuration. - ''' + """ d = {'basename': self.geom.basename, 'dirname': self.temp_dir, @@ -195,7 +181,7 @@ def _configure_header(self, scratch_dir=None, mem_global=1600, 'print low\n').format(**d) def _configure_load(self, charge=0): - ''' + """ Generate geometry load block of NWChem configuration. Parameters @@ -206,9 +192,9 @@ def _configure_load(self, charge=0): Returns ------- str - Geometry load block of NWChem configuration. + Load block of NWChem configuration. - ''' + """ d = {'basename': self.geom.basename, 'dirname': self.temp_dir, @@ -220,7 +206,7 @@ def _configure_load(self, charge=0): 'end\n').format(**d) def _configure_basis(self, basis_set='6-31G*', ao_basis='cartesian'): - ''' + """ Generate basis set block of NWChem configuration. Parameters @@ -235,7 +221,7 @@ def _configure_basis(self, basis_set='6-31G*', ao_basis='cartesian'): str Basis set block of NWChem configuration. - ''' + """ d = {'ao_basis': ao_basis, 'basis_set': basis_set} @@ -245,7 +231,7 @@ def _configure_basis(self, basis_set='6-31G*', ao_basis='cartesian'): 'end\n').format(**d) def _configure_dft(self, functional='b3lyp', odft=False): - ''' + """ Generate DFT block of NWChem configuration. Parameters @@ -261,7 +247,7 @@ def _configure_dft(self, functional='b3lyp', odft=False): str DFT block of NWChem configuration. - ''' + """ d = {'functional': functional, 'dft': 'odft' if odft is True else 'dft'} @@ -279,7 +265,7 @@ def _configure_dft(self, functional='b3lyp', odft=False): return s def _configure_driver(self, max_iter=150): - ''' + """ Generate driver block of NWChem configuration. Parameters @@ -292,7 +278,7 @@ def _configure_driver(self, max_iter=150): str Driver block of NWChem configuration. - ''' + """ d = {'basename': self.geom.basename, 'max_iter': max_iter} @@ -303,7 +289,7 @@ def _configure_driver(self, max_iter=150): 'end\n').format(**d) def _configure_cosmo(self, solvent='H2O', gas=False): - ''' + """ Generate COSMO block of NWChem configuration. Parameters @@ -318,7 +304,7 @@ def _configure_cosmo(self, solvent='H2O', gas=False): str COSMO block of NWChem configuration. - ''' + """ d = {'solvent': solvent, 'gas': gas} @@ -331,11 +317,9 @@ def _configure_cosmo(self, solvent='H2O', gas=False): def _configure_frequency(self, temp=298.15, basis_set='6-31G*', ao_basis='cartesian', functional='b3lyp', cosmo=False, solvent='H2O', gas=False, **kwargs): - ''' + """ Configure frequency block of NWChem configuration. - Includes basis and DFT blocks; can include COSMO block. - Parameters ---------- temp : float @@ -353,15 +337,13 @@ def _configure_frequency(self, temp=298.15, basis_set='6-31G*', ao_basis='cartes gas : bool Indicate whether to use gas phase calculations. Only used if `cosmo` is True. - **kwargs - Arbitrary additional arguments (unused). Returns ------- str Frequency block of NWChem configuration. - ''' + """ # Add basis block s = self._configure_basis(basis_set=basis_set, ao_basis=ao_basis) @@ -384,11 +366,9 @@ def _configure_frequency(self, temp=298.15, basis_set='6-31G*', ao_basis='cartes def _configure_energy(self, basis_set='6-31G*', ao_basis='cartesian', functional='b3lyp', cosmo=False, solvent='H2O', gas=False, **kwargs): - ''' + """ Configure energy block of NWChem configuration. - Includes basis and DFT blocks; can include COSMO block. - Parameters ---------- basis_set : str @@ -404,14 +384,13 @@ def _configure_energy(self, basis_set='6-31G*', ao_basis='cartesian', gas : bool Indicate whether to use gas phase calculations. Only used if `cosmo` is True. - **kwargs - Arbitrary additional arguments (unused). Returns ------- str Energy block of NWChem configuration. - ''' + + """ # Add basis block s = self._configure_basis(basis_set=basis_set, ao_basis=ao_basis) @@ -429,9 +408,8 @@ def _configure_energy(self, basis_set='6-31G*', ao_basis='cartesian', def _configure_optimize(self, basis_set='6-31G*', ao_basis='cartesian', functional='b3lyp', max_iter=150, - cosmo=False, solvent='H2O', gas=False, - **kwargs): - ''' + cosmo=False, solvent='H2O', gas=False, **kwargs): + """ Generate meta optimization block of NWChem configuration. Includes basis, DFT, and driver blocks; can include COSMO block. @@ -453,15 +431,13 @@ def _configure_optimize(self, basis_set='6-31G*', ao_basis='cartesian', gas : bool Indicate whether to u se gas phase calculations. Only used if `cosmo` is True. - **kwargs - Arbitrary additional arguments (unused). Returns ------- str Optimization meta block of NWChem configuration. - ''' + """ # Add basis block s = self._configure_basis(basis_set=basis_set, ao_basis=ao_basis) @@ -484,12 +460,9 @@ def _configure_optimize(self, basis_set='6-31G*', ao_basis='cartesian', def _configure_shielding(self, basis_set='6-31G*', ao_basis='cartesian', functional='b3lyp', cosmo=True, solvent='H2O', gas=False, **kwargs): - ''' + """ Generate meta shielding block of NWChem configuration. - Includes basis and DFT blocks; can include COSMO and/or single-point - energy calculation blocks. - Parameters ---------- basis_set : str @@ -507,22 +480,21 @@ def _configure_shielding(self, basis_set='6-31G*', ao_basis='cartesian', gas : bool Indicate whether to use gas phase calculations. Only used if `cosmo` is True. - **kwargs - Arbitrary additional arguments (unused). Returns ------- str Shielding meta block of NWChem configuration. - ''' + """ + # Extract atom index information - #idx = self.geom.mol.get_atom_indices(atoms=atoms) - #new_idx = [] + # idx = self.geom.mol.get_atom_indices(atoms=atoms) + # new_idx = [] # for i in idx: # new_idx.append(str(int(i)+1)) - #self.idx = new_idx + # self.idx = new_idx # Add basis block s = self._configure_basis(basis_set=basis_set, ao_basis=ao_basis) @@ -546,12 +518,9 @@ def _configure_shielding(self, basis_set='6-31G*', ao_basis='cartesian', def _configure_spin(self, bonds=1, basis_set='6-31G*', ao_basis='spherical', functional='b3lyp', cosmo=True, solvent='H2O', gas=False, **kwargs): - ''' + """ Generate meta spin-spin coupling block of NWChem configuration. - Includes basis and DFT blocks; can include COSMO and/or single-point - energy calculation blocks. - Parameters ---------- max_pairs : int @@ -570,15 +539,13 @@ def _configure_spin(self, bonds=1, basis_set='6-31G*', gas : bool Indicate whether to use gas phase calculations. Only used if `cosmo` is True. - **kwargs - Arbitrary additional arguments (unused). Returns ------- str Spin-spin coupling meta block of NWChem configuration. - ''' + """ def generate_pairs(mol, bonds=bonds): @@ -661,8 +628,8 @@ def configure(self, tasks='energy', functional='b3lyp', basis_set='6-31g*', ao_basis='cartesian', charge=0, atoms=['C', 'H'], bonds=1, temp=298.15, cosmo=False, solvent='H2O', gas=False, max_iter=150, mem_global=1600, mem_heap=100, - mem_stack=600, scratch_dir=None, processes=12, command='nwchem'): - ''' + mem_stack=600, scratch_dir=None, processes=12): + """ Configure NWChem simulation. Parameters @@ -709,7 +676,7 @@ def configure(self, tasks='energy', functional='b3lyp', str NWChem configuration. - ''' + """ # Cast to list safely tasks = safelist(tasks) @@ -773,9 +740,6 @@ def configure(self, tasks='energy', functional='b3lyp', # Store tasks as attribute self.tasks = tasks - # Store cluster as attribute - self.command = command - # Store number of processes as attribute self.processes = processes @@ -789,7 +753,7 @@ def configure(self, tasks='energy', functional='b3lyp', def configure_from_template(self, path, basename_override=None, dirname_override=None, **kwargs): - ''' + """ Configure NWChem simulation from template file. Use for NWChem functionality not exposed by the wrapper. @@ -814,7 +778,7 @@ def configure_from_template(self, path, basename_override=None, str NWChem configuration. - ''' + """ # Add/override class-managed kwargs if basename_override is not None: @@ -840,10 +804,10 @@ def configure_from_template(self, path, basename_override=None, return self.config def save_config(self): - ''' + """ Write generated NWChem configuration to file. - ''' + """ # Write to file with open(os.path.join(self.temp_dir, @@ -851,54 +815,53 @@ def save_config(self): f.write(self.config) def submit(self): - ''' + """ Submit the NWChem simulation according to configured inputs. - ''' + """ infile = os.path.join(self.temp_dir, self.geom.basename + '.nw') outfile = os.path.join(self.temp_dir, self.geom.basename + '.out') logfile = os.path.join(self.temp_dir, self.geom.basename + '.log') - if self.command == 'nwchem': - s = 'mpirun -n {} nwchem {} > {}'.format(self.processes, - infile, - outfile) - else: - s = '{} {} {} {}'.format(self.command, - self.processes, - infile, - outfile) + s = 'mpirun -n {} nwchem {} > {} 2> {}'.format(self.processes, + infile, + outfile, + logfile) subprocess.call(s, shell=True) def finish(self): - ''' + """ Parse NWChem simulation results. Returns ------- - :obj:`~isicle.parse.NWChemResult` - Parsed result data. + dict + Dictionary containing simulation results. - ''' + """ + # Initialize parser parser = NWChemParser() - parser.load(os.path.join(self.temp_dir, - self.basename + '.out')) - result = parser.parse() - self.__dict__.update(result) - self.geom.add___dict__( - {k: v for k, v in result.items() if k != 'geom'}) + # Open output file self.output = parser.load(os.path.join(self.temp_dir, self.basename + '.out')) - return self - def run(self, geom, template=None, **kwargs): - ''' - Optimize geometry via density functional theory using supplied functional - and basis set. + # Parse results + result = parser.parse() + + return result + + def run(self, geom, template=None, tasks='energy', functional='b3lyp', + basis_set='6-31g*', ao_basis='cartesian', charge=0, + atoms=['C', 'H'], bonds=1, temp=298.15, cosmo=False, solvent='H2O', + gas=False, max_iter=150, mem_global=1600, mem_heap=100, + mem_stack=600, scratch_dir=None, processes=12): + """ + Perform density functional theory calculations according to supplied task list + and configuration parameters. Parameters ---------- @@ -906,16 +869,49 @@ def run(self, geom, template=None, **kwargs): Molecule representation. template : str Path to optional template to bypass default configuration process. - **kwargs - Keyword arguments to configure the simulation. - See :meth:`~isicle.qm.NWChemWrapper.configure`. + tasks : str or list of str + List of calculations to perform. One or more of "optimize", "energy", + "frequency", "shielding", "spin". + functional : str or list of str + Functional selection. Supply globally or per task. + basis_set : str or list of str + Basis set selection. Supply globally or per task. + ao_basis : str or list of str + Angular function selection ("spherical", "cartesian"). Supply + globally or per task. + charge : int + Nominal charge of the molecule to be optimized. + atoms : list of str + Atom types of interest. Only used for `spin` and `shielding` tasks. + temp : float + Temperature for frequency calculation. Only used if `frequency` is + a selected task. + cosmo : bool + Indicate whether to include COSMO block. Supply globally or per + task. + solvent : str + Solvent selection. Only used if `cosmo` is True. Supply globally or + per task. + gas : bool + Indicate whether to use gas phase calculations. Only used if + `cosmo` is True. Supply globally or per task. + max_iter : int + Maximum number of optimization iterations. + scratch_dir : str + Path to simulation scratch directory. + mem_global : int + Global memory allocation in MB. + mem_heap : int + Heap memory allocation in MB. + mem_stack : int + Stack memory allocation in MB. Returns ------- - :obj:`~isicle.qm.NWChemWrapper` - Wrapper object containing relevant outputs from the simulation. + dict + Dictionary containing simulation results. - ''' + """ # Set geometry self.set_geometry(geom) @@ -924,25 +920,285 @@ def run(self, geom, template=None, **kwargs): if template is not None: self.configure_from_template(template) else: - self.configure(**kwargs) + self.configure(tasks=tasks, + functional=functional, basis_set=basis_set, + ao_basis=ao_basis, charge=charge, + atoms=atoms, bonds=bonds, temp=temp, + cosmo=cosmo, solvent=solvent, gas=gas, + max_iter=max_iter, mem_global=mem_global, + mem_heap=mem_heap, mem_stack=mem_stack, + scratch_dir=scratch_dir, processes=processes) # Run QM simulation self.submit() # Finish/clean up - self.finish() + result = self.finish() + + return result + + +class ORCAWrapper(WrapperInterface): + """ + Wrapper for ORCA functionality. + + Implements :class:`~isicle.interfaces.WrapperInterface` to ensure + required methods are exposed. + + Attributes + ---------- + temp_dir : str + Path to temporary directory used for simulation. + geom : :obj:`~isicle.geometry.Geometry` + Internal molecule representation. + fmt : str + File extension indicator. + config : str + Configuration information for simulation. + result : dict + Dictionary containing simulation results. + + """ + + def __init__(self): + """ + Initialize :obj:`~isicle.qm.ORCAWrapper` instance. + + """ + + # Set up temporary directory + self.temp_dir = isicle.utils.mkdtemp() + + def set_geometry(self, geom): + """ + Set :obj:`~isicle.geometry.Geometry` instance for simulation. + + Parameters + ---------- + geom : :obj:`~isicle.geometry.Geometry` + Molecule representation. - return self + """ - def get_structure(self): - ''' - Extract structure from containing object. + # Assign geometry + self.geom = geom.__copy__() + self.basename = self.geom.basename + + # Save + self._save_geometry() + + def _save_geometry(self): + """ + Save internal :obj:`~isicle.geometry.Geometry` representation to file in + XYZ format. + + """ + + # Path to output + geomfile = os.path.join(self.temp_dir, + '{}.xyz'.format(self.geom.basename)) + + # Save + isicle.save(geomfile, self.geom) + + # Store path + self.geom.path = geomfile + + def configure(self, simple_input=[], block_input={}, processes=1, **kwargs): + """ + Configure ORCA simulation. + + Parameters + ---------- + simple_input : list + List of simple input keywords. See `this `__ + section of the ORCA docs. + block_input : dict + Dictionary defining configuration "blocks". Use names of blocks as keys, + lists of each block's content as values. To configure a line of block content + directly, include as a complete string. Include key:value pairs as tuples. + See `this `__ + section of the ORCA docs. + processes : int + Number of parallel processes. + kwargs + Additional keyword arguments fed as key:value pairs. Returns ------- - :obj:`~isicle.geometry.XYZGeometry` - Structure instance. + str + ORCA configuration text. + + """ + + # Safely cast to list + simple_input = isicle.utils.safelist(simple_input) + + # Expand simple inputs + config = '! ' + ' '.join(simple_input) + '\n' + + # Add processes + if processes > 1: + config += '%PAL NPROCS {} END\n'.format(processes) + + # Add geometry context + config += '* xyzfile 0 1 {}\n'.format(self.geom.path) + + # Expand keyword args + for k, v in kwargs.items(): + config += '%{} {}\n'.format(k, v) + + # Expand block inputs + for block, params in block_input.items(): + # Safely cast to list + params = isicle.utils.safelist(params) - ''' + # Block header + block_text = '%{}\n'.format(block) - return self.geom + # Block configuration + for param in params: + if type(param) is str: + block_text += param + '\n' + elif type(param) is tuple: + block_text += ' '.join(map(str, param)) + '\n' + else: + raise TypeError + + # End block + block_text += 'end\n' + + # Append block to config + config += block_text + + # Assign to attribute + self.config = config + + # Save configuration + self._save_config() + + return self.config + + def _save_config(self): + """ + Write generated ORCA configuration to file. + + """ + + # Write to file + with open(os.path.join(self.temp_dir, + self.geom.basename + '.inp'), 'w') as f: + f.write(self.config) + + def submit(self): + """ + Submit the ORCA simulation according to configured inputs. + + """ + + infile = os.path.join(self.temp_dir, self.geom.basename + '.inp') + outfile = os.path.join(self.temp_dir, self.geom.basename + '.out') + logfile = os.path.join(self.temp_dir, self.geom.basename + '.log') + + s = '`which orca` {} > {} 2> {}'.format(infile, + outfile, + logfile) + + subprocess.call(s, shell=True) + + def finish(self): + """ + Collect ORCA simulation results. + + Returns + ------- + dict + Dictionary containing relevant outputs from the simulation. + + """ + + # Get list of outputs + outfiles = glob.glob(os.path.join(self.temp_dir, '*')) + + # Filter out temp files + outfiles = [x for x in outfiles if not x.endswith('.tmp')] + + # Result container + result = {} + + # Enumerate output files + for outfile in outfiles: + # Split name and extension + basename, ext = os.path.basename(outfile).rsplit('.', 1) + + # Grab suitable variable names + if any(basename.endswith(x) for x in ['_property', '_trj']): + var_name = basename.split('_')[-1] + else: + var_name = ext + + # Read output content + with open(outfile, 'rb') as f: + contents = f.read() + + # Attempt utf-8 decode + try: + result[var_name] = contents.decode('utf-8') + except UnicodeDecodeError: + result[var_name] = contents + + # Assign to attribute + self.result = result + return self.result + + def run(self, geom, **kwargs): + """ + Optimize geometry via density functional theory using supplied functional + and basis set. + + Parameters + ---------- + geom : :obj:`~isicle.geometry.Geometry` + Molecule representation. + template : str + Path to optional template to bypass default configuration process. + **kwargs + Keyword arguments to configure the simulation. + See :meth:`~isicle.qm.ORCAWrapper.configure`. + + Returns + ------- + dict + Dictionary containing relevant outputs from the simulation. + + """ + + # Set geometry + self.set_geometry(geom) + + # Configure + self.configure(**kwargs) + + # Run QM simulation + self.submit() + + # Parse outputs + self.finish() + + return self.result + + def save(self, path): + """ + Save result as pickle file. + + Parameters + ---------- + path : str + Path to output file. + + """ + + if hasattr(self, 'result'): + isicle.io.save_pickle(path, self.result) + else: + raise AttributeError("Object must have `result` attribute") diff --git a/isicle/resources/tinker_lookup.tsv b/isicle/resources/tinker_lookup.tsv new file mode 100644 index 0000000..fb832a1 --- /dev/null +++ b/isicle/resources/tinker_lookup.tsv @@ -0,0 +1,213 @@ +atomtypes anums +CR 6 +C=C 6 +CSP 6 +C=O 6 +C=N 6 +CGD 6 +C=O 6 +C=O 6 +CON 6 +COO 6 +COO 6 +COO 6 +C=O 6 +C=S 6 +C=S 6 +CSO 6 +CS= 6 +CSS 6 +C=P 6 +CSP 6 +=C= 6 +HC 1 +HSI 1 +OR 8 +OC= 8 +OC= 8 +OC= 8 +OC= 8 +ONO 8 +ON= 8 +OSO 8 +OSO 8 +OSO 8 +OS= 8 +-OS 8 +OPO 8 +OPO 8 +OPO 8 +-OP 8 +-O- 8 +O=C 8 +O=C 8 +O=C 8 +O=C 8 +O=N 8 +O=S 8 +O=S 8 +NR 7 +N=C 7 +N=N 7 +NC= 7 +NC= 7 +NN= 7 +NN= 7 +F 9 +CL 17 +BR 35 +I 53 +S 16 +S=C 16 +S=O 16 +>S= 16 +SO2 16 +SO2 16 +SO3 16 +SO4 16 +=SO 16 +SNO 16 +SI 14 +CR4 6 +HOR 1 +HO 1 +HOM 1 +CR3 6 +HNR 1 +H3N 1 +HPY 1 +HNO 1 +HNM 1 +HN 1 +HOC 1 +HOP 1 +PO4 15 +PO3 15 +PO2 15 +PO 15 +PTE 15 +P 15 +HN= 1 +HN= 1 +HNC 1 +HNC 1 +HNC 1 +HNC 1 +HNN 1 +HNN 1 +HNS 1 +HNP 1 +HNC 1 +HSP 1 +HOC 1 +HOC 1 +CE4 6 +HOH 1 +O2C 8 +OXN 8 +O2N 8 +O2N 8 +O3N 8 +O-S 8 +O2S 8 +O3S 8 +O4S 8 +OSM 8 +OP 8 +O2P 8 +O3P 8 +O4P 8 +O4C 8 +HOS 1 +NR+ 7 +OM 8 +OM2 8 +HNR 1 +HIM 1 +HPD 1 +HNN 1 +HNC 1 +HGD 1 +HN5 1 +CB 6 +NPY 7 +NPY 7 +NC= 7 +NC= 7 +NC= 7 +NC% 7 +CO2 6 +CS2 6 +NSP 7 +NSO 7 +NSO 7 +NPO 7 +NPO 7 +NC% 7 +STH 16 +NO2 7 +NO3 7 +N=O 7 +NAZ 7 +NSO 7 +O+ 8 +HO+ 1 +O=+ 8 +HO= 1 +=N= 7 +N+= 7 +N+= 7 +NCN 7 +NGD 7 +CGD 6 +CNN 6 +NPD 7 +OFU 8 +C% 6 +NR% 7 +NM 7 +C5A 6 +C5B 6 +N5A 7 +N5B 7 +N2O 7 +N3O 7 +NPO 7 +OH2 8 +HS 1 +HS= 1 +HP 1 +S-P 16 +S2C 16 +SM 16 +SSM 16 +SO2 16 +SSO 16 +=S= 16 +-P= 15 +N5M 7 +CLO 17 +C5 6 +N5 7 +CIM 6 +NIM 7 +N5A 7 +N5B 7 +N5+ 7 +N5A 7 +N5B 7 +N5O 7 +FE+ 26 +FE+ 26 +F- 9 +CL- 17 +BR- 35 +LI+ 3 +NA+ 11 +K+ 19 +ZIN 30 +ZN+ 30 +CA+ 20 +CU+ 29 +CU+ 29 +MG+ 12 diff --git a/isicle/utils.py b/isicle/utils.py index 486b980..400ed96 100644 --- a/isicle/utils.py +++ b/isicle/utils.py @@ -8,7 +8,7 @@ def safelist(x): - ''' + """ Ensures passed object is of correct format. Parameters @@ -20,7 +20,7 @@ def safelist(x): list, :obj:`~pd.core.series.Series`, or :obj:`~np.ndarray` Input safely cast to list-like. - ''' + """ if not isinstance(x, (list, pd.core.series.Series, np.ndarray)): return [x].copy() @@ -28,7 +28,7 @@ def safelist(x): class TypedList(collections.abc.MutableSequence): - ''' + """ Mutable sequence that requires all members be of select type(s). Attributes @@ -38,10 +38,10 @@ class TypedList(collections.abc.MutableSequence): list : list Internal list representation. - ''' + """ def __init__(self, oktypes, *args): - ''' + """ Initialize :obj:`~isicle.utils.TypedList` instance. Parameters @@ -51,7 +51,7 @@ def __init__(self, oktypes, *args): *args Objects to comprise the type-restricted list. - ''' + """ self.oktypes = oktypes self.list = list() @@ -61,7 +61,7 @@ def __init__(self, oktypes, *args): self.extend(list(args)) def check(self, v): - ''' + """ Check if supplied value is of allowed type(s). Raises @@ -69,7 +69,7 @@ def check(self, v): TypeError If value is not of allowed type(s). - ''' + """ if not isinstance(v, self.oktypes): raise TypeError(v) @@ -99,12 +99,17 @@ def __repr__(self): def atomic_masses(): - path = resources.files('isicle') / 'resources/atomic_masses.tsv' + path = resources.files("isicle") / "resources/atomic_masses.tsv" return pd.read_csv(path, delim_whitespace=True) +def tinker_lookup(): + path = resources.files("isicle") / "resources/tinker_lookup.tsv" + return pd.read_csv(path, sep="\t") + + def gettempdir(): - ''' + """ Return the name of the directory used for temporary files. Returns @@ -112,18 +117,16 @@ def gettempdir(): str Path to temporary directory. - ''' + """ root = os.path.join(tempfile.gettempdir(), 'isicle') - - if not os.path.exists(root): - os.makedirs(root) + os.makedirs(root, exist_ok=True) return root def mkdtemp(prefix=None, suffix=None): - ''' + """ An ISiCLE-specific wrapper of :func:`~tempfile.mkdtemp` to create a temporary directory for temporary ISiCLE files. The temporary directory is not automatically removed. @@ -139,17 +142,15 @@ def mkdtemp(prefix=None, suffix=None): ------- str Path to temporary directory. - + """ - ''' - return tempfile.mkdtemp(dir=gettempdir(), prefix=prefix, suffix=suffix) def rmdtemp(): - ''' + """ Removes all temporary directories and files created by ISiCLE. - ''' + """ shutil.rmtree(gettempdir(), ignore_errors=True) diff --git a/requirements.txt b/requirements.txt index aecc600..7a559a9 100644 --- a/requirements.txt +++ b/requirements.txt @@ -2,6 +2,6 @@ joblib numpy >= 1.19.4 # openbabel pandas >= 1.1.4 -# rdkit <= 2022.09.1 +# rdkit >= 2023.09.1 snakemake >= 6.3.0 statsmodels >= 0.11.1 diff --git a/tests/test_md.py b/tests/test_md.py index 8e947dd..2105ad1 100644 --- a/tests/test_md.py +++ b/tests/test_md.py @@ -1,6 +1,6 @@ import pytest import isicle -from isicle.md import _program_selector, XTBWrapper, md +from isicle.md import _program_selector, XTBWrapper, md, RDKitWrapper from isicle.geometry import Geometry import os import shutil @@ -15,35 +15,64 @@ def localfile(path): def xtb(): return XTBWrapper() -#test capitalization ignore -@pytest.mark.parametrize('program,expected', - [('xtb', XTBWrapper), - ('XTB', XTBWrapper), - ('xTb', XTBWrapper)]) + +@pytest.fixture() +def rdw(): + return RDKitWrapper() + + +# test capitalization ignore +@pytest.mark.parametrize( + "program,expected", + [ + ("xtb", XTBWrapper), + ("XTB", XTBWrapper), + ("xTb", XTBWrapper), + ("rdkit", RDKitWrapper), + ("RDKit", RDKitWrapper), + ("RDKIT", RDKitWrapper), + ], +) def test__program_selector(program, expected): # Check correct class is yielded assert isinstance(_program_selector(program), expected) -@pytest.mark.parametrize('program', - [('xt'), - ('amber')]) +@pytest.mark.parametrize("program", [("xt"), ("amber"), ("rd")]) def test__program_selector_fail(program): # Check unsupported selections with pytest.raises(ValueError): _program_selector(program) -def test_md(xtb): +def test_md_xtb(xtb): # Load geometry externally - geom = isicle.geometry.load(localfile('resources/geom_test_3D.mol')) + geom = isicle.load(localfile("resources/geom_test_3D.mol")) # Run molecular dynamics - md(geom, program='xtb', fmt='xyz', task = 'optimize', forcefield='gff',optlevel='Normal') + md( + geom, + program="xtb", + fmt="xyz", + task="optimize", + forcefield="gff", + optlevel="Normal", + ) - -class TestXTBWrapper: +def test_md_rdkit(rdw): + # Load geometry externally + geom = isicle.load(localfile("resources/geom_test_3D.mol")) + + # Run molecular dynamics + md( + geom, + program="rdkit", + method="etkdgv3", + ) + + +class TestXTBWrapper: def test_init(self, xtb): # Check class initialization assert isinstance(xtb, XTBWrapper) @@ -54,11 +83,12 @@ def test_init(self, xtb): # Clean up xtb.temp_dir.cleanup() - @pytest.mark.parametrize('path,expected', - [('resources/geom_test_3D.mol', 'geom_test_3D')]) + @pytest.mark.parametrize( + "path,expected", [("resources/geom_test_3D.mol", "geom_test_3D")] + ) def test_set_geometry(self, xtb, path, expected): # Load geometry externally - geom = isicle.geometry.load(localfile(path)) + geom = isicle.load(localfile(path)) # Set geometry xtb.set_geometry(geom) @@ -72,13 +102,10 @@ def test_set_geometry(self, xtb, path, expected): # Clean up xtb.temp_dir.cleanup() - @pytest.mark.parametrize('fmt', - [('xyz'), - ('pdb'), - ('mol')]) + @pytest.mark.parametrize("fmt", [("xyz"), ("pdb"), ("mol")]) def test_save_geometry(self, xtb, fmt): # Load geometry externally - geom = isicle.geometry.load(localfile('resources/geom_test_3D.mol')) + geom = isicle.load(localfile("resources/geom_test_3D.mol")) # Set geometry xtb.set_geometry(geom) @@ -87,34 +114,43 @@ def test_save_geometry(self, xtb, fmt): xtb.save_geometry(fmt=fmt) # Check if file exists - assert os.path.exists(os.path.join(xtb.temp_dir.name, - '{}.{}'.format(xtb.geom.basename, - fmt.lower()))) + assert os.path.exists( + os.path.join( + xtb.temp_dir.name, "{}.{}".format(xtb.geom.basename, fmt.lower()) + ) + ) # Clean up xtb.temp_dir.cleanup() - - @pytest.mark.parametrize('tasks, forcefield, optlevel', - [('optimize', ['gff', 'gfn2'], ['Normal', 'Tight']), - (['crest', 'protonate'], 'gfn2', 'Normal')]) - def test_configure_failure(self,xtb,tasks,forcefield,optlevel): + @pytest.mark.parametrize( + "tasks, forcefield, optlevel", + [ + ("optimize", ["gff", "gfn2"], ["Normal", "Tight"]), + (["crest", "protonate"], "gfn2", "Normal"), + ], + ) + def test_configure_failure(self, xtb, tasks, forcefield, optlevel): with pytest.raises(TypeError): xtb.configure(task=tasks, forcefield=forcefield, optlevel=optlevel) - @pytest.mark.parametrize('tasks,forcefield,optlevel', - [('optimize', 'gff', 'Normal'), - ('crest', 'gff', 'Normal'), - ('protonate', 'gff', 'Normal')]) + @pytest.mark.parametrize( + "tasks,forcefield,optlevel", + [ + ("optimize", "gff", "Normal"), + ("crest", "gff", "Normal"), + ("protonate", "gff", "Normal"), + ], + ) def test_configure(self, xtb, tasks, forcefield, optlevel): # Load geometry externally - geom = isicle.geometry.load(localfile('resources/geom_test_3D.mol')) + geom = isicle.load(localfile("resources/geom_test_3D.mol")) # Set geometry xtb.set_geometry(geom) # Save geometry - xtb.save_geometry(fmt='xyz') + xtb.save_geometry(fmt="xyz") # Set up commandline xtb.configure(task=tasks, forcefield=forcefield, optlevel=optlevel) @@ -122,15 +158,15 @@ def test_configure(self, xtb, tasks, forcefield, optlevel): # Clean up xtb.temp_dir.cleanup() - def test_run(self, xtb): + def test_run(self, xtb): # Load geometry externally - geom = isicle.geometry.load(localfile('resources/geom_test_3D.mol')) + geom = isicle.load(localfile("resources/geom_test_3D.mol")) # Set geometry xtb.set_geometry(geom) # Save geometry - xtb.save_geometry(fmt='xyz') + xtb.save_geometry(fmt="xyz") # Set up commandline xtb.configure() @@ -143,13 +179,13 @@ def test_run(self, xtb): def test_finish(self, xtb): # Load geometry externally - geom = isicle.geometry.load(localfile('resources/geom_test_3D.mol')) + geom = isicle.load(localfile("resources/geom_test_3D.mol")) # Set geometry xtb.set_geometry(geom) # Save geometry - xtb.save_geometry(fmt='xyz') + xtb.save_geometry(fmt="xyz") # Set up commandline xtb.configure() @@ -161,4 +197,101 @@ def test_finish(self, xtb): xtb.finish() # Clean up - xtb.temp_dir.cleanup() \ No newline at end of file + xtb.temp_dir.cleanup() + + +class TestRDKitWrapper: + def test_init(self, rdw): + # Check class initialization + assert isinstance(rdw, RDKitWrapper) + + @pytest.mark.parametrize( + "path,expected", [("resources/geom_test_3D.mol", "geom_test_3D")] + ) + def test_set_geometry(self, rdw, path, expected): + # Load geometry externally + geom = isicle.load(localfile(path)) + + # Set geometry + rdw.set_geometry(geom) + + # Check class instance + assert isinstance(rdw.geom, Geometry) + + # Check basename attribute + assert geom.basename == expected + + @pytest.mark.parametrize( + "method, numConfs", + [("distance", "test"), ("etkdg", "test")], + ) + def test_configure_failure(self, rdw, method, numConfs): + with pytest.raises(ValueError): + rdw.configure(method=method, numConfs=numConfs) + + @pytest.mark.parametrize( + "method, numConfs, pruneRmsThresh, forceTol, randomSeed", + [ + ("distance", 10, -1.0, 0.001, -1), + ], + ) + def test_configure_distance( + self, rdw, method, numConfs, pruneRmsThresh, forceTol, randomSeed + ): + # Load geometry externally + geom = isicle.load(localfile("resources/geom_test_3D.mol")) + + # Set geometry + rdw.set_geometry(geom) + + # Set up commandline + rdw.configure( + method=method, + numConfs=numConfs, + pruneRmsThresh=pruneRmsThresh, + forceTol=forceTol, + randomSeed=randomSeed, + ) + + @pytest.mark.parametrize( + "method, numConfs", + [("etkdg", 10), ("etdg", 10)], + ) + def test_configure_et(self, rdw, method, numConfs): + # Load geometry externally + geom = isicle.load(localfile("resources/geom_test_3D.mol")) + + # Set geometry + rdw.set_geometry(geom) + + # Set up commandline + rdw.configure(method=method, numConfs=numConfs) + + def test_submit(self, rdw): + # Load geometry externally + geom = isicle.load(localfile("resources/geom_test_3D.mol")) + + # Set geometry + rdw.set_geometry(geom) + + # Set up commandline + rdw.configure() + + # Run + rdw.submit() + + def test_finish(self, rdw): + # Load geometry externally + geom = isicle.load(localfile("resources/geom_test_3D.mol")) + + # Set geometry + rdw.set_geometry(geom) + + # Set up commandline + rdw.configure() + + # Run + rdw.submit() + + # Finish + rdw.finish()