diff --git a/CHANGES b/CHANGES index c0bc0208..3d2f802f 100644 --- a/CHANGES +++ b/CHANGES @@ -27,6 +27,12 @@ Changes Enhancements +* convert figure components to SVG, save as individual PDFs, + and generate PDF of all figures combined in one file, + for workflows dihedrals module (#243) +* add RDKit mol object to dihedrals plot with dihedral atom + indices labeled and dihedral atom group bonds highlighted + for workflows dihedrals module (#243) * new workflows registry that contains each EnsembleAnalysis for which a workflows module exists, for use with workflows base module (#229) * new workflows base module that provides iterative workflow use for diff --git a/devtools/conda-envs/test_env.yaml b/devtools/conda-envs/test_env.yaml index 96b0e921..c283e3e4 100644 --- a/devtools/conda-envs/test_env.yaml +++ b/devtools/conda-envs/test_env.yaml @@ -18,6 +18,9 @@ dependencies: - pymbar >=4 - rdkit - seaborn +- svgutils +- cairosvg +- pypdf # Testing - pytest diff --git a/doc/requirements.txt b/doc/requirements.txt index 717b60c4..3e243106 100644 --- a/doc/requirements.txt +++ b/doc/requirements.txt @@ -10,3 +10,6 @@ mdanalysis rdkit seaborn matplotlib +svgutils +cairosvg +pypdf diff --git a/doc/sphinx/source/conf.py b/doc/sphinx/source/conf.py index a8000fbe..d32857d0 100644 --- a/doc/sphinx/source/conf.py +++ b/doc/sphinx/source/conf.py @@ -248,6 +248,9 @@ 'https://www.rdkit.org/docs/': None, 'https://pandas.pydata.org/docs/': None, 'https://seaborn.pydata.org': None, + 'https://cairosvg.org/documentation/': None, + 'https://svgutils.readthedocs.io/en/latest/': None, + 'https://pypdf.readthedocs.io/en/stable/': None, } diff --git a/mdpow/tests/test_automated_dihedral_analysis.py b/mdpow/tests/test_automated_dihedral_analysis.py index 6535608a..b5f70139 100644 --- a/mdpow/tests/test_automated_dihedral_analysis.py +++ b/mdpow/tests/test_automated_dihedral_analysis.py @@ -1,32 +1,30 @@ import os import sys import yaml -import pybol -import pytest +import py.path import pathlib import logging import scipy -import numpy as np -import pandas as pd - +from scipy.stats import circvar, circmean import seaborn - +import numpy as np from numpy.testing import assert_almost_equal -from scipy.stats import circvar, circmean +import pandas as pd +import pybol +import pytest from . import RESOURCES - -import py.path - -from mdpow.workflows import dihedrals - from pkg_resources import resource_filename +from mdpow.workflows import dihedrals RESOURCES = pathlib.PurePath(resource_filename(__name__, 'testing_resources')) MANIFEST = RESOURCES / "manifest.yml" -@pytest.fixture(scope="function") +resname = "UNK" +molname = "SM25" + +@pytest.fixture def molname_workflows_directory(tmp_path, molname='SM25'): m = pybol.Manifest(str(MANIFEST)) m.assemble('workflows', tmp_path) @@ -34,39 +32,56 @@ def molname_workflows_directory(tmp_path, molname='SM25'): class TestAutomatedDihedralAnalysis(object): - @pytest.fixture(scope="function") + @pytest.fixture def SM25_tmp_dir(self, molname_workflows_directory): dirname = molname_workflows_directory return dirname - @pytest.fixture(scope="function") - def atom_indices(self, SM25_tmp_dir): - atom_group_indices = dihedrals.dihedral_indices(dirname=SM25_tmp_dir, resname=self.resname) + @pytest.fixture + def mol_sol_data(self, SM25_tmp_dir): + u = dihedrals.build_universe(dirname=SM25_tmp_dir) + mol, solute = dihedrals.rdkit_conversion(u=u, resname=resname) + return mol, solute + + @pytest.fixture + def atom_indices(self, mol_sol_data): + mol, _ = mol_sol_data + atom_group_indices = dihedrals.get_atom_indices(mol=mol) # testing optional user input of alternate SMARTS string # for automated dihedral atom group selection - atom_group_indices_alt = dihedrals.dihedral_indices(dirname=SM25_tmp_dir, - resname=self.resname, - SMARTS='[!$(*#*)&!D1]-!@[!$(*#*)&!D1]') + atom_group_indices_alt = dihedrals.get_atom_indices(mol=mol, SMARTS='[!$(*#*)&!D1]-!@[!$(*#*)&!D1]') return atom_group_indices, atom_group_indices_alt # fixture output, tuple: # atom_indices[0]=atom_group_indices # atom_indices[1]=atom_group_indices_alt - @pytest.fixture(scope="function") + @pytest.fixture + def bond_indices(self, mol_sol_data, atom_indices): + mol, _ = mol_sol_data + atom_index, _ = atom_indices + bond_indices = dihedrals.get_bond_indices(mol=mol, atom_indices=atom_index) + return bond_indices + + @pytest.fixture + def dihedral_groups(self, mol_sol_data, atom_indices): + _, solute = mol_sol_data + atom_index, _ = atom_indices + dihedral_groups = dihedrals.get_dihedral_groups(solute=solute, atom_indices=atom_index) + return dihedral_groups + + @pytest.fixture def dihedral_data(self, SM25_tmp_dir, atom_indices): atom_group_indices, _ = atom_indices - df = dihedrals.dihedral_groups_ensemble(atom_group_indices=atom_group_indices, + df = dihedrals.dihedral_groups_ensemble(atom_indices=atom_group_indices, dirname=SM25_tmp_dir, solvents=('water',)) - df_aug = dihedrals.periodic_angle(df) + df_aug = dihedrals.periodic_angle_padding(df) return df, df_aug # fixture output, tuple: # dihedral_data[0]=df # dihedral_data[1]=df_aug - resname = 'UNK' - # tuple-tuples of dihedral atom group indices # collected using mdpow.workflows.dihedrals.SMARTS_DEFAULT check_atom_group_indices = ((0, 1, 2, 3),(0, 1, 12, 13),(1, 2, 3, 11),(1, 2, 3, 10), @@ -79,6 +94,23 @@ def dihedral_data(self, SM25_tmp_dir, atom_indices): # see: fixture - atom_indices().atom_group_indices_alt check_atom_group_indices_alt = ((1, 2), (1, 12), (2, 3), (3, 4), (12, 13), (13, 14)) + check_atom_name_index_pairs = {'O1-C2-N3-S4': (0, 1, 2, 3), + 'O1-C2-C13-C14': (0, 1, 12, 13), + 'C2-N3-S4-O12': (1, 2, 3, 11), + 'C2-N3-S4-O11': (1, 2, 3, 10), + 'C2-N3-S4-C5': (1, 2, 3, 4), + 'C2-C13-C14-C15': (1, 12, 13, 14), + 'N3-S4-C5-C6': (2, 3, 4, 5), + 'N3-S4-C5-C10': (2, 3, 4, 9), + 'N3-C2-C13-C14': (2, 1, 12, 13), + 'S4-N3-C2-C13': (3, 2, 1, 12), + 'C6-C5-S4-O12': (5, 4, 3, 11), + 'C6-C5-S4-O11': (5, 4, 3, 10), + 'C10-C5-S4-O12': (9, 4, 3, 11), + 'C10-C5-S4-O11': (9, 4, 3, 10), + 'C13-C14-C15-C16': (12, 13, 14, 15), + 'C13-C14-C15-C20': (12, 13, 14, 19)} + check_groups = [np.array(['O1', 'C2', 'N3', 'S4'], dtype=object), np.array(['O1', 'C2', 'C13', 'C14'], dtype=object), np.array(['C2', 'N3', 'S4', 'O12'], dtype=object), @@ -132,29 +164,49 @@ def test_build_universe(self, SM25_tmp_dir): # between RDKIT versions; issue raised (#239) to identify and # resolve exact package/version responsible def test_dihedral_indices(self, atom_indices): + atom_group_indices = atom_indices[0] assert set(atom_group_indices) == set(self.check_atom_group_indices) # Possible ordering issue (#239) def test_SMARTS(self, atom_indices): - atom_group_indices_alt = atom_indices[1] + _, atom_group_indices_alt = atom_indices assert atom_group_indices_alt == self.check_atom_group_indices_alt # Use set comparison because ordering of indices appears to change # between RDKIT versions; issue raised (#239) to identify and # resolve exact package/version responsible - def test_dihedral_groups(self, SM25_tmp_dir): - groups = dihedrals.dihedral_groups(dirname=SM25_tmp_dir, resname=self.resname) + def test_dihedral_groups(self, dihedral_groups): + groups = dihedral_groups values = [g.all() for g in groups] reference = [g.all() for g in self.check_groups] assert set(values) == set(reference) + # atom indices are determined by RDKit Mol object + # bond indices are determined by atom indices and are subsequently self-consistent + # dihedral group names are determined by the MDAnalysis solute object from RDKit-derived atom indices + # this test checks if indexing schemes for RDKit and MDAnalysis are consistent + def test_RDKit_MDAnalysis_atom_index_consistency(self, atom_indices, bond_indices, dihedral_groups): + atom_index, _ = atom_indices + bond_index = bond_indices + groups = dihedral_groups + + name_index_pairs = dihedrals.get_paired_indices(atom_indices=atom_index, bond_indices=bond_index, + dihedral_groups=groups) + + atom_name_index_pairs = {} + + for key in name_index_pairs.keys(): + atom_name_index_pairs[key] = name_index_pairs[key][0] + + assert atom_name_index_pairs == self.check_atom_name_index_pairs + # Possible ordering issue (#239) def test_dihedral_groups_ensemble(self, dihedral_data): - df = dihedral_data[0] + df, _ = dihedral_data dh1_result = df.loc[df['selection'] == 'O1-C2-N3-S4']['dihedral'] dh1_mean = circmean(dh1_result, high=180, low=-180) @@ -172,19 +224,21 @@ def test_dihedral_groups_ensemble(self, dihedral_data): dh2_var == pytest.approx(self.DG_C13141520_var) def test_save_df(self, dihedral_data, SM25_tmp_dir): - dihedrals.save_df(df=dihedral_data[0], df_save_dir=SM25_tmp_dir, molname='SM25') + df, _ = dihedral_data + dihedrals.save_df(df=df, df_save_dir=SM25_tmp_dir, resname='UNK', molname='SM25') assert (SM25_tmp_dir / 'SM25' / 'SM25_full_df.csv.bz2').exists(), 'Compressed csv file not saved' def test_save_df_info(self, dihedral_data, SM25_tmp_dir, caplog): + df, _ = dihedral_data caplog.clear() caplog.set_level(logging.INFO, logger='mdpow.workflows.dihedrals') - dihedrals.save_df(df=dihedral_data[0], df_save_dir=SM25_tmp_dir, molname='SM25') + dihedrals.save_df(df=df, df_save_dir=SM25_tmp_dir, resname='UNK', molname='SM25') assert f'Results DataFrame saved as {SM25_tmp_dir}/SM25/SM25_full_df.csv.bz2' in caplog.text, 'Save location not logged or returned' # Possible ordering issue (#239) def test_periodic_angle(self, dihedral_data): - df_aug = dihedral_data[1] + _, df_aug = dihedral_data aug_dh2_result = df_aug.loc[df_aug['selection'] == 'C13-C14-C15-C20']['dihedral'] @@ -195,37 +249,50 @@ def test_periodic_angle(self, dihedral_data): aug_dh2_var == pytest.approx(self.ADG_C13141520_var) # Possible ordering issue (#239) + # Tests using similar instances of the automated analyses + # will use module or class-scoped fixtures, pending #235 def test_save_fig(self, SM25_tmp_dir): dihedrals.automated_dihedral_analysis(dirname=SM25_tmp_dir, figdir=SM25_tmp_dir, - resname=self.resname, molname='SM25', + resname=resname, molname='SM25', solvents=('water',)) assert (SM25_tmp_dir / 'SM25' / 'SM25_C10-C5-S4-O11_violins.pdf').exists(), 'PDF file not generated' # Possible ordering issue (#239) + # Tests using similar instances of the automated analyses + # will use module or class-scoped fixtures, pending #235 def test_save_fig_info(self, SM25_tmp_dir, caplog): caplog.clear() caplog.set_level(logging.INFO, logger='mdpow.workflows.dihedrals') dihedrals.automated_dihedral_analysis(dirname=SM25_tmp_dir, figdir=SM25_tmp_dir, - resname=self.resname, molname='SM25', + resname=resname, molname='SM25', solvents=('water',)) assert f'Figure saved as {SM25_tmp_dir}/SM25/SM25_C10-C5-S4-O11_violins.pdf' in caplog.text, 'PDF file not saved' - def test_DataFrame_input(self, SM25_tmp_dir): - test_df = pd.DataFrame([['C1-C2-C3-C4', 'water', 'Coulomb', 0, 0, 60.0], - ['C1-C2-C3-C5', 'water', 'Coulomb', 0, 0, 60.0]], - [1,2],['selection', 'solvent', 'interaction', 'lambda', 'time', 'dihedral']) - plot = dihedrals.automated_dihedral_analysis(dirname=SM25_tmp_dir, figdir=SM25_tmp_dir, - resname=self.resname, - solvents=('water',), dataframe=test_df) - assert isinstance(plot, seaborn.axisgrid.FacetGrid) + # Tests using similar instances of the automated analyses + # will use module or class-scoped fixtures, pending #235 + def test_DataFrame_input(self, SM25_tmp_dir, dihedral_data): + df, _ = dihedral_data + dihedrals.automated_dihedral_analysis(dirname=SM25_tmp_dir, figdir=SM25_tmp_dir, + resname=resname, molname=molname, + solvents=('water',), dataframe=df) + assert (SM25_tmp_dir / 'SM25' / 'SM25_C10-C5-S4-O11_violins.pdf').exists(), 'PDF file not generated' - def test_DataFrame_input_info(self, SM25_tmp_dir, caplog): + # Tests using similar instances of the automated analyses + # will use module or class-scoped fixtures, pending #235 + def test_DataFrame_input_info(self, SM25_tmp_dir, dihedral_data, caplog): caplog.clear() caplog.set_level(logging.INFO, logger='mdpow.workflows.dihedrals') - test_df = pd.DataFrame([['C1-C2-C3-C4', 'water', 'Coulomb', 0, 0, 60.0], - ['C1-C2-C3-C5', 'water', 'Coulomb', 0, 0, 60.0]], - [1,2],['selection', 'solvent', 'interaction', 'lambda', 'time', 'dihedral']) + df, _ = dihedral_data dihedrals.automated_dihedral_analysis(dirname=SM25_tmp_dir, figdir=SM25_tmp_dir, - resname=self.resname, - solvents=('water',), dataframe=test_df) + resname=resname, molname=molname, + solvents=('water',), dataframe=df) assert 'Proceeding with results DataFrame provided.' in caplog.text, 'No dataframe provided or dataframe not recognized' + + # testing resources only contain analyses with single solvent input + def test_single_solvent(self, dihedral_data): + df, _ = dihedral_data + # all analysis data in one violin plot + g = dihedrals.dihedral_violins(df=df, width=0.9, solvents=('water',), plot_title='test') + # number of solvents in DataFrame used to generate plot + number_of_solvents = g.data['solvent'].nunique() + assert number_of_solvents == 1 \ No newline at end of file diff --git a/mdpow/tests/test_workflows_base.py b/mdpow/tests/test_workflows_base.py index 6b0ce2ec..3d33875b 100644 --- a/mdpow/tests/test_workflows_base.py +++ b/mdpow/tests/test_workflows_base.py @@ -2,19 +2,16 @@ import os import sys import yaml -import pybol -import pytest import pathlib import logging +import pybol +import pytest import pandas as pd -from mdpow.workflows import base - -from pkg_resources import resource_filename - from . import RESOURCES, MANIFEST, STATES - +from pkg_resources import resource_filename +from mdpow.workflows import base @pytest.fixture(scope='function') def molname_workflows_directory(tmp_path): @@ -62,17 +59,23 @@ def test_project_paths_csv_input(self, csv_input_data): pd.testing.assert_frame_equal(project_paths, csv_df) - def test_automated_project_analysis(self, project_paths_data, caplog): + def test_dihedral_analysis_figdir_requirement(self, project_paths_data, caplog): + caplog.clear() + caplog.set_level(logging.ERROR, logger='mdpow.workflows.base') + project_paths = project_paths_data # change resname to match topology (every SAMPL7 resname is 'UNK') # only necessary for this dataset, not necessary for normal use project_paths['resname'] = 'UNK' - base.automated_project_analysis(project_paths, solvents=('water',), - ensemble_analysis='DihedralAnalysis') + with pytest.raises(AssertionError, + match="figdir MUST be set, even though it is a kwarg. Will be changed with #244"): + + base.automated_project_analysis(project_paths, solvents=('water',), + ensemble_analysis='DihedralAnalysis') - assert 'all analyses completed' in caplog.text, ('automated_dihedral_analysis ' - 'did not iteratively run to completion for the provided project') + assert 'all analyses completed' in caplog.text, ('automated_dihedral_analysis ' + 'did not iteratively run to completion for the provided project') def test_automated_project_analysis_KeyError(self, project_paths_data, caplog): caplog.clear() diff --git a/mdpow/workflows/base.py b/mdpow/workflows/base.py index 4317af95..733eee2a 100644 --- a/mdpow/workflows/base.py +++ b/mdpow/workflows/base.py @@ -22,10 +22,10 @@ import os import re -import pandas as pd - import logging +import pandas as pd + logger = logging.getLogger('mdpow.workflows.base') def project_paths(parent_directory=None, csv=None, csv_save_dir=None): diff --git a/mdpow/workflows/dihedrals.py b/mdpow/workflows/dihedrals.py index f26ab2b5..de3178ad 100644 --- a/mdpow/workflows/dihedrals.py +++ b/mdpow/workflows/dihedrals.py @@ -13,46 +13,57 @@ depending on the desired results. Details of the completely automated workflow are discussed under :func:`~mdpow.workflows.dihedrals.automated_dihedral_analysis`. +Atom indices obtained by :func:`get_atom_indices` are 0-based, +atom index labels on the molecule in the plots are 0-based, +but atom `names` in plots and file names are 1-based. + .. autodata:: SOLVENTS_DEFAULT :annotation: = ('water', 'octanol') .. autodata:: INTERACTIONS_DEFAULT :annotation: = ('Coulomb', 'VDW') .. autodata:: SMARTS_DEFAULT :annotation: = [!#1]~[!$(*#*)&!D1]-!@[!$(*#*)&!D1]~[!#1] +.. autodata:: PLOT_WIDTH_DEFAULT + :annotation: = 190 .. autofunction:: automated_dihedral_analysis .. autofunction:: build_universe .. autofunction:: rdkit_conversion -.. autofunction:: dihedral_indices -.. autofunction:: dihedral_groups +.. autofunction:: get_atom_indices +.. autofunction:: get_bond_indices +.. autofunction:: get_dihedral_groups +.. autofunction:: get_paired_indices .. autofunction:: dihedral_groups_ensemble .. autofunction:: save_df -.. autofunction:: periodic_angle +.. autofunction:: periodic_angle_padding .. autofunction:: dihedral_violins -.. autofunction:: plot_violins +.. autofunction:: build_svg +.. autofunction:: plot_dihedral_violins """ import os +import logging import pathlib + import numpy as np import pandas as pd - +import MDAnalysis as mda +from MDAnalysis.topology.guessers import guess_types from rdkit import Chem - -import seaborn as sns +from rdkit.Chem import rdCoordGen +from rdkit.Chem.Draw import rdMolDraw2D import matplotlib.pyplot as plt - -import MDAnalysis as mda -from MDAnalysis.topology.guessers import guess_atom_element +import seaborn as sns +import pypdf +import cairosvg +import svgutils +import svgutils.compose +import svgutils.transform from ..analysis import ensemble, dihedral -import logging - logger = logging.getLogger('mdpow.workflows.dihedrals') - - SOLVENTS_DEFAULT = ('water', 'octanol') """Default solvents are water and octanol: @@ -88,12 +99,21 @@ """ -def build_universe(dirname): - """Builds :class:`~MDAnalysis.core.universe.Universe` from - ``water/Coulomb/0000`` topology and trajectory for the specified project. +PLOT_WIDTH_DEFAULT = 190 +"""Plot width (`plot_pdf_width`) should be provided in millimeters (mm), + and is converted to pixels (px) for use with :mod:`cairosvg`. + + conversion factor: 1 mm = 3.7795275591 px + default value: 190 mm = 718.110236229 pixels +""" - Used by :func:`~mdpow.workflows.dihedrals.rdkit_conversion` - and :func:`~mdpow.workflows.dihedrals.dihedral_indices` to obtain atom indices +def build_universe(dirname, solvents=SOLVENTS_DEFAULT): + """Builds :class:`~MDAnalysis.core.universe.Universe` from the + ``./Coulomb/0000`` topology and trajectory of the project for + the first solvent specified. + + Output used by :func:`~mdpow.workflows.dihedrals.rdkit_conversion` + and :func:`~mdpow.workflows.dihedrals.get_atom_indices` to obtain atom indices for each dihedral atom group. :keywords: @@ -108,6 +128,11 @@ def build_universe(dirname): and .xtc files for trajectory. It will default to using the tpr file available. + *solvents* + The default solvents are documented under :data:`SOLVENTS_DEFAULT`. + Normally takes a two-tuple, but analysis is compatible with single solvent selections. + Single solvent analyses will result in a figure with fully filled violins for the single solvent. + :returns: *u* @@ -116,8 +141,8 @@ def build_universe(dirname): """ path = pathlib.Path(dirname) - topology = path / 'FEP/water/Coulomb/0000' / 'md.tpr' - trajectory = path / 'FEP/water/Coulomb/0000' / 'md.xtc' + topology = path / f'FEP/{solvents[0]}/Coulomb/0000' / 'md.tpr' + trajectory = path / f'FEP/{solvents[0]}/Coulomb/0000' / 'md.xtc' u = mda.Universe(str(topology), str(trajectory)) return u @@ -131,7 +156,8 @@ def rdkit_conversion(u, resname): :func:`~mdpow.workflows.dihedrals.build_universe` and a `resname` as input. Uses `resname` to select the solute for conversion by :class:`~MDAnalysis.converters.RDKit.RDKitConverter` to :class:`rdkit.Chem.rdchem.Mol`, - and will add element attributes for Hydrogen if not listed in the topology. + and will add element attributes for Hydrogen if not listed in the topology, + using :func:`MDAnalysis.topology.guessers.guess_atom_element`. :keywords: @@ -151,8 +177,7 @@ def rdkit_conversion(u, resname): :class:`rdkit.Chem.rdchem.Mol` object converted from `solute` *solute* - molecule specified by :func:`~MDAnalysis.core.groups.select_atoms` - for :class:`~MDAnalysis.core.universe.Universe` object + the :any:`MDAnalysis` `AtomGroup` for the solute """ @@ -160,89 +185,97 @@ def rdkit_conversion(u, resname): solute = u.select_atoms(f'resname {resname}') mol = solute.convert_to('RDKIT') except AttributeError: - elements = [guess_atom_element(name) for name in u.atoms.names] - u.add_TopologyAttr("elements", elements) + u.add_TopologyAttr("elements", guess_types(u.atoms.names)) solute = u.select_atoms(f'resname {resname}') mol = solute.convert_to('RDKIT') + rdCoordGen.AddCoords(mol) + + for atom in mol.GetAtoms(): + atom.SetProp("atomNote", str(atom.GetIdx())) + return mol, solute -def dihedral_indices(dirname, resname, SMARTS=SMARTS_DEFAULT): - '''Uses a SMARTS selection string to identify indices for - relevant dihedral atom groups. +def get_atom_indices(mol, SMARTS=SMARTS_DEFAULT): + '''Uses a SMARTS selection string to identify atom indices + for relevant dihedral atom groups. - Requires an MDPOW project directory and `resname` - as input. With :func:`~mdpow.workflows.dihedrals.build_universe` and - :func:`~mdpow.workflows.dihedrals.rdkit_conversion`, uses the topology - and trajectory from ``water/Coulomb/0000`` and creates a - :class:`~MDAnalysis.core.universe.Universe` object. - Uses a SMARTS string to obtain all relevant dihedral atom groups. + Requires a :class:`rdkit.Chem.rdchem.Mol` object as input + for the :data:`SMARTS_DEFAULT` kwarg to match patterns to and + identify relevant dihedral atom groups. :keywords: - *dirname* - Molecule Simulation directory. Loads simulation files present in - lambda directories into the new instance. With this method for - generating an :class:`~mdpow.analysis.ensemble.Ensemble` the - lambda directories are explored and - :meth:`~mdpow.analysis.ensemble.Ensemble._load_universe_from_dir` - searches for .gro, .gro.bz2, .gro.gz, and .tpr files for topology, - and .xtc files for trajectory. It will default to using the tpr file - available. - - *resname* - `resname` for the molecule as defined in - the topology and trajectory + *mol* + :class:`rdkit.Chem.rdchem.Mol` object converted from `solute` *SMARTS* The default SMARTS string is described in detail under :data:`SMARTS_DEFAULT`. :returns: - *atom_group_indices* + *atom_indices* tuple of tuples of indices for each dihedral atom group ''' - u = build_universe(dirname=dirname) - mol = rdkit_conversion(u=u, resname=resname)[0] pattern = Chem.MolFromSmarts(SMARTS) - atom_group_indices = mol.GetSubstructMatches(pattern) + atom_indices = mol.GetSubstructMatches(pattern) + + return atom_indices - return atom_group_indices +def get_bond_indices(mol, atom_indices): + '''From the :class:`rdkit.Chem.rdchem.Mol` object, uses + `atom_indices` to determine the indices of the bonds + between those atoms for each dihedral atom group. -def dihedral_groups(dirname, resname, SMARTS=SMARTS_DEFAULT): - '''Uses the indices of the relevant dihedral atom groups determined - by :func:`~mdpow.workflows.dihedral.dihedral_indices` - and returns the names for each atom in each group. + :keywords: - Requires an MDPOW project directory and `resname` - as input. Expands upon usage of - :func:`~mdpow.workflows.dihedral.dihedral_indices` - to return an array of the names of each atom within - its respective dihedral atom group as identified by - the SMARTS selection string. Not necessary - for automation, useful for verifying all atom groups of interest - are properly identified before analysis. + *mol* + :class:`rdkit.Chem.rdchem.Mol` object converted from `solute` - :keywords: + *atom_indices* + tuple of tuples of indices for each dihedral atom group - *dirname* - Molecule Simulation directory. Loads simulation files present in - lambda directories into the new instance. With this method for - generating an :class:`~mdpow.analysis.ensemble.Ensemble` the - lambda directories are explored and - :meth:`~mdpow.analysis.ensemble.Ensemble._load_universe_from_dir` - searches for .gro, .gro.bz2, .gro.gz, and .tpr files for topology, - and .xtc files for trajectory. It will default to using the tpr file - available. + :returns: - *resname* - `resname` for the molecule as defined in - the topology and trajectory + *bond_indices* + tuple of tuples of indices for the bonds in each dihedral atom group - *SMARTS* - The default SMARTS string is described in detail under :data:`SMARTS_DEFAULT`. + ''' + + bonds = [] + + for atom_index in atom_indices: + + x = mol.GetBondBetweenAtoms(atom_index[0], atom_index[1]).GetIdx() + y = mol.GetBondBetweenAtoms(atom_index[1], atom_index[2]).GetIdx() + z = mol.GetBondBetweenAtoms(atom_index[2], atom_index[3]).GetIdx() + bix = (x, y, z) + + bonds.append(bix) + + bond_indices = tuple(bonds) + + return bond_indices + +def get_dihedral_groups(solute, atom_indices): + '''Uses the 0-based `atom_indices` of the relevant dihedral atom groups + determined by :func:`~mdpow.workflows.dihedrals.get_atom_indices` + and returns the 1-based index names for each atom in each group. + + Requires the `atom_indices` from :func:`~mdpow.workflows.dihedrals.get_atom_indices` + to index the `solute` specified by :func:`~MDAnalysis.core.groups.select_atoms` + and return an array of the names of each atom within its respective + dihedral atom group as identified by the SMARTS selection string. + + :keywords: + + *solute* + the :any:`MDAnalysis` `AtomGroup` for the solute + + *atom_indices* + tuple of tuples of indices for each dihedral atom group :returns: @@ -250,16 +283,53 @@ def dihedral_groups(dirname, resname, SMARTS=SMARTS_DEFAULT): list of :func:`numpy.array` for atom names in each dihedral atom group ''' - - u = build_universe(dirname=dirname) - solute = rdkit_conversion(u=u, resname=resname)[1] - atom_group_indices = dihedral_indices(dirname=dirname, resname=resname, SMARTS=SMARTS) - dihedral_groups = [solute.atoms[list(dihedral_indices)].names for dihedral_indices - in atom_group_indices] + # currently uses RDKit Mol object atom indices to retrieve + # atom names from the MDAnalysis solute object + # RDKit-MDAnalysis index consistency is currently tested + dihedral_groups = [solute.atoms[list(atom_index)].names for atom_index + in atom_indices] return dihedral_groups -def dihedral_groups_ensemble(dirname, atom_group_indices, +def get_paired_indices(atom_indices, bond_indices, dihedral_groups): + '''Combines `atom_indices` and `bond_indices` in tuples + to be paired with their respective dihedral atom groups. + + A dictionary is created with key-value pairs as follows: + `atom_indices` and `bond_indices` are joined in a tuple + as the value, with the key being the respective member + of `dihedral_groups` to facilitate highlighting the + relevant dihedral atom group when generating violin plots. + As an example, `'C1-N2-O3-S4': ((0, 1, 2, 3), (0, 1, 2))`, + would be one key-value pair in the dictionary. + + :keywords: + + *atom_indices* + tuple of tuples of indices for each dihedral atom group + + *bond_indices* + tuple of tuples of indices for the bonds in each dihedral atom group + + *dihedral_groups* + list of :func:`numpy.array` for atom names in each dihedral atom group + + :returns: + + *name_index_pairs* + dictionary with key-value pair for dihedral atom group, + atom indices, and bond indices + + ''' + + all_dgs = [f'{dg[0]}-{dg[1]}-{dg[2]}-{dg[3]}' for dg in dihedral_groups] + + name_index_pairs = {} + name_index_pairs = {all_dgs[i]: (atom_indices[i], bond_indices[i]) for i in range(len(all_dgs))} + + return name_index_pairs + +def dihedral_groups_ensemble(dirname, atom_indices, solvents=SOLVENTS_DEFAULT, interactions=INTERACTIONS_DEFAULT, start=None, stop=None, step=None): @@ -285,13 +355,15 @@ def dihedral_groups_ensemble(dirname, atom_group_indices, and .xtc files for trajectory. It will default to using the tpr file available. - *atom_group_indices* + *atom_indices* tuples of atom indices for dihedral atom groups - .. seealso:: :func:`~mdpow.workflows.dihedrals.dihedral_indices` + .. seealso:: :func:`~mdpow.workflows.dihedrals.get_atom_indices`, :data:`SMARTS_DEFAULT` *solvents* The default solvents are documented under :data:`SOLVENTS_DEFAULT`. + Normally takes a two-tuple, but analysis is compatible with single solvent selections. + Single solvent analyses will result in a figure with fully filled violins for the single solvent. *interactions* The default interactions are documented under :data:`INTERACTIONS_DEFAULT`. @@ -313,7 +385,7 @@ def dihedral_groups_ensemble(dirname, atom_group_indices, dih_ens = ensemble.Ensemble(dirname=dirname, solvents=solvents, interactions=interactions) - indices = atom_group_indices + indices = atom_indices all_dihedrals = [dih_ens.select_atoms(f'index {i[0]}', f'index {i[1]}', f'index {i[2]}', @@ -325,7 +397,7 @@ def dihedral_groups_ensemble(dirname, atom_group_indices, return df -def save_df(df, df_save_dir, resname=None, molname=None): +def save_df(df, df_save_dir, resname, molname=None): '''Takes a :class:`pandas.DataFrame` of results from :class:`~mdpow.analysis.dihedral.DihedralAnalysis` as input before padding the angles to optionaly save the raw @@ -369,25 +441,27 @@ def save_df(df, df_save_dir, resname=None, molname=None): newdir = os.path.join(df_save_dir, subdir) os.mkdir(newdir) - # time and compress level can be adjusted as kwargs + # time and compression level can be adjusted as kwargs df.to_csv(f'{newdir}/{molname}_full_df.csv.bz2', index=False, compression='bz2') logger.info(f'Results DataFrame saved as {newdir}/{molname}_full_df.csv.bz2') -def periodic_angle(df, padding=45): +def periodic_angle_padding(df, padding=45): '''Pads the angles from the results :class:`~pandas.DataFrame` to maintain periodicity in the violin plots. Takes a :class:`pandas.DataFrame` of results from :class:`~mdpow.analysis.dihedral.DihedralAnalysis` + or :func:`~mdpow.workflows.dihedrals.dihedral_groups_ensemble` as input and pads the angles to maintain periodicity for properly plotting dihedral angle frequencies as KDE violins - with :func:`~mdpow.workflows.dihedrals.dihedral_violins`. - Creates two new :class:`pandas.DataFrame` based on the - cutoff value specified, adds to the angle values, concatenates + with :func:`~mdpow.workflows.dihedrals.dihedral_violins` and + :func:`~mdpow.workflows.dihedrals.plot_dihedral_violins`. + Creates two new :class:`pandas.DataFrame` based on the + `padding` value specified, pads the angle values, concatenates all three :class:`pandas.DataFrame`, maintaining original data and - adding padding, and returns new augmented :class:`pandas.DataFrame`. + adding padded values, and returns new augmented :class:`pandas.DataFrame`. :keywords: @@ -396,7 +470,7 @@ def periodic_angle(df, padding=45): results, including all dihedral atom groups for molecule of current project *padding* - value in degrees + value in degrees to specify angle augmentation threshold default: 45 :returns: @@ -405,16 +479,6 @@ def periodic_angle(df, padding=45): augmented results :class:`pandas.DataFrame` containing padded dihedral angles as specified by `padding` - .. rubric:: Example - - Typical Workflow:: - - da = DihedralAnalysis(all_dihedrals) - da.run(start=0, stop=100, step=10) - df = da.results - df_aug = periodic_angle(df, padding=45) - plot = dihedral_violins(df_aug, width=0.9) - ''' df1 = df[df.dihedral > 180 - padding].copy(deep=True) @@ -425,16 +489,19 @@ def periodic_angle(df, padding=45): return df_aug -def dihedral_violins(df, width=0.9, solvents=SOLVENTS_DEFAULT): - '''Plots distributions of dihedral angles for one dihedral atom group - as violin plots, using as input the augmented :class:`pandas.DataFrame` - from :func:`~mdpow.workflows.dihedrals.periodic_angle`. +def dihedral_violins(df, width=0.9, solvents=SOLVENTS_DEFAULT, plot_title=None): + '''Plots kernel density estimates (KDE) of dihedral angle frequencies for + one dihedral atom group as violin plots, using as input the augmented + :class:`pandas.DataFrame` from :func:`~mdpow.workflows.dihedrals.periodic_angle_padding`. + + Output is converted to SVG by :func:`~mdpow.workflows.dihedrals.build_svg` + and final output is saved as PDF by :func:`~mdpow.workflows.dihedrals.plot_dihedral_violins` :keywords: *df* augmented results :class:`pandas.DataFrame` from - :func:`~mdpow.workflows.dihedrals.periodic_angle` + :func:`~mdpow.workflows.dihedrals.periodic_angle_padding` *width* width of the violin element (>1 overlaps) @@ -442,13 +509,20 @@ def dihedral_violins(df, width=0.9, solvents=SOLVENTS_DEFAULT): *solvents* The default solvents are documented under :data:`SOLVENTS_DEFAULT`. + Normally takes a two-tuple, but analysis is compatible with single solvent selections. + Single solvent analyses will result in a figure with fully filled violins for the single solvent. + + *plot_title* + generated by :func:`~mdpow.workflows.dihedrals.build_svg` using + `molname`, `dihedral_groups`, `atom_indices`, and `interactions` + in this order and format: f'{molname}, {name[0]} {a} | ''{col_name}' :returns: - *violin plot* + *g* returns a :class:`seaborn.FacetGrid` object containing a violin plot of the - kernel density estimations (KDE) of the dihedral angle frequencies for each - relevant dihedral atom group in the molecule from the current MDPOW project + kernel density estimates (KDE) of the dihedral angle frequencies for each + dihedral atom group identified by :data:`SMARTS_DEFAULT` ''' @@ -458,25 +532,26 @@ def dihedral_violins(df, width=0.9, solvents=SOLVENTS_DEFAULT): "interaction", "lambda"]).reset_index(drop=True) - # number of Coul windows + 1 / number of VDW windows - # (+1 for additional space with axes) width_ratios = [len(df[df['interaction'] == "Coulomb"]["lambda"].unique()) + 1, - len(df[df['interaction'] == "VDW"]["lambda"].unique())] - - solvs = np.asarray(solvents) - solv2 = 'octanol' - if solvs.size > 1: - solv2 = solvs[1] + len(df[df['interaction'] == "VDW"]["lambda"].unique()), + len(df[df['interaction'] == "Coulomb"]["lambda"].unique()) - 1] + # Usage in Jupyter causes matplotlib figure object output, not the completed figure + # Upcoming fix in issue #260 + assert 0 < len(solvents) < 3, "one or two solvents must be specified, otherwise SOLVENTS_DEFAULT is used" + split = len(solvents) > 1 g = sns.catplot(data=df, x="lambda", y="dihedral", hue="solvent", col="interaction", - kind="violin", split=True, width=width, inner=None, cut=0, + kind="violin", split=split, width=width, inner=None, cut=0, linewidth=0.5, - hue_order=[solvs[0], solv2], col_order=["Coulomb", "VDW"], + hue_order=list(solvents), col_order=["Coulomb", "VDW", "Structure"], sharex=False, sharey=True, - height=2, aspect=2.5, + height=3.0, aspect=2.0, facet_kws={'ylim': (-180, 180), 'gridspec_kws': {'width_ratios': width_ratios, - }}) + } + } + ) + g.set_xlabels(r"$\lambda$") g.set_ylabels(r"dihedral angle $\phi$") g.despine(offset=5) @@ -490,33 +565,117 @@ def dihedral_violins(df, width=0.9, solvents=SOLVENTS_DEFAULT): axV.yaxis.set_visible(False) axV.spines["left"].set_visible(False) + axIM = g.axes_dict['Structure'] + axIM.axis('off') + + g.set_titles(plot_title) + return g -def plot_violins(df, resname, figdir=None, molname=None, width=0.9, solvents=SOLVENTS_DEFAULT): - '''Coordinates plotting and optionally saving figures for all dihedral - atom groups. +def build_svg(mol, molname, name_index_pairs, atom_group_selection, + solvents=SOLVENTS_DEFAULT, width=0.9): + '''Converts and combines figure components into an + SVG object to be converted and saved as a publication + quality PDF. + + :keywords: + + *mol* + :class:`rdkit.Chem.rdchem.Mol` object converted from `solute` + + *molname* + molecule name to be used for labelling + plots, if different from `resname` + (in this case, carried over from an upstream + decision between the two) + + *name_index_pairs* + dictionary with key-value pair for dihedral atom group, + atom indices, and bond indices + + .. seealso:: :func:`~mdpow.workflows.dihedrals.get_paired_indices` + + *atom_group_selection* + `name` of each section in the `groupby` series of atom group selections + + .. seealso:: :func:`~mdpow.workflows.dihedrals.plot_dihedral_violins` + + *solvents* + The default solvents are documented under :data:`SOLVENTS_DEFAULT`. + Normally takes a two-tuple, but analysis is compatible with single solvent selections. + Single solvent analyses will result in a figure with fully filled violins for the single solvent. + + *width* + width of the violin element (>1 overlaps) + default: 0.9 + + :returns: + + *fig* + :mod:`svgutils` SVG figure object - Makes a subdirectory within the specified - `figdir` using `resname` or `molname` provided and saves violin plot - figur for each dihedral atom group separately. + ''' + + atom_index = name_index_pairs[atom_group_selection[0]][0] + bond_index = name_index_pairs[atom_group_selection[0]][1] + + drawer = rdMolDraw2D.MolDraw2DSVG(250, 250) + drawer.DrawMolecule(mol=mol, highlightAtoms=atom_index, highlightBonds=bond_index) + drawer.FinishDrawing() + svg = drawer.GetDrawingText().replace('svg:','') + + mol_svg = svgutils.transform.fromstring(svg) + m = mol_svg.getroot() + m.scale(0.0125).moveto(21.8, 0.35) + + plot_title = f'{molname}, {atom_group_selection[0]} {atom_index} | ''{col_name}' + plot = dihedral_violins(atom_group_selection[1], width=width, solvents=solvents, plot_title=plot_title) + + plot_svg = svgutils.transform.from_mpl(plot) + p = plot_svg.getroot() + p.scale(0.02) + + # order matters here, plot down first, mol on top (p, m) + fig = svgutils.compose.Figure("28cm", "4.2cm", p, m) + + return fig + +def plot_dihedral_violins(df, resname, mol, name_index_pairs, figdir=None, molname=None, + width=0.9, plot_pdf_width=PLOT_WIDTH_DEFAULT, solvents=SOLVENTS_DEFAULT): + '''Coordinates plotting and saving figures for all dihedral atom groups. + + Makes a subdirectory for the current project within the specified + `figdir` using `resname` or `molname` as title and saves production + quality PDFs for each dihedral atom group separately. .. seealso:: :func:`~mdpow.workflows.dihedrals.automated_dihedral_analysis`, - :func:`~mdpow.workflows.dihedrals.dihedral_violins` + :func:`~mdpow.workflows.dihedrals.dihedral_violins`, + :func:`~mdpow.workflows.dihedrals.build_svg` :keywords: *df* augmented results :class:`pandas.DataFrame` from - :func:`~mdpow.workflows.dihedrals.periodic_angle` + :func:`~mdpow.workflows.dihedrals.periodic_angle_padding` *resname* `resname` for the molecule as defined in the topology and trajectory + *mol* + :class:`rdkit.Chem.rdchem.Mol` object converted from `solute` + + *name_index_pairs* + dictionary with key-value pair for dihedral atom group, + atom indices, and bond indices + + .. seealso:: :func:`~mdpow.workflows.dihedrals.get_paired_indices` + *figdir* - optional, path to the location to save figures + path to the location to save figures (REQUIRED but marked + as a kwarg for technical reasons; will be changed in #244) *molname* molecule name to be used for labelling @@ -528,45 +687,68 @@ def plot_violins(df, resname, figdir=None, molname=None, width=0.9, solvents=SOL .. seealso:: :func:`~mdpow.workflows.dihedrals.dihedral_violins` + *plot_pdf_width* + The default value for width of plot output is described in detail under + :data:`PLOT_WIDTH_DEFAULT`. + *solvents* The default solvents are documented under :data:`SOLVENTS_DEFAULT`. - - :returns: - - *violin plot* - returns a :class:`seaborn.FacetGrid` object containing a violin plot of the - kernel density estimations (KDE) of the dihedral angle frequencies for each - relevant dihedral atom group in the molecule from the current MDPOW project + Normally takes a two-tuple, but analysis is compatible with single solvent selections. + Single solvent analyses will result in a figure with fully filled violins for the single solvent. ''' + assert figdir is not None, "figdir MUST be set, even though it is a kwarg. Will be changed with #244" + if molname is None: molname = resname - if figdir is not None: - subdir = molname - newdir = os.path.join(figdir, subdir) - os.mkdir(newdir) + subdir = molname + newdir = os.path.join(figdir, subdir) + os.mkdir(newdir) - section = df.groupby(by='selection') + section = df.groupby(by="selection") - for name in section: - plot = dihedral_violins(name[1], width=width, solvents=solvents) - plot.set_titles(f'{molname},{name[0]}, ''{col_name}') - # plot.set_titles needs to stay here during future development - # this locale ensures that plots are properly named, - # especially when generated for a projecct iteratively + plot_pdf_width_px = plot_pdf_width * 3.7795275591 - if figdir is not None: - figfile = pathlib.Path(newdir) / f"{molname}_{name[0]}_violins.pdf" - plot.savefig(figfile) - logger.info(f'Figure saved as {figfile}') + pdf_list = [] - return plot + for name in section: + + fig = build_svg(mol=mol, molname=molname, atom_group_selection=name, name_index_pairs=name_index_pairs, + solvents=solvents, width=width) -def automated_dihedral_analysis(dirname=None, df_save_dir=None, figdir=None, - resname=None, molname=None, - SMARTS=SMARTS_DEFAULT, + figfile = pathlib.Path(newdir) / f"{molname}_{name[0]}_violins.pdf" + if figdir is not None: + plot_pdf = cairosvg.svg2pdf(bytestring=fig.tostr(), write_to=str(figfile), + output_width=plot_pdf_width_px) + + # add PDF for each dihedral atom group to all_PDFs list + pdf_list.append(f'{figfile}') + + logger.info(f"Figure saved as {figfile}") + + logger.info(f"All figures generated and saved in {figdir}") + + merger = pypdf.PdfWriter() + + for pdf in pdf_list: + merger.append(pdf) + merger.write(f"{figdir}/{molname}/{molname}_all_figures.pdf") + merger.close() + logger.info(f"PDF of combined figures generated and saved as {figdir}/{molname}/{molname}_all_figures.pdf") + + return None + +def automated_dihedral_analysis(dirname, resname, + figdir=None, + # figdir is required and will cause issues if not specified + # figdir=None is a temporary way to satisfy + # workflows base tests until issue #244 is resolved + # because it currently uses a **kwargs convention and the + # positional argument figdir will not carry over nicely + df_save_dir=None, molname=None, + SMARTS=SMARTS_DEFAULT, plot_pdf_width=PLOT_WIDTH_DEFAULT, dataframe=None, padding=45, width=0.9, solvents=SOLVENTS_DEFAULT, interactions=INTERACTIONS_DEFAULT, @@ -577,13 +759,11 @@ def automated_dihedral_analysis(dirname=None, df_save_dir=None, figdir=None, For one MDPOW project, automatically determines all relevant dihedral atom groups in the molecule, runs :class:`~mdpow.analysis.dihedral.DihedralAnalysis` for each group, - pads the dihedral angles from analysis results for all groups to maintain periodicity, - creates violin plots of dihedral angle frequencies for each group, separately, from the - padded results. + pads the dihedral angles to maintain periodicity, creates violin plots of dihedral angle + frequencies (KDEs), and saves publication quality PDF figures for each group, separately. Optionally saves all pre-padded :class:`~mdpow.analysis.dihedral.DihedralAnalysis` results - as a single :class:`pandas.DataFrame`, and separate violin plots for each dihedral atom group - in `df_save_dir`, and `figdir` directories provided, respectively. + as a single :class:`pandas.DataFrame` in `df_save_dir` provided. :keywords: @@ -597,16 +777,17 @@ def automated_dihedral_analysis(dirname=None, df_save_dir=None, figdir=None, and .xtc files for trajectory. It will default to using the tpr file available. - *df_save_dir* - optional, path to the location to save results :class:`pandas.DataFrame` - *figdir* - optional, path to the location to save figures + path to the location to save figures (REQUIRED but marked + as a kwarg for technical reasons; will be changed in #244) *resname* `resname` for the molecule as defined in the topology and trajectory + *df_save_dir* + optional, path to the location to save results :class:`pandas.DataFrame` + *molname* molecule name to be used for labelling plots, if different from `resname` @@ -614,6 +795,10 @@ def automated_dihedral_analysis(dirname=None, df_save_dir=None, figdir=None, *SMARTS* The default SMARTS string is described in detail under :data:`SMARTS_DEFAULT`. + *plot_pdf_width* + The default value for width of plot output is described in detail under + :data:`PLOT_WIDTH_DEFAULT`. + *dataframe* optional, if :class:`~mdpow.analysis.dihedral.DihedralAnalysis` was done prior, then results :class:`pandas.DataFrame` can be @@ -623,7 +808,7 @@ def automated_dihedral_analysis(dirname=None, df_save_dir=None, figdir=None, value in degrees default: 45 - .. seealso:: :func:`~mdpow.workflows.dihedrals.periodic_angle` + .. seealso:: :func:`~mdpow.workflows.dihedrals.periodic_angle_padding` *width* width of the violin element (>1 overlaps) @@ -633,6 +818,8 @@ def automated_dihedral_analysis(dirname=None, df_save_dir=None, figdir=None, *solvents* The default solvents are documented under :data:`SOLVENTS_DEFAULT`. + Normally takes a two-tuple, but analysis is compatible with single solvent selections. + Single solvent analyses will result in a figure with fully filled violins for the single solvent. *interactions* The default interactions are documented under :data:`INTERACTIONS_DEFAULT`. @@ -643,20 +830,13 @@ def automated_dihedral_analysis(dirname=None, df_save_dir=None, figdir=None, .. seealso:: :class:`~mdpow.analysis.ensemble.EnsembleAnalysis` - :returns: - - *violin plot* - returns a :class:`seaborn.FacetGrid` object containing a violin plot of the - kernel density estimations (KDE) of the dihedral angle frequencies for each - relevant dihedral atom group in the molecule from the current MDPOW project - .. rubric:: Example Typical Workflow:: - import automated_dihedral_analysis as ada + import dihedrals - ada.automated_dihedral_analysis(dirname='/foo/bar/MDPOW_project_data', + dihedrals.automated_dihedral_analysis(dirname='/foo/bar/MDPOW_project_data', figdir='/foo/bar/MDPOW_figure_directory', resname='UNK', molname='benzene', padding=45, width=0.9, @@ -665,6 +845,14 @@ def automated_dihedral_analysis(dirname=None, df_save_dir=None, figdir=None, start=0, stop=100, step=10) ''' + u = build_universe(dirname=dirname) + mol, solute = rdkit_conversion(u=u, resname=resname) + atom_indices = get_atom_indices(mol=mol, SMARTS=SMARTS) + bond_indices = get_bond_indices(mol=mol, atom_indices=atom_indices) + dihedral_groups = get_dihedral_groups(solute=solute, atom_indices=atom_indices) + name_index_pairs = get_paired_indices(atom_indices=atom_indices, bond_indices=bond_indices, + dihedral_groups=dihedral_groups) + if dataframe is not None: df = dataframe @@ -672,14 +860,18 @@ def automated_dihedral_analysis(dirname=None, df_save_dir=None, figdir=None, else: - atom_group_indices = dihedral_indices(dirname=dirname, resname=resname, SMARTS=SMARTS) - df = dihedral_groups_ensemble(atom_group_indices=atom_group_indices, dirname=dirname, + df = dihedral_groups_ensemble(dirname=dirname, atom_indices=atom_indices, solvents=solvents, interactions=interactions, start=start, stop=stop, step=step) if df_save_dir is not None: save_df(df=df, df_save_dir=df_save_dir, resname=resname, molname=molname) - df_aug = periodic_angle(df, padding=padding) + df_aug = periodic_angle_padding(df, padding=padding) + + plot_dihedral_violins(df=df_aug, resname=resname, mol=mol, name_index_pairs=name_index_pairs, figdir=figdir, molname=molname, + width=width, plot_pdf_width=plot_pdf_width, solvents=solvents) + + logger.info(f"DihedralAnalysis completed for all projects in {dirname}") - return plot_violins(df_aug, resname=resname, figdir=figdir, molname=molname, width=width, solvents=solvents) + return diff --git a/setup.py b/setup.py index f1125c54..262f113c 100644 --- a/setup.py +++ b/setup.py @@ -62,6 +62,9 @@ 'matplotlib', 'seaborn', 'rdkit', + 'svgutils', + 'cairosvg', + 'pypdf' ], #setup_requires=['pytest-runner',], tests_require=['pytest', 'pybol', 'py'],