From ae7332a77e5c7e702ee4cc479332ecedc9080857 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marcel=20M=C3=BCller?= Date: Fri, 8 Nov 2024 00:51:42 +0100 Subject: [PATCH 1/7] update docs, co-workers, pyproject.toml, remove typos MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Marcel Müller --- .github/CODEOWNERS | 4 +-- README.md | 75 +++++++++++++++++++++++++++++----------------- pyproject.toml | 6 ++-- 3 files changed, 53 insertions(+), 32 deletions(-) diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS index 79a95f5..2922cc7 100644 --- a/.github/CODEOWNERS +++ b/.github/CODEOWNERS @@ -7,6 +7,6 @@ # These parts are specifically owned by some people /src/mindlessgen/cli @marcelmbn /src/mindlessgen/generator @marcelmbn -/src/mindlessgen/molecules @marcelmbn -/src/mindlessgen/prog @marcelmbn +/src/mindlessgen/molecules @marcelmbn @jonathan-schoeps +/src/mindlessgen/prog @marcelmbn @jonathan-schoeps /src/mindlessgen/qm @marcelmbn diff --git a/README.md b/README.md index 871d9d2..0254714 100644 --- a/README.md +++ b/README.md @@ -4,8 +4,8 @@ Apache-2.0 - - Python Versions + + Python Versions @@ -49,10 +49,10 @@ Both installation methods work in principle also without a virtual environment, ### Development purposes For working on the code of `mindlessgen`, the following setup is recommended: -``` +```bash mamba create -n mindlessgen python=3.12 mamba activate mindlessgen -git clone {link to the MindlessGen repository} +git clone https://github.com/grimme-lab/MindlessGen.git # or the analogous SSH link pip install -e '.[dev]' ``` Thereby, all necessary development tools (e.g., `ruff`, `mypy`, `tox`, `pytest`, and `pre-commit`) are installed. @@ -82,11 +82,12 @@ If the path is not specified with `-c/--config`, `mindlessgen.toml` will be sear 1. Current working directory (`$CWD`) 2. Home directory (`$USER/`) -The active configuration can be printed using `--print-config`. 
+If neither a corresponding CLI command nor an entry in the configuration file is provided, the default values are used. +The active configuration, including the default values, can be printed using `--print-config`. ### Element composition There are two related aspects of the element composition: -1. _Which elements_ should occur within the generated molecule? +1. **Which elements** should occur within the generated molecule? 2. **How many atoms** of the specified element should occur? - **Example 1**: `C:1-3, O:1-1, H:1-*` would result in a molecule with 1, 2, or 3 carbon atoms, exactly 1 oxygen atom, and between 1 and an undefined number of hydrogen atoms (i.e., at least 1). - **Example 2**: `Na:10-10, In:10-10, O:20-20`. This example would result in a molecule with exactly 10 sodium atoms, 10 indium atoms, and 20 oxygen atoms. **For a fixed element composition, the number of atoms (40) has to be within the min_num_atoms and max_num_atom interval.** `mindlessgen` will consequently always return a molecule with exactly 40 atoms. @@ -98,28 +99,46 @@ There are two related aspects of the element composition: ## Citation -When using the program for academic purposes, please cite: - -_J. Chem. Theory Comput._ 2009, **5**, 4, 993–1003 - -or in `BibTeX` format: -``` -@article{doi:10.1021/ct800511q, -author = {Korth, Martin and Grimme, Stefan}, -title = {“Mindless” DFT Benchmarking}, -journal = {Journal of Chemical Theory and Computation}, -volume = {5}, -number = {4}, -pages = {993-1003}, -year = {2009}, -doi = {10.1021/ct800511q}, -note ={PMID: 26609608}, -URL = {https://doi.org/10.1021/ct800511q}, -eprint = {https://doi.org/10.1021/ct800511q} -} -``` - -## Acknowdledgements +When using the program for academic purposes, please cite _i)_ the original idea and _ii)_ the new Python implementation. + +1. _J. Chem. 
Theory Comput._ 2009, **5**, 4, 993–1003 + ``` + @article{korth_mindless_2009, + title = {Mindless {DFT} benchmarking}, + volume = {5}, + issn = {15499618}, + url = {https://pubs.acs.org/doi/full/10.1021/ct800511q}, + doi = {10.1021/ct800511q}, + number = {4}, + urldate = {2022-11-07}, + journal = {J. Chem. Theory Comput.}, + author = {Korth, Martin and Grimme, Stefan}, + month = apr, + year = {2009}, + note = {Publisher: American Chemical Society}, + pages = {993--1003}, + } + ``` + +2. A new publication featuring all functionalities and improvements of `mindlessgen` is in preparation. + In the meantime, please refer to the original publication and to the following preprint, which uses the `mindlessgen` program for the first time: + Müller, M.; Froitzheim, T.; Hansen, A.; Grimme, S. _ChemRxiv_ October 28, 2024. https://doi.org/10.26434/chemrxiv-2024-h76ms. + ``` + @misc{muller_advanced_2024, + title = {Advanced {Charge} {Extended} {Hückel} ({CEH}) {Model} and a {Consistent} {Adaptive} {Minimal} {Basis} {Set} for the {Elements} {Z}=1-103}, + url = {https://chemrxiv.org/engage/chemrxiv/article-details/671a92581fb27ce1247466ad}, + doi = {10.26434/chemrxiv-2024-h76ms}, + urldate = {2024-10-28}, + publisher = {ChemRxiv}, + author = {Müller, Marcel and Froitzheim, Thomas and Hansen, Andreas and Grimme, Stefan}, + month = oct, + year = {2024}, + keywords = {DFT, Basis sets, EHT, SQM}, + } + ``` + +## Acknowledgements [T. Gasevic](https://github.com/gasevic) for creating an initial `GitHub` [migration](https://github.com/gasevic/mlmgen) of the code and making important adjustments to the workflow. [S. Grimme](https://www.chemie.uni-bonn.de/grimme/de/grimme) and M. Korth for the original code written in Fortran associated to the publication in [J. Chem. Theory Comput.](https://pubs.acs.org/doi/full/10.1021/ct800511q). +[T. Froitzheim](https://github.com/thfroitzheim) for helpful discussions during the development of the program. 
diff --git a/pyproject.toml b/pyproject.toml index 92d6f76..4bcaf95 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -6,17 +6,19 @@ build-backend = "setuptools.build_meta" name = "mindlessgen" authors = [ { name = "Marcel Müller", email = "marcel.mueller@thch.uni-bonn.de" }, + { name = "Jonathan Schöps", email = "s6jtscho@uni-bonn.de" }, ] -description = "Mindless Molecule GENerator" +description = "Mindless Molecule Generator" readme = "README.md" requires-python = ">=3.10" license = { file = "LICENSE.md" } classifiers = [ - "License :: OSI Approved :: MIT License", + "License :: OSI Approved :: Apache Software License", "Programming Language :: Python :: 3 :: Only", "Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.11", "Programming Language :: Python :: 3.12", + "Programming Language :: Python :: 3.13", "Topic :: Scientific/Engineering", "Typing :: Typed", ] From a8e9a7ce337550b9710abdc4b8cb437251618d67 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marcel=20M=C3=BCller?= Date: Fri, 8 Nov 2024 00:59:48 +0100 Subject: [PATCH 2/7] update CHANGELOG, make case more consistent MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Marcel Müller --- CHANGELOG.md | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index a51f7fb..2037616 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,22 +7,24 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] ### Changed - vdW radii scaling parameter can now be adjusted via `mindlessgen.toml` or CLI -- The check_distance function now checks based on the sum of the van der Waals radii and a scaling factor acessible via `mindlessgen.toml` or CLI +- check_distance function now checks based on the sum of the van der Waals radii and a scaling factor acessible via `mindlessgen.toml` or CLI - better type hints for `Callables` -- A clearer differentiation between 
the distinct scaling factors for the van der Waals radii. +- a clearer differentiation between the distinct scaling factors for the van der Waals radii. - `README.md` with more detailed explanation of the element composition function. ### Fixed -- Unit conversion for (currenly unused) vdW radii from the original Fortran project +- unit conversion for (currenly unused) vdW radii from the original Fortran project - minor print output issues (no new line breaks, more consistent verbosity differentiation, ...) - bug in `postprocess_mol` which led to an unassigned return variable in the single-point case +- bug leading to `UnicodeDecodeError` when reading `xtb` output files - bug with all atom lists being initialized with a length of 102 instead of 103 ### Added -- Support for the novel "g-xTB" method (working title: GP3-xTB) -- A function which contracts the coordinates after the initial generation. -- A function which is able to printout the xyz coordinates to the terminal similar to the `.xyz` layout. -- Elements 87 to 103 are accessible via the element composition. If `xtb` is the engine, the elements will be replaced by their lighter homologues. +- support for the novel "g-xTB" method (working title: GP3-xTB) +- function which contracts the coordinates after the initial generation. +- function which is able to printout the xyz coordinates to the terminal similar to the `.xyz` layout. +- elements 87 to 103 are accessible via the element composition. If `xtb` is the engine, the elements will be replaced by their lighter homologues. +- support for `python-3.13` ### Breaking Changes - Removal of the `dist_threshold` flag and in the `-toml` file. 
From 0568bdc8d0860086e40784f45a3f00ecbbafd7c5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marcel=20M=C3=BCller?= Date: Fri, 8 Nov 2024 01:19:02 +0100 Subject: [PATCH 3/7] update mindlessgen.toml MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Marcel Müller --- mindlessgen.toml | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/mindlessgen.toml b/mindlessgen.toml index 0dc56bd..6cdc7ad 100644 --- a/mindlessgen.toml +++ b/mindlessgen.toml @@ -1,21 +1,21 @@ -# Default configuration for the 'Mindless Molecule GENerator' (MindlessGen) package -# Following file locations are searched for in the following order: +# Default configuration for the 'Mindless Molecule Generator' (MindlessGen) package +# The following file locations are searched for in ascending order: # 1. Location specified by the `--config < str | Path >` command-line argument # 2. Current working directory (`Path.cwd()`) # 3. User's home directory (`Path.home()`) [general] -# > Verbosity level defining the printout: Options: 0 = silent, 1 = default, 2 = verbose, 3 = debug +# > Verbosity level defining the printout: Options: -1 = super-silent, 0 = silent, 1 = default, 2 = verbose, 3 = debug verbosity = 1 # > Number of parallel processes to use. Corresponds to the number of physical CPU cores used. Options: parallel = 1 -# > Maximum number of generation/optimization try-and-error cycles per molecule. Options: -max_cycles = 100 +# > Maximum number of generation & optimization trial-and-error cycles per molecule. Options: +max_cycles = 200 # > Number of molecules to generate. Options: num_molecules = 1 -# > Do post-processing (checking for HL gap, etc.) after the optimization. Options: +# > Do post-processing after the optimization with another engine (e.g., `orca`). Default: false. Options: postprocess = false -# > Switch molecule structure XYZ writing on and off (default: true). 
Options: +# > Switch molecule structure XYZ writing on and off. Default: false. Options: write_xyz = true [generate] @@ -26,13 +26,14 @@ max_num_atoms = 10 # > Initial coordinate scaling factor. Options: init_scaling = 3.0 # > Increase in the coordinate scaling factor per trial after check_distance was not met. Options: -increase_scaling_factor = 1.3 +increase_scaling_factor = 1.1 # > Scaling factor for the van der Waals radii employed for the fragment detection. Options: scale_fragment_detection = 1.25 # > Scaling factor for the minimal distance between two atoms based on the sum of the van der Waals radii. Options: scale_minimal_distance = 0.8 -# > Contract the coordinates after the initial generation. Leads to more cluster-like and less extended structures. Options: -contract_coords = false +# > Contract the coordinates after the initial generation. Leads to more cluster-like and less extended structures +# and can speed up the generation for larger molecules significantly. Options: +contract_coords = true # > Atom types and their minimum and maximum occurrences. Format: ":-" # > Elements that are not specified are only added by random selection. # > A star sign (*) can be used as a wildcard for integer value. @@ -40,7 +41,6 @@ element_composition = "C:2-3, H:1-2, O:1-2, N:1-*" # > Atom types that are not chosen for random selection. Format: ", , ..." # > CAUTION: This option is overridden by the 'element_composition' option. # > I.e., if an element is specified in 'element_composition' with an occurrence > 0, it will be added to the molecule anyway. 
-# > Example: forbidden_elements = "18,57-*" forbidden_elements = "57-71, 81-*" [refine] From 2e6493c0c5ddc8e3f59662b3021491238e6d8430 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marcel=20M=C3=BCller?= Date: Fri, 8 Nov 2024 01:30:10 +0100 Subject: [PATCH 4/7] align defaults MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Marcel Müller --- CHANGELOG.md | 11 +++++++---- src/mindlessgen/prog/config.py | 18 +++++++++--------- 2 files changed, 16 insertions(+), 13 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 2037616..cdd291d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,8 +9,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - vdW radii scaling parameter can now be adjusted via `mindlessgen.toml` or CLI - check_distance function now checks based on the sum of the van der Waals radii and a scaling factor acessible via `mindlessgen.toml` or CLI - better type hints for `Callables` -- a clearer differentiation between the distinct scaling factors for the van der Waals radii. -- `README.md` with more detailed explanation of the element composition function. 
+- clearer differentiation between the distinct scaling factors for the van der Waals radii +- `README.md` with more detailed explanation of the element composition function +- Default `max_cycles` for the generation & refinement set to 200 ### Fixed - unit conversion for (currenly unused) vdW radii from the original Fortran project @@ -18,17 +19,19 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - bug in `postprocess_mol` which led to an unassigned return variable in the single-point case - bug leading to `UnicodeDecodeError` when reading `xtb` output files - bug with all atom lists being initialized with a length of 102 instead of 103 +- inconsistent default values for the `mindlessgen.toml` and the `ConfigManager` class ### Added - support for the novel "g-xTB" method (working title: GP3-xTB) -- function which contracts the coordinates after the initial generation. -- function which is able to printout the xyz coordinates to the terminal similar to the `.xyz` layout. +- function which contracts the coordinates after the initial generation +- function which is able to printout the xyz coordinates to the terminal similar to the `.xyz` layout - elements 87 to 103 are accessible via the element composition. If `xtb` is the engine, the elements will be replaced by their lighter homologues. - support for `python-3.13` ### Breaking Changes - Removal of the `dist_threshold` flag and in the `-toml` file. - The number of unpaired electrons (`Molecule.uhf`) is now set to 0 if `xtb` is used as `QMMethod` and a lanthanide is within the molecule to match the `f-in-core` approximation. +- "Contract Coordinates" functionality set to `true` by default in the `mindlessgen.toml` file. 
## [0.4.0] - 2024-09-19 ### Changed diff --git a/src/mindlessgen/prog/config.py b/src/mindlessgen/prog/config.py index dc07c51..3a25867 100644 --- a/src/mindlessgen/prog/config.py +++ b/src/mindlessgen/prog/config.py @@ -37,7 +37,7 @@ class GeneralConfig(BaseConfig): def __init__(self: GeneralConfig) -> None: self._verbosity: int = 1 - self._max_cycles: int = 100 + self._max_cycles: int = 200 self._print_config: bool = False self._parallel: int = 1 self._num_molecules: int = 1 @@ -174,15 +174,15 @@ class GenerateConfig(BaseConfig): """ def __init__(self: GenerateConfig) -> None: - self._min_num_atoms: int = 2 - self._max_num_atoms: int = 100 + self._min_num_atoms: int = 5 + self._max_num_atoms: int = 10 self._init_coord_scaling: float = 3.0 - self._increase_scaling_factor: float = 1.3 + self._increase_scaling_factor: float = 1.25 self._element_composition: dict[int, tuple[int | None, int | None]] = {} self._forbidden_elements: list[int] | None = None self._scale_fragment_detection: float = 1.25 self._scale_minimal_distance: float = 0.8 - self._contract_coords: bool = False + self._contract_coords: bool = True def get_identifier(self) -> str: return "generate" @@ -409,7 +409,7 @@ class RefineConfig(BaseConfig): """ def __init__(self: RefineConfig) -> None: - self._max_frag_cycles: int = 100 + self._max_frag_cycles: int = 10 self._engine: str = "xtb" self._hlgap: float = 0.5 self._debug: bool = False @@ -495,8 +495,8 @@ class PostProcessConfig(BaseConfig): def __init__(self: PostProcessConfig) -> None: self._engine: str = "orca" - self._opt_cycles: int | None = None - self._optimize: bool = False + self._opt_cycles: int | None = 5 + self._optimize: bool = True self._debug: bool = False def get_identifier(self) -> str: @@ -624,7 +624,7 @@ class ORCAConfig(BaseConfig): def __init__(self: ORCAConfig) -> None: self._orca_path: str | Path = "orca" self._functional: str = "PBE" - self._basis: str = "" + self._basis: str = "def2-SVP" self._gridsize: int = 1 
self._scf_cycles: int = 100 From e8aff5d285ab35079da06aef623908661f6ee230 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marcel=20M=C3=BCller?= Date: Fri, 8 Nov 2024 01:33:54 +0100 Subject: [PATCH 5/7] update reference default value MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Marcel Müller --- test/test_config/test_config_set_attributes.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/test_config/test_config_set_attributes.py b/test/test_config/test_config_set_attributes.py index 1505ba0..414b435 100644 --- a/test/test_config/test_config_set_attributes.py +++ b/test/test_config/test_config_set_attributes.py @@ -150,7 +150,7 @@ def test_refine_config_property_setters( @pytest.mark.parametrize( "property_name, initial_value", [ - ("max_frag_cycles", 100), + ("max_frag_cycles", 10), ("engine", "xtb"), ], ) From e102b8f65bd41fbf690edabe90d9ffc1701d7ef0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marcel=20M=C3=BCller?= Date: Fri, 8 Nov 2024 01:42:55 +0100 Subject: [PATCH 6/7] update tests MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Marcel Müller --- src/mindlessgen/prog/config.py | 2 +- test/test_config/test_config_set_attributes.py | 12 ++++++------ test/test_molecules/test_refinement.py | 2 ++ 3 files changed, 9 insertions(+), 7 deletions(-) diff --git a/src/mindlessgen/prog/config.py b/src/mindlessgen/prog/config.py index 3a25867..2ab9126 100644 --- a/src/mindlessgen/prog/config.py +++ b/src/mindlessgen/prog/config.py @@ -177,7 +177,7 @@ def __init__(self: GenerateConfig) -> None: self._min_num_atoms: int = 5 self._max_num_atoms: int = 10 self._init_coord_scaling: float = 3.0 - self._increase_scaling_factor: float = 1.25 + self._increase_scaling_factor: float = 1.1 self._element_composition: dict[int, tuple[int | None, int | None]] = {} self._forbidden_elements: list[int] | None = None self._scale_fragment_detection: float = 1.25 
diff --git a/test/test_config/test_config_set_attributes.py b/test/test_config/test_config_set_attributes.py index 414b435..feb7ae0 100644 --- a/test/test_config/test_config_set_attributes.py +++ b/test/test_config/test_config_set_attributes.py @@ -43,7 +43,7 @@ def test_general_config_property_setters( "property_name, initial_value", [ ("verbosity", 1), - ("max_cycles", 100), + ("max_cycles", 200), ("print_config", False), ("parallel", 1), ("num_molecules", 1), @@ -110,10 +110,10 @@ def test_generate_config_element_composition( @pytest.mark.parametrize( "property_name, initial_value", [ - ("min_num_atoms", 2), - ("max_num_atoms", 100), + ("min_num_atoms", 5), + ("max_num_atoms", 10), ("init_coord_scaling", 3.0), - ("increase_scaling_factor", 1.3), + ("increase_scaling_factor", 1.1), ("element_composition", {}), ("forbidden_elements", None), ], @@ -127,8 +127,8 @@ def test_generate_config_default_values(property_name, initial_value): @pytest.mark.parametrize( "property_name, valid_value, invalid_value, expected_exception", [ - ("max_frag_cycles", 100, -1, ValueError), - ("max_frag_cycles", 100, "100", TypeError), + ("max_frag_cycles", 200, -1, ValueError), + ("max_frag_cycles", 200, "100", TypeError), ("engine", "xtb", 123, TypeError), ("engine", "xtb", "g16", ValueError), ], diff --git a/test/test_molecules/test_refinement.py b/test/test_molecules/test_refinement.py index 1471dfd..bb95bd8 100644 --- a/test/test_molecules/test_refinement.py +++ b/test/test_molecules/test_refinement.py @@ -122,6 +122,8 @@ def test_iterative_optimization(mol_C13H14: Molecule, mol_C7H8: Molecule) -> Non """ # initialize a configuration object config = ConfigManager() + config.generate.min_num_atoms = 2 + config.generate.max_num_atoms = 100 config.refine.hlgap = 0.001 # TODO: Change charge assignment such that # fragment charge is not completely random anymore. 
Currently, that's the # reason for a virtually switched off HL gap check (fragment can be -2, 0, 2) From f68d3d636c675282218ba29786c2a3f92653dd60 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marcel=20M=C3=BCller?= Date: Fri, 8 Nov 2024 09:51:19 +0100 Subject: [PATCH 7/7] correct inconsistent boolean MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Marcel Müller --- mindlessgen.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mindlessgen.toml b/mindlessgen.toml index 6cdc7ad..7340de4 100644 --- a/mindlessgen.toml +++ b/mindlessgen.toml @@ -15,7 +15,7 @@ max_cycles = 200 num_molecules = 1 # > Do post-processing after the optimization with another engine (e.g., `orca`). Default: false. Options: postprocess = false -# > Switch molecule structure XYZ writing on and off. Default: false. Options: +# > Switch molecule structure XYZ writing on and off. Default: true. Options: write_xyz = true [generate]