From 07039eec4ace94b77bfe7d850ff29f9cd2593c2f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jonathan=20Sch=C3=B6ps?= Date: Tue, 1 Oct 2024 16:22:41 +0200 Subject: [PATCH 1/5] Implemented the van der Waals radii inside the check distance function MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Jonathan Schöps --- CHANGELOG.md | 1 + mindlessgen.toml | 2 ++ src/mindlessgen/cli/cli_parser.py | 7 ++++++ .../molecules/generate_molecule.py | 17 +++++++++---- src/mindlessgen/prog/config.py | 19 ++++++++++++++ test/test_generate/test_generate_molecule.py | 25 ++++++++++++------- 6 files changed, 57 insertions(+), 14 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index e5ba49e..78e0082 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] ### Changed - vdW radii scaling parameter can now be adjusted via `mindlessgen.toml` or CLI +- The check_distance function now checks based on the sum of the covalent radii and a scaling factor accessible via `mindlessgen.toml` or CLI ### Fixed - Unit conversion for (currenly unused) vdW radii from the original Fortran project diff --git a/mindlessgen.toml b/mindlessgen.toml index c0e2031..5d49bcd 100644 --- a/mindlessgen.toml +++ b/mindlessgen.toml @@ -31,6 +31,8 @@ increase_scaling_factor = 1.3 dist_threshold = 1.2 # > Scaling factor for the employed van der Waals radii. Options: scale_vdw_radii = 1.3333 +# > Scaling factor for the minimal bondlength based on the sum of the can der Waals radii. Options: +scale_minimal_bondlength = 0.75 # > Atom types and their minimum and maximum occurrences. Format: ":-" # > Elements that are not specified are only added by random selection. # > A star sign (*) can be used as a wildcard for integer value. 
diff --git a/src/mindlessgen/cli/cli_parser.py b/src/mindlessgen/cli/cli_parser.py index 860935c..8e3abf9 100644 --- a/src/mindlessgen/cli/cli_parser.py +++ b/src/mindlessgen/cli/cli_parser.py @@ -92,6 +92,12 @@ def cli_parser(argv: Sequence[str] | None = None) -> dict: required=False, help="Scaling factor for van der Waals radii.", ) + parser.add_argument( + "--scale-minimal-bondlength", + type=float, + required=False, + help="Minimum bond length scaling factor.", + ) ### Molecule generation arguments ### parser.add_argument( @@ -273,6 +279,7 @@ def cli_parser(argv: Sequence[str] | None = None) -> dict: "element_composition": args_dict["element_composition"], "forbidden_elements": args_dict["forbidden_elements"], "scale_vdw_radii": args_dict["scale_vdw_radii"], + "scale_minimal_bondlength": args_dict["scale_minimal_bondlength"], } # XTB specific arguments rev_args_dict["xtb"] = {"xtb_path": args_dict["xtb_path"]} diff --git a/src/mindlessgen/molecules/generate_molecule.py b/src/mindlessgen/molecules/generate_molecule.py index 6226d75..33f4442 100644 --- a/src/mindlessgen/molecules/generate_molecule.py +++ b/src/mindlessgen/molecules/generate_molecule.py @@ -6,6 +6,7 @@ import numpy as np from ..prog import GenerateConfig from .molecule import Molecule +from .refinement import get_cov_radii, COV_RADII from .miscellaneous import ( set_random_charge, get_alkali_metals, @@ -36,9 +37,9 @@ def generate_random_molecule( mol.xyz, mol.ati = generate_coordinates( at=mol.atlist, scaling=config_generate.init_coord_scaling, - dist_threshold=config_generate.dist_threshold, inc_scaling_factor=config_generate.increase_scaling_factor, verbosity=verbosity, + scale_bondlength=config_generate.scale_minimal_bondlength, ) mol.charge, mol.uhf = set_random_charge(mol.ati, verbosity) mol.set_name_from_formula() @@ -322,9 +323,9 @@ def check_composition(): def generate_coordinates( at: np.ndarray, scaling: float, - dist_threshold: float, inc_scaling_factor: float = 1.3, verbosity: int = 1, 
+ scale_bondlength: float = 0.75, ) -> tuple[np.ndarray, np.ndarray]: """ Generate random coordinates for a molecule. @@ -335,7 +336,7 @@ def generate_coordinates( xyz, ati = generate_random_coordinates(at) xyz = xyz * eff_scaling # do while check_distances is False - while not check_distances(xyz, dist_threshold): + while not check_distances(xyz, ati, scale_bondlength=scale_bondlength): if verbosity > 1: print( f"Distance check failed. Increasing expansion factor by {inc_scaling_factor}..." @@ -370,7 +371,7 @@ def generate_random_coordinates(at: np.ndarray) -> tuple[np.ndarray, np.ndarray] return xyz, ati -def check_distances(xyz: np.ndarray, threshold: float) -> bool: +def check_distances(xyz: np.ndarray, ati: np.ndarray, scale_bondlength: float) -> bool: """ Check if the distances between atoms are larger than a threshold. """ @@ -378,6 +379,12 @@ def check_distances(xyz: np.ndarray, threshold: float) -> bool: for i in range(xyz.shape[0] - 1): for j in range(i + 1, xyz.shape[0]): r = np.linalg.norm(xyz[i, :] - xyz[j, :]) - if r < threshold: + sum_radii = get_cov_radii(ati[i], COV_RADII) + get_cov_radii( + ati[j], COV_RADII + ) + print( + f"scaled bondlength: {scale_bondlength}, sum_radii: {sum_radii}, scaled sum_radii: {scale_bondlength * sum_radii}" + ) + if r < scale_bondlength * sum_radii: return False return True diff --git a/src/mindlessgen/prog/config.py b/src/mindlessgen/prog/config.py index 6562729..d6b34e9 100644 --- a/src/mindlessgen/prog/config.py +++ b/src/mindlessgen/prog/config.py @@ -180,6 +180,7 @@ def __init__(self: GenerateConfig) -> None: self._element_composition: dict[int, tuple[int | None, int | None]] = {} self._forbidden_elements: list[int] | None = None self._scale_vdw_radii: float = 4.0 / 3.0 + self._scale_minimal_bondlength: float = 0.75 def get_identifier(self) -> str: return "generate" @@ -383,6 +384,24 @@ def scale_vdw_radii(self, scale_vdw_radii: float): raise ValueError("Scale van der Waals radii should be greater than 0.") 
self._scale_vdw_radii = scale_vdw_radii + @property + def scale_minimal_bondlength(self): + """ + Get the scaling factor for minimal bond length. + """ + return self._scale_minimal_bondlength + + @scale_minimal_bondlength.setter + def scale_minimal_bondlength(self, scale_minimal_bondlength: float): + """ + Set the scaling factor for minimal bond length. + """ + if not isinstance(scale_minimal_bondlength, float): + raise TypeError("Scale minimal bond length should be a float.") + if scale_minimal_bondlength <= 0: + raise ValueError("Scale minimal bond length should be greater than 0.") + self._scale_minimal_bondlength = scale_minimal_bondlength + class RefineConfig(BaseConfig): """ diff --git a/test/test_generate/test_generate_molecule.py b/test/test_generate/test_generate_molecule.py index 520eaff..aa35545 100644 --- a/test/test_generate/test_generate_molecule.py +++ b/test/test_generate/test_generate_molecule.py @@ -218,49 +218,56 @@ def test_generate_coordinates() -> None: @pytest.mark.parametrize( - "xyz, threshold, expected, description", + "xyz, ati, scale_minimal_bondlength, expected, description", [ ( np.array([[0.0, 0.0, 0.0], [1.0, 0.0, 0.0]]), + np.array([0, 0]), 0.5, True, "Two atoms with distance greater than threshold (1.0 > 0.5)", ), ( np.array([[0.0, 0.0, 0.0], [0.4, 0.0, 0.0]]), - 0.5, + np.array([0, 0]), + 0.75, False, "Two atoms with distance less than threshold (0.4 < 0.5)", ), ( np.array([[0.0, 0.0, 0.0], [1.0, 0.0, 0.0], [2.0, 0.0, 0.0]]), - 0.5, + np.array([0, 0, 0]), + 1.0, True, "Three atoms in a line with distances greater than threshold", ), ( np.array([[0.0, 0.0, 0.0], [0.4, 0.0, 0.0], [1.0, 0.0, 0.0]]), - 0.5, + np.array([0, 0, 0]), + 0.75, False, "Three atoms with one pair close together: distance between first two is less than threshold", ), ( np.array([[0.0, 0.0, 0.0]]), + np.array([0]), 0.5, True, "Single atom, no distances to compare", ), ( np.array([[0.0, 0.0, 0.0], [0.0, 0.0, 0.0]]), - 0.5, + np.array([0, 0]), + 0.75, False, "Two 
atoms at identical positions: distance is zero, less than threshold", ), ( np.array([[0.0, 0.0, 0.0], [1.0, 1.0, 1.0]]), - 1.7320, + np.array([0, 0]), + 2.70625, True, - "Two atoms with diagonal distance just above threshold (sqrt(3) ≈ 1.732)", + "Two atoms with diagonal distance just above threshold (sqrt(3) ≈ 1.732, 1.7323/0.64 = 2.70625)(0.64 = sum of covalent radii for H)", ), ], ids=[ @@ -273,8 +280,8 @@ def test_generate_coordinates() -> None: "diagonal_distance", ], ) -def test_check_distances(xyz, threshold, expected, description): - assert check_distances(xyz, threshold) == expected +def test_check_distances(xyz, ati, scale_minimal_bondlength, expected, description): + assert check_distances(xyz, ati, scale_minimal_bondlength) == expected def test_generate_atom_list_min_larger_than_max(default_generate_config): From eb5b7b147584451a50357ccaa04f84da85bde33a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Joanthan=20Sch=C3=B6ps?= <106986430+jonathan-schoeps@users.noreply.github.com> Date: Tue, 1 Oct 2024 16:36:51 +0200 Subject: [PATCH 2/5] Update generate_molecule.py A print statement was not removed --- src/mindlessgen/molecules/generate_molecule.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/mindlessgen/molecules/generate_molecule.py b/src/mindlessgen/molecules/generate_molecule.py index 33f4442..0f0eac4 100644 --- a/src/mindlessgen/molecules/generate_molecule.py +++ b/src/mindlessgen/molecules/generate_molecule.py @@ -382,9 +382,6 @@ def check_distances(xyz: np.ndarray, ati: np.ndarray, scale_bondlength: float) - sum_radii = get_cov_radii(ati[i], COV_RADII) + get_cov_radii( ati[j], COV_RADII ) - print( - f"scaled bondlength: {scale_bondlength}, sum_radii: {sum_radii}, scaled sum_radii: {scale_bondlength * sum_radii}" - ) if r < scale_bondlength * sum_radii: return False return True From 9d8bb61e393b3ad7e8d061445ba4ada52f411c04 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Joanthan=20Sch=C3=B6ps?= <106986430+jonathan-schoeps@users.noreply.github.com> Date: 
Tue, 1 Oct 2024 16:54:01 +0200 Subject: [PATCH 3/5] Update mindlessgen.toml A typo in the `.toml` file is fixed --- mindlessgen.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mindlessgen.toml b/mindlessgen.toml index 5d49bcd..e39136d 100644 --- a/mindlessgen.toml +++ b/mindlessgen.toml @@ -31,7 +31,7 @@ increase_scaling_factor = 1.3 dist_threshold = 1.2 # > Scaling factor for the employed van der Waals radii. Options: scale_vdw_radii = 1.3333 -# > Scaling factor for the minimal bondlength based on the sum of the can der Waals radii. Options: +# > Scaling factor for the minimal bondlength based on the sum of the van der Waals radii. Options: scale_minimal_bondlength = 0.75 # > Atom types and their minimum and maximum occurrences. Format: ":-" # > Elements that are not specified are only added by random selection. From d8ce5abe92b226fdbaec890870f1723f9c6c85fe Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jonathan=20Sch=C3=B6ps?= Date: Wed, 2 Oct 2024 11:10:31 +0200 Subject: [PATCH 4/5] Implementation of the suggested changes for the PR #51 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Jonathan Schöps --- mindlessgen.toml | 6 +-- src/mindlessgen/cli/cli_parser.py | 7 ---- .../molecules/generate_molecule.py | 3 -- src/mindlessgen/prog/config.py | 19 ---------- test/fixtures/example_config.toml | 4 +- .../test_config/test_config_set_attributes.py | 3 -- test/test_config/test_load_from_toml.py | 1 - test/test_generate/test_generate_molecule.py | 38 +++++++++++++++---- 8 files changed, 34 insertions(+), 47 deletions(-) diff --git a/mindlessgen.toml b/mindlessgen.toml index 5d49bcd..d6501e3 100644 --- a/mindlessgen.toml +++ b/mindlessgen.toml @@ -25,13 +25,11 @@ min_num_atoms = 5 max_num_atoms = 10 # > Initial coordinate scaling factor. Options: init_scaling = 3.0 -# > Increase in the coordinate scaling factor per trial after dist_threshold was not met. 
Options: +# > Increase in the coordinate scaling factor per trial after check_distance was not met. Options: increase_scaling_factor = 1.3 -# > Distance threshold for the inital, randomly generated coordinates. Options: -dist_threshold = 1.2 # > Scaling factor for the employed van der Waals radii. Options: scale_vdw_radii = 1.3333 -# > Scaling factor for the minimal bondlength based on the sum of the can der Waals radii. Options: +# > Scaling factor for the minimal bondlength based on the sum of the van der Waals radii. Options: scale_minimal_bondlength = 0.75 # > Atom types and their minimum and maximum occurrences. Format: ":-" # > Elements that are not specified are only added by random selection. diff --git a/src/mindlessgen/cli/cli_parser.py b/src/mindlessgen/cli/cli_parser.py index 8e3abf9..2a8e345 100644 --- a/src/mindlessgen/cli/cli_parser.py +++ b/src/mindlessgen/cli/cli_parser.py @@ -125,12 +125,6 @@ def cli_parser(argv: Sequence[str] | None = None) -> dict: help="Factor with which the coordinate scaling factor is increased " + "after a failed attempt.", ) - parser.add_argument( - "--dist-threshold", - type=float, - required=False, - help="Distance threshold for generating coordinates.", - ) parser.add_argument( "--element-composition", type=str, @@ -275,7 +269,6 @@ def cli_parser(argv: Sequence[str] | None = None) -> dict: "max_num_atoms": args_dict["max_num_atoms"], "init_coord_scaling": args_dict["init_coord_scaling"], "increase_scaling_factor": args_dict["increase_scaling_factor"], - "dist_threshold": args_dict["dist_threshold"], "element_composition": args_dict["element_composition"], "forbidden_elements": args_dict["forbidden_elements"], "scale_vdw_radii": args_dict["scale_vdw_radii"], diff --git a/src/mindlessgen/molecules/generate_molecule.py b/src/mindlessgen/molecules/generate_molecule.py index 33f4442..0f0eac4 100644 --- a/src/mindlessgen/molecules/generate_molecule.py +++ b/src/mindlessgen/molecules/generate_molecule.py @@ -382,9 +382,6 @@ def 
check_distances(xyz: np.ndarray, ati: np.ndarray, scale_bondlength: float) - sum_radii = get_cov_radii(ati[i], COV_RADII) + get_cov_radii( ati[j], COV_RADII ) - print( - f"scaled bondlength: {scale_bondlength}, sum_radii: {sum_radii}, scaled sum_radii: {scale_bondlength * sum_radii}" - ) if r < scale_bondlength * sum_radii: return False return True diff --git a/src/mindlessgen/prog/config.py b/src/mindlessgen/prog/config.py index d6b34e9..6f32e1a 100644 --- a/src/mindlessgen/prog/config.py +++ b/src/mindlessgen/prog/config.py @@ -175,7 +175,6 @@ def __init__(self: GenerateConfig) -> None: self._min_num_atoms: int = 2 self._max_num_atoms: int = 100 self._init_coord_scaling: float = 3.0 - self._dist_threshold: float = 1.2 self._increase_scaling_factor: float = 1.3 self._element_composition: dict[int, tuple[int | None, int | None]] = {} self._forbidden_elements: list[int] | None = None @@ -239,24 +238,6 @@ def init_coord_scaling(self, init_coord_scaling: float): raise ValueError("Initial coordinate scaling should be greater than 0.") self._init_coord_scaling = init_coord_scaling - @property - def dist_threshold(self): - """ - Get the distance threshold. - """ - return self._dist_threshold - - @dist_threshold.setter - def dist_threshold(self, dist_threshold: float): - """ - Set the distance threshold. - """ - if not isinstance(dist_threshold, float): - raise TypeError("Distance threshold should be a float.") - if dist_threshold <= 0: - raise ValueError("Distance threshold should be greater than 0.") - self._dist_threshold = dist_threshold - @property def increase_scaling_factor(self): """ diff --git a/test/fixtures/example_config.toml b/test/fixtures/example_config.toml index 49f6ea4..2b3f4e4 100644 --- a/test/fixtures/example_config.toml +++ b/test/fixtures/example_config.toml @@ -23,10 +23,8 @@ min_num_atoms = 2 max_num_atoms = 100 # > Initial coordinate scaling factor. 
Options: init_scaling = 3.0 -# > Increase in the coordinate scaling factor per trial after dist_threshold was not met. Options: +# > Increase in the coordinate scaling factor per trial after check_distance was not met. Options: increase_scaling_factor = 1.3 -# > Distance threshold for the inital, randomly generated coordinates. Options: -dist_threshold = 1.2 # > Atom types and their minimum and maximum occurrences. Format: ":-" # > Elements that are not specified are only added by random selection. # > A star sign (*) can be used as a wildcard for integer value. diff --git a/test/test_config/test_config_set_attributes.py b/test/test_config/test_config_set_attributes.py index d52ba5d..1505ba0 100644 --- a/test/test_config/test_config_set_attributes.py +++ b/test/test_config/test_config_set_attributes.py @@ -65,8 +65,6 @@ def test_general_config_default_values(property_name, initial_value): ("max_num_atoms", 80, None, TypeError), ("init_coord_scaling", 1.0, -0.5, ValueError), ("init_coord_scaling", 1.0, "1.0", TypeError), - ("dist_threshold", 1.5, -1.0, ValueError), - ("dist_threshold", 1.5, "1.5", TypeError), ("increase_scaling_factor", 1.1, 0.0, ValueError), ("increase_scaling_factor", 1.1, "1.1", TypeError), ], @@ -115,7 +113,6 @@ def test_generate_config_element_composition( ("min_num_atoms", 2), ("max_num_atoms", 100), ("init_coord_scaling", 3.0), - ("dist_threshold", 1.2), ("increase_scaling_factor", 1.3), ("element_composition", {}), ("forbidden_elements", None), diff --git a/test/test_config/test_load_from_toml.py b/test/test_config/test_load_from_toml.py index 7a10d41..0548412 100644 --- a/test/test_config/test_load_from_toml.py +++ b/test/test_config/test_load_from_toml.py @@ -36,7 +36,6 @@ def test_load_generate_config(config_manager): assert config_manager.generate.max_num_atoms == 100 assert config_manager.generate.init_coord_scaling == 3.0 assert config_manager.generate.increase_scaling_factor == 1.3 - assert config_manager.generate.dist_threshold == 
1.2 assert config_manager.generate.element_composition == { 5: (2, 10), # Carbon (C) 0: (10, 20), # Hydrogen (H) diff --git a/test/test_generate/test_generate_molecule.py b/test/test_generate/test_generate_molecule.py index aa35545..df96b3a 100644 --- a/test/test_generate/test_generate_molecule.py +++ b/test/test_generate/test_generate_molecule.py @@ -225,49 +225,70 @@ def test_generate_coordinates() -> None: np.array([0, 0]), 0.5, True, - "Two atoms with distance greater than threshold (1.0 > 0.5)", + "Two Hydrogenes with distance greater than threshold (1.0 > 0.5)", ), ( np.array([[0.0, 0.0, 0.0], [0.4, 0.0, 0.0]]), np.array([0, 0]), 0.75, False, - "Two atoms with distance less than threshold (0.4 < 0.5)", + "Two Hydrogenes with distance less than threshold (0.4 < 0.5)", ), ( np.array([[0.0, 0.0, 0.0], [1.0, 0.0, 0.0], [2.0, 0.0, 0.0]]), np.array([0, 0, 0]), 1.0, True, - "Three atoms in a line with distances greater than threshold", + "Three Hydrogenes in a line with distances greater than threshold", ), ( np.array([[0.0, 0.0, 0.0], [0.4, 0.0, 0.0], [1.0, 0.0, 0.0]]), np.array([0, 0, 0]), 0.75, False, - "Three atoms with one pair close together: distance between first two is less than threshold", + "Three Hydrogenes with one pair close together: distance between first two is less than threshold", ), ( np.array([[0.0, 0.0, 0.0]]), np.array([0]), 0.5, True, - "Single atom, no distances to compare", + "Single Hydrogene, no distances to compare", ), ( np.array([[0.0, 0.0, 0.0], [0.0, 0.0, 0.0]]), np.array([0, 0]), 0.75, False, - "Two atoms at identical positions: distance is zero, less than threshold", + "Two Hydrogenes at identical positions: distance is zero, less than threshold", ), ( np.array([[0.0, 0.0, 0.0], [1.0, 1.0, 1.0]]), np.array([0, 0]), 2.70625, True, - "Two atoms with diagonal distance just above threshold (sqrt(3) ≈ 1.732, 1.7323/0.64 = 2.70625)(0.64 = sum of covalent radii for H)", + "Two Hydrogenes with diagonal distance just above threshold 
(sqrt(3) ≈ 1.732, 1.7323/0.64 = 2.70625)(0.64 = sum of covalent radii for H)", + ), + ( + np.array([[0.0, 0.0, 0.0], [2.3, 0.0, 0.0]]), + np.array([18, 8]), + 0.9, + True, + "Potassium plus flourine with distance greater than threshold (r = 2.3, scaled_minimal_bondlength = 2.16)", + ), + ( + np.array([[0.0, 0.0, 0.0], [2.3, 0.0, 0.0]]), + np.array([18, 8]), + 2.0, + False, + "Potassium plus flourine with distance less than threshold (r = 2.3, scaled_minimal_bondlength = 4.8)", + ), + ( + np.array([[0.0, 0.0, 0.0], [2.3, 0.0, 0.0]]), + np.array([18, 16]), + 0.9, + False, + "Potassium plus chlorine with distance less than threshold (r = 2.3, scaled_minimal_bondlength = 2.61)", + ), ], ids=[ @@ -278,6 +299,9 @@ def test_generate_coordinates() -> None: "single_atom", "two_identical", "diagonal_distance", + "different_elements_apart", + "different_elements_scaled_close", + "different_elements_close", ], ) def test_check_distances(xyz, ati, scale_minimal_bondlength, expected, description): From 51e38407742137d7ebc43da123cd63f63d87c001 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jonathan=20Sch=C3=B6ps?= Date: Wed, 2 Oct 2024 11:39:45 +0200 Subject: [PATCH 5/5] update to the CHANGELOG.md file MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Jonathan Schöps --- CHANGELOG.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index ba63171..a00ab05 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -18,6 +18,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Added - Support for the novel "g-xTB" method (working title: GP3-xTB) +### Breaking Changes +- Removal of the `dist_threshold` CLI flag and of the corresponding option in the `.toml` file. + ## [0.4.0] - 2024-09-19 ### Changed - Default file name of `.xyz` file contains prefix `mlm_`