From 3138f6770dd88d5f8dc63388f4c32a0afc382af1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jonathan=20Sch=C3=B6ps?= Date: Tue, 8 Oct 2024 13:00:41 +0200 Subject: [PATCH 1/8] Update on ReadMe.md and clearification with the different scaling factors MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Jonathan Schöps --- CHANGELOG.md | 2 + README.md | 5 +++ mindlessgen.toml | 8 ++-- src/mindlessgen/cli/cli_parser.py | 12 +++--- .../molecules/generate_molecule.py | 14 ++++--- src/mindlessgen/molecules/refinement.py | 2 +- src/mindlessgen/prog/config.py | 42 +++++++++---------- 7 files changed, 47 insertions(+), 38 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 098031b..37f8dce 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - vdW radii scaling parameter can now be adjusted via `mindlessgen.toml` or CLI - The check_distance function now checks based on the sum of the van der Waals radii and a scaling factor acessible via `mindlessgen.toml` or CLI - better type hints for `Callables` +- A more clear difference between the different scaling factors for the van der Waals radii. +- Update in the `ReadMe.md` which explains more detaild the element composition function. ### Fixed - Unit conversion for (currenly unused) vdW radii from the original Fortran project diff --git a/README.md b/README.md index 050eca8..abd9cc8 100644 --- a/README.md +++ b/README.md @@ -84,6 +84,11 @@ If the path is not specified with `-c/--config`, `mindlessgen.toml` will be sear The active configuration can be printed using `--print-config`. +### Element composition +Here is a little explanation for the usage of the `element composition` module. There are two different parameters to set. First which elements should occure within the generated molecule, and second is how much of each specific element should occure within the generated molecule. 
+- Example 1: C:1-3, O:1-1, H:1-*. This example would result in a Molecule with 1,2 or 3 carbon atoms, exactly 1 oxygen atom, and between 1 and a random number of hydrogen atoms. +- Example 2: Na:10-10, In:10-10, O:20-20. This examle would result in a molecule with exactly 10 sodium atoms 10 indium atoms and 20 oxygen atoms. For this case the number of atoms (40) has to be within the min_num_atoms and max_num_atom intervall, but would always return a molecule with 40 atoms. + ## Citation When using the program for academic purposes, please cite: diff --git a/mindlessgen.toml b/mindlessgen.toml index 9cb79b5..54dc6a3 100644 --- a/mindlessgen.toml +++ b/mindlessgen.toml @@ -27,10 +27,10 @@ max_num_atoms = 10 init_scaling = 3.0 # > Increase in the coordinate scaling factor per trial after check_distance was not met. Options: increase_scaling_factor = 1.3 -# > Scaling factor for the employed van der Waals radii. Options: -scale_vdw_radii = 1.25 -# > Scaling factor for the minimal bondlength based on the sum of the van der Waals radii. Options: -scale_minimal_bondlength = 0.8 +# > Scaling factor for the fragment detection for a molecule based on the sum of van der Waals radii. Options: +scale_fragment_detection = 1.25 +# > Scaling factor for the minimal diatance between two atoms based on the sum of the van der Waals radii. Options: +scale_minimal_distance = 0.8 # > Contract the coordinates after the initial generation. Leads to more cluster-like and less extended structures. Options: contract_coords = false # > Atom types and their minimum and maximum occurrences. 
Format: ":-" diff --git a/src/mindlessgen/cli/cli_parser.py b/src/mindlessgen/cli/cli_parser.py index 8480108..47c8484 100644 --- a/src/mindlessgen/cli/cli_parser.py +++ b/src/mindlessgen/cli/cli_parser.py @@ -87,16 +87,16 @@ def cli_parser(argv: Sequence[str] | None = None) -> dict: help="Do not write the molecules to xyz files.", ) parser.add_argument( - "--scale-vdw-radii", + "--scale-fragment-detection", type=float, required=False, - help="Scaling factor for van der Waals radii.", + help="Scaling factor for the fragment detection based on the van der Waals radii.", ) parser.add_argument( - "--scale-minimal-bondlength", + "--scale-minimal-distance", type=float, required=False, - help="Minimum bond length scaling factor.", + help="Minimum atom distance scaling factor.", ) ### Molecule generation arguments ### @@ -277,8 +277,8 @@ def cli_parser(argv: Sequence[str] | None = None) -> dict: "increase_scaling_factor": args_dict["increase_scaling_factor"], "element_composition": args_dict["element_composition"], "forbidden_elements": args_dict["forbidden_elements"], - "scale_vdw_radii": args_dict["scale_vdw_radii"], - "scale_minimal_bondlength": args_dict["scale_minimal_bondlength"], + "scale_fragment_detection": args_dict["scale_fragment_detection"], + "scale_minimal_distance": args_dict["scale_minimal_distance"], "contract_coords": args_dict["contract_coords"], } # XTB specific arguments diff --git a/src/mindlessgen/molecules/generate_molecule.py b/src/mindlessgen/molecules/generate_molecule.py index 9de4d88..96d0e20 100644 --- a/src/mindlessgen/molecules/generate_molecule.py +++ b/src/mindlessgen/molecules/generate_molecule.py @@ -39,13 +39,13 @@ def generate_random_molecule( scaling=config_generate.init_coord_scaling, inc_scaling_factor=config_generate.increase_scaling_factor, verbosity=verbosity, - scale_bondlength=config_generate.scale_minimal_bondlength, + scale_minimal_distance=config_generate.scale_minimal_distance, ) if config_generate.contract_coords: 
mol.xyz = contract_coordinates( xyz=mol.xyz, ati=mol.ati, - scale_minimal_distance=config_generate.scale_minimal_bondlength, + scale_minimal_distance=config_generate.scale_minimal_distance, ) mol.charge, mol.uhf = set_random_charge(mol.ati, verbosity) mol.set_name_from_formula() @@ -331,7 +331,7 @@ def generate_coordinates( scaling: float, inc_scaling_factor: float = 1.3, verbosity: int = 1, - scale_bondlength: float = 0.75, + scale_minimal_distance: float = 0.8, ) -> tuple[np.ndarray, np.ndarray]: """ Generate random coordinates for a molecule. @@ -342,7 +342,7 @@ def generate_coordinates( xyz, ati = generate_random_coordinates(at) xyz = xyz * eff_scaling # do while check_distances is False - while not check_distances(xyz, ati, scale_bondlength=scale_bondlength): + while not check_distances(xyz, ati, scale_minimal_distance=scale_minimal_distance): if verbosity > 1: print( f"Distance check failed. Increasing expansion factor by {inc_scaling_factor}..." @@ -410,7 +410,9 @@ def contract_coordinates( return xyz -def check_distances(xyz: np.ndarray, ati: np.ndarray, scale_bondlength: float) -> bool: +def check_distances( + xyz: np.ndarray, ati: np.ndarray, scale_minimal_distance: float +) -> bool: """ Check if the distances between atoms are larger than a threshold. 
""" @@ -421,6 +423,6 @@ def check_distances(xyz: np.ndarray, ati: np.ndarray, scale_bondlength: float) - sum_radii = get_cov_radii(ati[i], COV_RADII) + get_cov_radii( ati[j], COV_RADII ) - if r < scale_bondlength * sum_radii: + if r < scale_minimal_distance * sum_radii: return False return True diff --git a/src/mindlessgen/molecules/refinement.py b/src/mindlessgen/molecules/refinement.py index 291c1a9..3f406de 100644 --- a/src/mindlessgen/molecules/refinement.py +++ b/src/mindlessgen/molecules/refinement.py @@ -50,7 +50,7 @@ def iterative_optimization( # Detect fragments from the optimized molecule fragmols = detect_fragments( mol=rev_mol, - vdw_scaling=config_generate.scale_vdw_radii, + vdw_scaling=config_generate.scale_fragment_detection, verbosity=verbosity, ) diff --git a/src/mindlessgen/prog/config.py b/src/mindlessgen/prog/config.py index 98688ab..6bd0020 100644 --- a/src/mindlessgen/prog/config.py +++ b/src/mindlessgen/prog/config.py @@ -178,8 +178,8 @@ def __init__(self: GenerateConfig) -> None: self._increase_scaling_factor: float = 1.3 self._element_composition: dict[int, tuple[int | None, int | None]] = {} self._forbidden_elements: list[int] | None = None - self._scale_vdw_radii: float = 1.25 - self._scale_minimal_bondlength: float = 0.8 + self._scale_fragment_detection: float = 1.25 + self._scale_minimal_distance: float = 0.8 self._contract_coords: bool = False def get_identifier(self) -> str: @@ -349,40 +349,40 @@ def forbidden_elements(self: GenerateConfig, forbidden_str: str) -> None: self._forbidden_elements = sorted(list(forbidden_set)) @property - def scale_vdw_radii(self): + def scale_fragment_detection(self): """ - Get the scaling factor for van der Waals radii. + Get the scaling factor for the fragment detection based on the van der Waals radii. 
""" - return self._scale_vdw_radii + return self._scale_fragment_detection - @scale_vdw_radii.setter - def scale_vdw_radii(self, scale_vdw_radii: float): + @scale_fragment_detection.setter + def scale_fragment_detection(self, scale_fragment_detection: float): """ Set the scaling factor for van der Waals radii. """ - if not isinstance(scale_vdw_radii, float): + if not isinstance(scale_fragment_detection, float): raise TypeError("Scale van der Waals radii should be a float.") - if scale_vdw_radii <= 0: + if scale_fragment_detection <= 0: raise ValueError("Scale van der Waals radii should be greater than 0.") - self._scale_vdw_radii = scale_vdw_radii + self._scale_fragment_detection = scale_fragment_detection @property - def scale_minimal_bondlength(self): + def scale_minimal_distance(self): """ - Get the scaling factor for minimal bond length. + Get the scaling factor for minimal distance between two atoms. """ - return self._scale_minimal_bondlength + return self._scale_minimal_distance - @scale_minimal_bondlength.setter - def scale_minimal_bondlength(self, scale_minimal_bondlength: float): + @scale_minimal_distance.setter + def scale_minimal_distance(self, scale_minimal_distance: float): """ - Set the scaling factor for minimal bond length. + Set the scaling factor for minimal distance between two atoms. 
""" - if not isinstance(scale_minimal_bondlength, float): - raise TypeError("Scale minimal bond length should be a float.") - if scale_minimal_bondlength <= 0: - raise ValueError("Scale minimal bond length should be greater than 0.") - self._scale_minimal_bondlength = scale_minimal_bondlength + if not isinstance(scale_minimal_distance, float): + raise TypeError("Scale minimal distance should be a float.") + if scale_minimal_distance <= 0: + raise ValueError("Scale minimal distance should be greater than 0.") + self._scale_minimal_distance = scale_minimal_distance @property def contract_coords(self): From b82a944b68161bd8fedbcb1e788f38a8566ea293 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jonathan=20Sch=C3=B6ps?= <106986430+jonathan-schoeps@users.noreply.github.com> Date: Tue, 8 Oct 2024 13:40:47 +0200 Subject: [PATCH 2/8] Update CHANGELOG.md Co-authored-by: Marcel Mueller --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 37f8dce..ca4a4e7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,7 +9,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - vdW radii scaling parameter can now be adjusted via `mindlessgen.toml` or CLI - The check_distance function now checks based on the sum of the van der Waals radii and a scaling factor acessible via `mindlessgen.toml` or CLI - better type hints for `Callables` -- A more clear difference between the different scaling factors for the van der Waals radii. +- A clearer differentiation between the distinct scaling factors for the van der Waals radii. - Update in the `ReadMe.md` which explains more detaild the element composition function. 
### Fixed From 125e5a0d8ced07a0e154bea0e00e509718f85bf1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jonathan=20Sch=C3=B6ps?= <106986430+jonathan-schoeps@users.noreply.github.com> Date: Tue, 8 Oct 2024 13:40:59 +0200 Subject: [PATCH 3/8] Update CHANGELOG.md Co-authored-by: Marcel Mueller --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index ca4a4e7..497801c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,7 +10,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - The check_distance function now checks based on the sum of the van der Waals radii and a scaling factor acessible via `mindlessgen.toml` or CLI - better type hints for `Callables` - A clearer differentiation between the distinct scaling factors for the van der Waals radii. -- Update in the `ReadMe.md` which explains more detaild the element composition function. +- `README.md` with more detailed explanation of the element composition function. ### Fixed - Unit conversion for (currenly unused) vdW radii from the original Fortran project From f5074eef8c7a43bc1cf82089de6cc8d93d6ca432 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jonathan=20Sch=C3=B6ps?= <106986430+jonathan-schoeps@users.noreply.github.com> Date: Tue, 8 Oct 2024 13:41:10 +0200 Subject: [PATCH 4/8] Update mindlessgen.toml Co-authored-by: Marcel Mueller --- mindlessgen.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mindlessgen.toml b/mindlessgen.toml index 54dc6a3..fc2569b 100644 --- a/mindlessgen.toml +++ b/mindlessgen.toml @@ -27,7 +27,7 @@ max_num_atoms = 10 init_scaling = 3.0 # > Increase in the coordinate scaling factor per trial after check_distance was not met. Options: increase_scaling_factor = 1.3 -# > Scaling factor for the fragment detection for a molecule based on the sum of van der Waals radii. Options: +# > Scaling factor for the van der Waals radii employed for the fragment detection. 
Options: scale_fragment_detection = 1.25 # > Scaling factor for the minimal diatance between two atoms based on the sum of the van der Waals radii. Options: scale_minimal_distance = 0.8 From 9e3c2b40cfbf0fa284490dff5214fbb7f8f06690 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jonathan=20Sch=C3=B6ps?= <106986430+jonathan-schoeps@users.noreply.github.com> Date: Tue, 8 Oct 2024 13:41:21 +0200 Subject: [PATCH 5/8] Update README.md Co-authored-by: Marcel Mueller --- README.md | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index abd9cc8..92b2e0e 100644 --- a/README.md +++ b/README.md @@ -85,7 +85,9 @@ If the path is not specified with `-c/--config`, `mindlessgen.toml` will be sear The active configuration can be printed using `--print-config`. ### Element composition -Here is a little explanation for the usage of the `element composition` module. There are two different parameters to set. First which elements should occure within the generated molecule, and second is how much of each specific element should occure within the generated molecule. +There are two related aspects of the element composition: +1. _Which elements_ should occur within the generated molecule? +2. **How many atoms** of the specified element should occur? - Example 1: C:1-3, O:1-1, H:1-*. This example would result in a Molecule with 1,2 or 3 carbon atoms, exactly 1 oxygen atom, and between 1 and a random number of hydrogen atoms. - Example 2: Na:10-10, In:10-10, O:20-20. This examle would result in a molecule with exactly 10 sodium atoms 10 indium atoms and 20 oxygen atoms. For this case the number of atoms (40) has to be within the min_num_atoms and max_num_atom intervall, but would always return a molecule with 40 atoms. 
From 9c61361e858bd89b1c1b580b6e86cd1f2bebe514 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jonathan=20Sch=C3=B6ps?= <106986430+jonathan-schoeps@users.noreply.github.com> Date: Tue, 8 Oct 2024 13:41:34 +0200 Subject: [PATCH 6/8] Update README.md Co-authored-by: Marcel Mueller --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 92b2e0e..4e04be9 100644 --- a/README.md +++ b/README.md @@ -88,8 +88,8 @@ The active configuration can be printed using `--print-config`. There are two related aspects of the element composition: 1. _Which elements_ should occur within the generated molecule? 2. **How many atoms** of the specified element should occur? -- Example 1: C:1-3, O:1-1, H:1-*. This example would result in a Molecule with 1,2 or 3 carbon atoms, exactly 1 oxygen atom, and between 1 and a random number of hydrogen atoms. -- Example 2: Na:10-10, In:10-10, O:20-20. This examle would result in a molecule with exactly 10 sodium atoms 10 indium atoms and 20 oxygen atoms. For this case the number of atoms (40) has to be within the min_num_atoms and max_num_atom intervall, but would always return a molecule with 40 atoms. +- **Example 1**: `C:1-3, O:1-1, H:1-*` would result in a molecule with 1, 2, or 3 carbon atoms, exactly 1 oxygen atom, and between 1 and an undefined number of hydrogen atoms (i.e., at least 1). +- **Example 2**: `Na:10-10, In:10-10, O:20-20`. This example would result in a molecule with exactly 10 sodium atoms, 10 indium atoms, and 20 oxygen atoms. For a fixed element composition, the number of atoms (40) has to be within the min_num_atoms and max_num_atom interval. `mindlessgen` will consequently always return a molecule with exactly 40 atoms. 
## Citation From 2255b0064611678e862729ff24d6d1df4f14c62b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jonathan=20Sch=C3=B6ps?= <106986430+jonathan-schoeps@users.noreply.github.com> Date: Tue, 8 Oct 2024 13:41:42 +0200 Subject: [PATCH 7/8] Update mindlessgen.toml Co-authored-by: Marcel Mueller --- mindlessgen.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mindlessgen.toml b/mindlessgen.toml index fc2569b..0dc56bd 100644 --- a/mindlessgen.toml +++ b/mindlessgen.toml @@ -29,7 +29,7 @@ init_scaling = 3.0 increase_scaling_factor = 1.3 # > Scaling factor for the van der Waals radii employed for the fragment detection. Options: scale_fragment_detection = 1.25 -# > Scaling factor for the minimal diatance between two atoms based on the sum of the van der Waals radii. Options: +# > Scaling factor for the minimal distance between two atoms based on the sum of the van der Waals radii. Options: scale_minimal_distance = 0.8 # > Contract the coordinates after the initial generation. Leads to more cluster-like and less extended structures. Options: contract_coords = false From efcd3c7a06fde00f166c78bbc6bdab6e05fa5a5b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jonathan=20Sch=C3=B6ps?= Date: Tue, 8 Oct 2024 14:28:20 +0200 Subject: [PATCH 8/8] A sentence is now bold MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Jonathan Schöps --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 4e04be9..307ca17 100644 --- a/README.md +++ b/README.md @@ -89,7 +89,7 @@ There are two related aspects of the element composition: 1. _Which elements_ should occur within the generated molecule? 2. **How many atoms** of the specified element should occur? - **Example 1**: `C:1-3, O:1-1, H:1-*` would result in a molecule with 1, 2, or 3 carbon atoms, exactly 1 oxygen atom, and between 1 and an undefined number of hydrogen atoms (i.e., at least 1). 
-- **Example 2**: `Na:10-10, In:10-10, O:20-20`. This example would result in a molecule with exactly 10 sodium atoms, 10 indium atoms, and 20 oxygen atoms. For a fixed element composition, the number of atoms (40) has to be within the min_num_atoms and max_num_atom interval. `mindlessgen` will consequently always return a molecule with exactly 40 atoms. +- **Example 2**: `Na:10-10, In:10-10, O:20-20`. This example would result in a molecule with exactly 10 sodium atoms, 10 indium atoms, and 20 oxygen atoms. **For a fixed element composition, the number of atoms (40) has to be within the min_num_atoms and max_num_atoms interval.** `mindlessgen` will consequently always return a molecule with exactly 40 atoms. ## Citation