diff --git a/src/quacc/atoms/skzcam.py b/src/quacc/atoms/skzcam.py index bd0d8576f9..ff75f6a0c9 100644 --- a/src/quacc/atoms/skzcam.py +++ b/src/quacc/atoms/skzcam.py @@ -17,424 +17,390 @@ from ase.atom import Atom from numpy.typing import NDArray - from quacc.types import BlockInfo, ElementInfo, ElementStr, MultiplicityDict + from quacc.types import ( + BlockInfo, + ElementInfo, + ElementStr, + MultiplicityDict, + SKZCAMOutput, + ) has_chemshell = find_spec("chemsh") is not None -def create_mrcc_eint_blocks( - embedded_adsorbed_cluster: Atoms, - quantum_cluster_indices: list[int], - ecp_region_indices: list[int], - element_info: dict[ElementStr, ElementInfo] | None = None, - include_cp: bool = True, - multiplicities: MultiplicityDict | None = None, -) -> BlockInfo: +class MRCCInputGenerator: """ - Creates the orcablocks input for the MRCC ASE calculator. + A class to generate the SKZCAM input for the MRCC ASE calculator. - Parameters + Attributes ---------- - embedded_adsorbed_cluster - The ASE Atoms object containing the atomic coordinates and atomic charges from the .pun file, as well as the atom type. This object is created by the [quacc.atoms.skzcam.create_skzcam_clusters][] function. + adsorbate_slab_embedded_cluster + The ASE Atoms object containing the atomic coordinates and atomic charges from the .pun file, as well as the atom type. This object is created within the [quacc.atoms.skzcam.CreateSKZCAMClusters][] class. quantum_cluster_indices - A list containing the indices of the atoms in each quantum cluster. These indices are provided by the [quacc.atoms.skzcam.create_skzcam_clusters][] function. + A list containing the indices of the atoms in one quantum cluster. These indices are created within the [quacc.atoms.skzcam.CreateSKZCAMClusters][] class. ecp_region_indices - A list containing the indices of the atoms in each ECP region. These indices are provided by the [quacc.atoms.skzcam.create_skzcam_clusters][] function. + A list containing the indices of the atoms in one ECP region. These indices are provided by the [quacc.atoms.skzcam.CreateSKZCAMClusters][] class. element_info A dictionary with elements as keys which gives the (1) number of core electrons as 'core', (2) basis set as 'basis', (3) effective core potential as 'ecp', (4) resolution-of-identity/density-fitting auxiliary basis set for DFT/HF calculations as 'ri_scf_basis' and (5) resolution-of-identity/density-fitting for correlated wave-function methods as 'ri_cwft_basis'. include_cp - If True, the coords strings will include the counterpoise correction for the adsorbate and slab. + If True, the coords strings will include the counterpoise correction (i.e., ghost atoms) for the adsorbate and slab. multiplicities The multiplicity of the adsorbate-slab complex, adsorbate and slab respectively, with the keys 'adsorbate_slab', 'adsorbate', and 'slab'. - - Returns - ------- - BlockInfo - The ORCA input block (to be put in 'orcablocks' parameter) as a string for the adsorbate-slab complex, the adsorbate, and the slab in a dictionary with the keys 'adsorbate_slab', 'adsorbate', and 'slab' respectively. + adsorbate_slab_cluster + The ASE Atoms object for the quantum cluster of the adsorbate-slab complex. + ecp_region + The ASE Atoms object for the ECP region. + adsorbate_indices + The indices of the adsorbates from the adsorbate_slab_cluster quantum cluster. + slab_indices + The indices of the slab from the adsorbate_slab_cluster quantum cluster. + The ECP region cluster. + adsorbate_cluster + The ASE Atoms object for the quantum cluster of the adsorbate. + slab_cluster + The ASE Atoms object for the quantum cluster of the slab. + mrccblocks + The MRCC input block (to be put in 'mrccblocks' parameter) as a string for the adsorbate-slab complex, the adsorbate, and the slab in a dictionary with the keys 'adsorbate_slab', 'adsorbate', and 'slab' respectively. """ - # Create the blocks for the basis sets (basis, basis_sm, dfbasis_scf, dfbasis_cor, ecp) - basis_ecp_block = generate_mrcc_basis_ecp_block( - embedded_adsorbed_cluster=embedded_adsorbed_cluster, - quantum_cluster_indices=quantum_cluster_indices, - ecp_region_indices=ecp_region_indices, - element_info=element_info, - include_cp=include_cp, - ) - - # Create the blocks for the coordinates - coords_block = generate_mrcc_coords_block( - embedded_adsorbed_cluster=embedded_adsorbed_cluster, - quantum_cluster_indices=quantum_cluster_indices, - ecp_region_indices=ecp_region_indices, - element_info=element_info, - include_cp=include_cp, - multiplicities=multiplicities, - ) - - # Create the point charge block - point_charge_block = generate_mrcc_point_charge_block( - embedded_adsorbed_cluster=embedded_adsorbed_cluster, - quantum_cluster_indices=quantum_cluster_indices, - ecp_region_indices=ecp_region_indices, - ) + def __init__( + self, + adsorbate_slab_embedded_cluster: Atoms, + quantum_cluster_indices: list[int], + ecp_region_indices: list[int], + element_info: dict[ElementStr, ElementInfo] | None = None, + include_cp: bool = True, + multiplicities: MultiplicityDict | None = None, + ) -> None: + """ + Parameters + ---------- + adsorbate_slab_embedded_cluster + The ASE Atoms object containing the atomic coordinates and atomic charges from the .pun file, as well as the atom type. This object is created within the [quacc.atoms.skzcam.CreateSKZCAMClusters][] class. + quantum_cluster_indices + A list containing the indices of the atoms in one quantum cluster. These indices are created within the [quacc.atoms.skzcam.CreateSKZCAMClusters][] class. + ecp_region_indices + A list containing the indices of the atoms in the corresponding ECP region of one quantum cluster. These indices are provided by the [quacc.atoms.skzcam.CreateSKZCAMClusters][] class. + element_info + A dictionary with elements as keys which gives the (1) number of core electrons as 'core', (2) basis set as 'basis', (3) effective core potential as 'ecp', (4) resolution-of-identity/density-fitting auxiliary basis set for DFT/HF calculations as 'ri_scf_basis' and (5) resolution-of-identity/density-fitting for correlated wave-function methods as 'ri_cwft_basis'. + include_cp + If True, the coords strings will include the counterpoise correction (i.e., ghost atoms) for the adsorbate and slab. + multiplicities + The multiplicity of the adsorbate-slab complex, adsorbate and slab respectively, with the keys 'adsorbate_slab', 'adsorbate', and 'slab'. + + Returns + ------- + None + """ + + self.adsorbate_slab_embedded_cluster = adsorbate_slab_embedded_cluster + self.quantum_cluster_indices = quantum_cluster_indices + self.ecp_region_indices = ecp_region_indices + self.element_info = element_info + self.include_cp = include_cp + self.multiplicities = ( + {"adsorbate_slab": 1, "adsorbate": 1, "slab": 1} + if multiplicities is None + else multiplicities + ) - # Combine the blocks - return { - "adsorbate_slab": basis_ecp_block["adsorbate_slab"] - + coords_block["adsorbate_slab"] - + point_charge_block, - "adsorbate": basis_ecp_block["adsorbate"] + coords_block["adsorbate"], - "slab": basis_ecp_block["slab"] + coords_block["slab"] + point_charge_block, - } - - -def generate_mrcc_basis_ecp_block( - embedded_adsorbed_cluster: Atoms, - quantum_cluster_indices: list[int], - ecp_region_indices: list[int], - element_info: dict[ElementStr, ElementInfo] | None = None, - include_cp: bool = True, -) -> BlockInfo: - """ - Generates the basis and ECP block for the MRCC input file. + # Check that none of the indices in quantum_cluster_indices are in ecp_region_indices + if not np.all( + [x not in self.ecp_region_indices for x in self.quantum_cluster_indices] + ): + raise ValueError( + "An atom in the quantum cluster is also in the ECP region." + ) - Parameters - ---------- - embedded_adsorbed_cluster - The ASE Atoms object containing the atomic coordinates and atomic charges from the .pun file. This object is created by the [quacc.atoms.skzcam.create_skzcam_clusters][] function and should contain the atom_types for the adsorbate and slab. - quantum_cluster_indices - A list of lists containing the indices of the atoms in each quantum cluster. These indices are provided by the [quacc.atoms.skzcam.create_skzcam_clusters][] function. - ecp_region_indices - A list of lists containing the indices of the atoms in the ECP region for each quantum cluster. These indices are provided by the [quacc.atoms.skzcam.create_skzcam_clusters][] function. - element_info - A dictionary with elements as keys which gives the (1) number of core electrons as 'core', (2) basis set as 'basis', (3) effective core potential as 'ecp', (4) resolution-of-identity/density-fitting auxiliary basis set for DFT/HF calculations as 'ri_scf_basis' and (5) resolution-of-identity/density-fitting for correlated wave-function methods as 'ri_cwft_basis'. - include_cp - If True, the coords strings will include the counterpoise correction (i.e., ghost atoms) for the adsorbate and slab. + # Create the adsorbate-slab complex quantum cluster and ECP region cluster + self.adsorbate_slab_cluster: Atoms = self.adsorbate_slab_embedded_cluster[ + self.quantum_cluster_indices + ] + self.ecp_region: Atoms = self.adsorbate_slab_embedded_cluster[ + self.ecp_region_indices + ] - Returns - ------- - BlockInfo - The basis and ECP block for the MRCC input file for the adsorbate-slab complex, the adsorbate, and the slab in a dictionary with the keys 'adsorbate_slab', 'adsorbate', and 'slab' respectively. - """ + # Get the indices of the adsorbates from the quantum cluster + self.adsorbate_indices: list[int] = [ + i + for i in range(len(self.adsorbate_slab_cluster)) + if self.adsorbate_slab_cluster.get_array("atom_type")[i] == "adsorbate" + ] + # Get the indices of the slab from the quantum cluster + self.slab_indices: list[int] = [ + i + for i in range(len(self.adsorbate_slab_cluster)) + if self.adsorbate_slab_cluster.get_array("atom_type")[i] != "adsorbate" + ] - # Create the quantum cluster and ECP region cluster - adsorbate_slab_cluster = embedded_adsorbed_cluster[quantum_cluster_indices] - ecp_region = embedded_adsorbed_cluster[ecp_region_indices] - - # Get the indices of the adsorbates from the quantum cluster - adsorbate_indices = [ - i - for i in range(len(adsorbate_slab_cluster)) - if adsorbate_slab_cluster.get_array("atom_type")[i] == "adsorbate" - ] - - # Get the indices of the slab from the quantum cluster - slab_indices = [ - i - for i in range(len(adsorbate_slab_cluster)) - if adsorbate_slab_cluster.get_array("atom_type")[i] != "adsorbate" - ] - - adsorbate_cluster = adsorbate_slab_cluster[adsorbate_indices] - slab_cluster = adsorbate_slab_cluster[slab_indices] - - # Helper to generate basis strings for MRCC - def _create_basis_block(quantum_region, ecp_region=None): - return f""" -basis_sm=atomtype -{create_mrcc_atomtype_basis(quantum_region=quantum_region, ecp_region=ecp_region, element_basis_info={element: 'def2-SVP' for element in element_info})} + # Create the adsorbate and slab quantum clusters + self.adsorbate_cluster: Atoms = self.adsorbate_slab_cluster[ + self.adsorbate_indices + ] + self.slab_cluster: Atoms = self.adsorbate_slab_cluster[self.slab_indices] -basis=atomtype -{create_mrcc_atomtype_basis(quantum_region=quantum_region, ecp_region=ecp_region, element_basis_info={element: element_info[element]['basis'] for element in element_info})} + # Initialize the mrccblocks input strings for the adsorbate-slab complex, adsorbate, and slab + self.mrccblocks: BlockInfo = {"adsorbate_slab": "", "adsorbate": "", "slab": ""} -dfbasis_scf=atomtype -{create_mrcc_atomtype_basis(quantum_region=quantum_region, ecp_region=ecp_region, element_basis_info={element: element_info[element]['ri_scf_basis'] for element in element_info})} + def generate_input(self) -> BlockInfo: + """ + Creates the mrccblocks input for the MRCC ASE calculator. -dfbasis_cor=atomtype -{create_mrcc_atomtype_basis(quantum_region=quantum_region, ecp_region=ecp_region, element_basis_info={element: element_info[element]['ri_cwft_basis'] for element in element_info})} -""" + Returns + ------- + BlockInfo + The MRCC input block (to be put in 'mrccblocks' parameter) as a string for the adsorbate-slab complex, the adsorbate, and the slab in a dictionary with the keys 'adsorbate_slab', 'adsorbate', and 'slab' respectively. + """ - if include_cp: - return { - "adsorbate_slab": _create_basis_block( - quantum_region=adsorbate_slab_cluster, ecp_region=ecp_region - ), - "slab": _create_basis_block( - quantum_region=adsorbate_slab_cluster, ecp_region=ecp_region - ), - "adsorbate": _create_basis_block( - quantum_region=adsorbate_slab_cluster, ecp_region=None - ), - } - else: - return { - "adsorbate_slab": _create_basis_block( - quantum_region=adsorbate_slab_cluster, ecp_region=ecp_region - ), - "slab": _create_basis_block( - quantum_region=slab_cluster, ecp_region=ecp_region - ), - "adsorbate": _create_basis_block( - quantum_region=adsorbate_cluster, ecp_region=None - ), - } + # Create the blocks for the basis sets (basis, basis_sm, dfbasis_scf, dfbasis_cor, ecp) + self._generate_basis_ecp_block() + # Create the blocks for the coordinates + self._generate_coords_block() -def create_mrcc_atomtype_basis( - quantum_region: Atoms, - element_basis_info: dict[ElementStr, str], - ecp_region: Atoms | None = None, -) -> str: - """ - Creates a column for the basis set for each atom in the Atoms object, given by element_info. + # Create the point charge block and add it to the adsorbate-slab complex and slab blocks + point_charge_block = self._generate_point_charge_block() + self.mrccblocks["adsorbate_slab"] += point_charge_block + self.mrccblocks["slab"] += point_charge_block - Parameters - ---------- - quantum_region - The ASE Atoms object containing the atomic coordinates of the quantum cluster region (could be the adsorbate-slab complex, slab or adsorbate by itself). - element_basis_info - A dictionary with elements as keys which gives the basis set for each element. - ecp_region - The ASE atoms object containing the atomic coordinates of the capped ECP region. + # Combine the blocks + return self.mrccblocks - Returns - ------- - str - The basis set for each atom in the Atoms object given as a column (of size N, where N is the number of atoms). - """ + def _generate_basis_ecp_block(self) -> None: + """ + Generates the basis and ECP block for the MRCC input file. - basis_str = "" - for atom in quantum_region: - basis_str += f"{element_basis_info[atom.symbol]}\n" - if ecp_region is not None: - basis_str += "no-basis-set\n" * len(ecp_region) + Returns + ------- + None + """ - return basis_str + # Helper to generate basis strings for MRCC + def _create_basis_block(quantum_region, ecp_region=None): + return f""" +basis_sm=atomtype +{self._create_atomtype_basis(quantum_region=quantum_region, ecp_region=ecp_region, element_basis_info={element: 'def2-SVP' for element in self.element_info})} +basis=atomtype +{self._create_atomtype_basis(quantum_region=quantum_region, ecp_region=ecp_region, element_basis_info={element: self.element_info[element]['basis'] for element in self.element_info})} -def generate_mrcc_coords_block( - embedded_adsorbed_cluster: Atoms, - quantum_cluster_indices: list[int], - ecp_region_indices: list[int], - element_info: ElementInfo, - include_cp: bool = True, - multiplicities: MultiplicityDict | None = None, -) -> BlockInfo: - """ - Generates the coordinates block for the MRCC input file. This includes the coordinates of the quantum cluster, the ECP region, and the point charges. It will return three strings for the adsorbate-slab complex, adsorbate and slab. +dfbasis_scf=atomtype +{self._create_atomtype_basis(quantum_region=quantum_region, ecp_region=ecp_region, element_basis_info={element: self.element_info[element]['ri_scf_basis'] for element in self.element_info})} - Parameters - ---------- - embedded_adsorbed_cluster - The ASE Atoms object containing the atomic coordinates and atomic charges from the .pun file, as well as the atom type. This Atoms object is typically produced from [quacc.atoms.skzcam.create_skzcam_clusters][]. - quantum_cluster_indices - A list containing the indices of the atoms in each quantum cluster. These indices are provided by the [quacc.atoms.skzcam.create_skzcam_clusters][] function. - ecp_region_indices - A list containing the indices of the atoms in each ECP region. These indices are provided by the [quacc.atoms.skzcam.create_skzcam_clusters][] function. - element_info - A dictionary with elements as keys which gives the (1) number of core electrons as 'core', (2) basis set as 'basis', (3) effective core potential as 'ecp', (4) resolution-of-identity/density-fitting auxiliary basis set for DFT/HF calculations as 'ri_scf_basis' and (5) resolution-of-identity/density-fitting for correlated wave-function methods as 'ri_cwft_basis'. - include_cp - If True, the coords strings will include the counterpoise correction for the adsorbate and slab. - multiplicities - The multiplicity of the adsorbate-slab complex, adsorbate and slab respectively, with the keys 'adsorbate_slab', 'adsorbate', and 'slab'. +dfbasis_cor=atomtype +{self._create_atomtype_basis(quantum_region=quantum_region, ecp_region=ecp_region, element_basis_info={element: self.element_info[element]['ri_cwft_basis'] for element in self.element_info})} +""" - Returns - ------- - BlockInfo - The coordinates block for the ORCA input block (to be put in 'orcablocks' parameter) as a string for the adsorbate-slab complex, the adsorbate, and the slab in a dictionary with the keys 'adsorbate_slab', 'adsorbate', and 'slab' respectively. - """ + if self.include_cp: + self.mrccblocks["adsorbate_slab"] += _create_basis_block( + quantum_region=self.adsorbate_slab_cluster, ecp_region=self.ecp_region + ) + self.mrccblocks["slab"] += _create_basis_block( + quantum_region=self.adsorbate_slab_cluster, ecp_region=self.ecp_region + ) + self.mrccblocks["adsorbate"] += _create_basis_block( + quantum_region=self.adsorbate_slab_cluster, ecp_region=None + ) + else: + self.mrccblocks["adsorbate_slab"] += _create_basis_block( + quantum_region=self.adsorbate_slab_cluster, ecp_region=self.ecp_region + ) + self.mrccblocks["slab"] += _create_basis_block( + quantum_region=self.slab_cluster, ecp_region=self.ecp_region + ) + self.mrccblocks["adsorbate"] += _create_basis_block( + quantum_region=self.adsorbate_cluster, ecp_region=None + ) - # Create the quantum cluster and ECP region cluster - if multiplicities is None: - multiplicities = {"adsorbate_slab": 1, "adsorbate": 1, "slab": 1} - quantum_cluster = embedded_adsorbed_cluster[quantum_cluster_indices] - ecp_region = embedded_adsorbed_cluster[ecp_region_indices] - - # Get the indices of the adsorbates from the quantum cluster - adsorbate_indices = [ - i - for i in range(len(quantum_cluster)) - if quantum_cluster.get_array("atom_type")[i] == "adsorbate" - ] - - # Get the indices of the slab from the quantum cluster - slab_indices = [ - i - for i in range(len(quantum_cluster)) - if quantum_cluster.get_array("atom_type")[i] != "adsorbate" - ] - - # Get the charge of the quantum cluster - charge = int(sum(quantum_cluster.get_array("oxi_states"))) - - # Get the total number of core electrons for the quantum cluster - core = { - "adsorbate_slab": sum( - [element_info[atom.symbol]["core"] for atom in quantum_cluster] - ), - "adsorbate": sum( - [ - element_info[atom.symbol]["core"] - for atom in quantum_cluster - if atom.index in adsorbate_indices - ] - ), - "slab": sum( - [ - element_info[atom.symbol]["core"] - for atom in quantum_cluster - if atom.index in slab_indices - ] - ), - } + def _create_atomtype_basis( + self, + quantum_region: Atoms, + element_basis_info: dict[ElementStr, str], + ecp_region: Atoms | None = None, + ) -> str: + """ + Creates a column for the basis set for each atom in the Atoms object, given by element_info. + + Parameters + ---------- + quantum_region + The ASE Atoms object containing the atomic coordinates of the quantum cluster region (could be the adsorbate-slab complex, slab or adsorbate by itself). + element_basis_info + A dictionary with elements as keys which gives the basis set for each element. + ecp_region + The ASE atoms object containing the atomic coordinates of the capped ECP region. + + Returns + ------- + str + The basis set for each atom in the Atoms object given as a column (of size N, where N is the number of atoms). + """ + + basis_str = "" + for atom in quantum_region: + basis_str += f"{element_basis_info[atom.symbol]}\n" + if ecp_region is not None: + basis_str += "no-basis-set\n" * len(ecp_region) + + return basis_str + + def _generate_coords_block(self) -> None: + """ + Generates the coordinates block for the MRCC input file. This includes the coordinates of the quantum cluster, the ECP region, and the point charges. It will return three strings for the adsorbate-slab complex, adsorbate and slab. + + Returns + ------- + None + """ + + # Get the charge of the quantum cluster + charge = int(sum(self.adsorbate_slab_cluster.get_array("oxi_states"))) + + # Get the total number of core electrons for the quantum cluster + core = { + "adsorbate_slab": sum( + [ + self.element_info[atom.symbol]["core"] + for atom in self.adsorbate_slab_cluster + ] + ), + "adsorbate": sum( + [ + self.element_info[atom.symbol]["core"] + for atom in self.adsorbate_cluster + ] + ), + "slab": sum( + [self.element_info[atom.symbol]["core"] for atom in self.slab_cluster] + ), + } - # Create the coords strings for the adsorbate-slab complex, adsorbate, and slab - coords_block = { - "adsorbate_slab": f"""charge={charge} -mult={multiplicities['adsorbate_slab']} + # Add the charge and core electron information to mrccblocks + self.mrccblocks["adsorbate_slab"] += f"""charge={charge} +mult={self.multiplicities['adsorbate_slab']} core={int(core['adsorbate_slab']/2)} unit=angs geom=xyz -""", - "adsorbate": f"""charge=0 -mult={multiplicities['adsorbate']} +""" + self.mrccblocks["adsorbate"] += f"""charge=0 +mult={self.multiplicities['adsorbate']} core={int(core['adsorbate']/2)} unit=angs geom=xyz -""", - "slab": f"""charge={charge} -mult={multiplicities['slab']} +""" + self.mrccblocks["slab"] += f"""charge={charge} +mult={self.multiplicities['slab']} core={int(core['slab']/2)} unit=angs geom=xyz -""", - } +""" + # Create the atom coordinates block for the adsorbate-slab cluster, ECP region + adsorbate_slab_coords_block = "" + for atom in self.adsorbate_slab_cluster: + adsorbate_slab_coords_block += create_atom_coord_string(atom=atom) + + ecp_region_block = "" + for ecp_atom in self.ecp_region: + ecp_region_block += create_atom_coord_string(atom=ecp_atom) + + # Set the number of atoms for each system. This would be the number of atoms in the quantum cluster plus the number of atoms in the ECP region. If include_cp is True, then the number of atoms in the quantum cluster is the number of atoms in the adsorbate-slab complex for both the adsorbate and slab. + if self.include_cp: + self.mrccblocks["adsorbate_slab"] += ( + f"{len(self.adsorbate_slab_cluster) + len(self.ecp_region)}\n\n" + ) + self.mrccblocks["adsorbate_slab"] += ( + adsorbate_slab_coords_block + ecp_region_block + ) - # Set the number of atoms for each system - if include_cp: - coords_block["adsorbate_slab"] += ( - f"{len(quantum_cluster) + len(ecp_region)}\n\n" - ) - coords_block["adsorbate"] += f"{len(quantum_cluster)}\n\n" - coords_block["slab"] += f"{len(quantum_cluster) + len(ecp_region)}\n\n" - else: - coords_block["adsorbate_slab"] += ( - f"{len(quantum_cluster) + len(ecp_region)}\n\n" - ) - coords_block["adsorbate"] += f"{len(adsorbate_indices)}\n\n" - coords_block["slab"] += f"{len(slab_indices) + len(ecp_region)}\n\n" - - for i, atom in enumerate(quantum_cluster): - # Create the coords section for the adsorbate-slab complex - coords_block["adsorbate_slab"] += create_atom_coord_string(atom=atom) - - # Create the coords section for the adsorbate and slab - if i in adsorbate_indices: - coords_block["adsorbate"] += create_atom_coord_string(atom=atom) - if include_cp: - coords_block["slab"] += create_atom_coord_string(atom=atom) - elif i in slab_indices: - coords_block["slab"] += create_atom_coord_string(atom=atom) - if include_cp: - coords_block["adsorbate"] += create_atom_coord_string(atom=atom) - - # Create the coords section for the ECP region - for atom in ecp_region: - coords_block["adsorbate_slab"] += create_atom_coord_string(atom=atom) - coords_block["slab"] += create_atom_coord_string(atom=atom) - - # Adding the ghost atoms for the counterpoise correction - for system in ["adsorbate_slab", "adsorbate", "slab"]: - coords_block[system] += "\nghost=serialno\n" - if include_cp and system in ["adsorbate"]: - coords_block[system] += ",".join( - [str(atom_idx + 1) for atom_idx in slab_indices] + self.mrccblocks["adsorbate"] += f"{len(self.adsorbate_slab_cluster)}\n\n" + self.mrccblocks["adsorbate"] += adsorbate_slab_coords_block + + self.mrccblocks["slab"] += ( + f"{len(self.adsorbate_slab_cluster) + len(self.ecp_region)}\n\n" + ) + self.mrccblocks["slab"] += adsorbate_slab_coords_block + ecp_region_block + + for system in ["adsorbate_slab", "adsorbate", "slab"]: + self.mrccblocks[system] += "\nghost=serialno\n" + # Add the ghost atoms for the counterpoise correction in the adsorbate and slab + if system == "adsorbate": + self.mrccblocks[system] += ",".join( + [str(atom_idx + 1) for atom_idx in self.slab_indices] + ) + elif system == "slab": + self.mrccblocks[system] += ",".join( + [str(atom_idx + 1) for atom_idx in self.adsorbate_indices] + ) + self.mrccblocks[system] += "\n\n" + else: + self.mrccblocks["adsorbate_slab"] += ( + f"{len(self.adsorbate_slab_cluster) + len(self.ecp_region)}\n\n" ) - elif include_cp and system in ["slab"]: - coords_block[system] += ",".join( - [str(atom_idx + 1) for atom_idx in adsorbate_indices] + self.mrccblocks["adsorbate_slab"] += ( + adsorbate_slab_coords_block + ecp_region_block ) - coords_block[system] += "\n\n" - return coords_block + self.mrccblocks["adsorbate"] += f"{len(self.adsorbate_cluster)}\n\n" + for atom in self.adsorbate_cluster: + self.mrccblocks["adsorbate"] += create_atom_coord_string(atom=atom) + self.mrccblocks["slab"] += ( + f"{len(self.slab_cluster) + len(self.ecp_region)}\n\n" + ) + for atom in self.slab_cluster: + self.mrccblocks["slab"] += create_atom_coord_string(atom=atom) + self.mrccblocks["slab"] += ecp_region_block + + def _generate_point_charge_block(self) -> str: + """ + Create the point charge block for the MRCC input file. This requires the embedded_cluster Atoms object containing both atom_type and oxi_states arrays, as well as the indices of the quantum cluster and ECP region. Such arrays are created by the [quacc.atoms.skzcam.CreateSKZCAMClusters][] class. + + Returns + ------- + str + The point charge block for the MRCC input file. + """ + + # Get the oxi_states arrays from the embedded_cluster + oxi_states = self.adsorbate_slab_embedded_cluster.get_array("oxi_states") + + # Get the number of point charges for this system. There is a point charge associated with each capped ECP as well. + pc_region_indices = [ + atom.index + for atom in self.adsorbate_slab_embedded_cluster + if atom.index not in self.quantum_cluster_indices + ] -def generate_mrcc_point_charge_block( - embedded_adsorbed_cluster: Atoms, - quantum_cluster_indices: list[int], - ecp_region_indices: list[int], -) -> str: - """ - Create the point charge block for the MRCC input file. This requires the embedded_cluster Atoms object containing both atom_type and oxi_states arrays, as well as the indices of the quantum cluster and ECP region. Such arrays are created by the [quacc.atoms.skzcam.create_skzcam_clusters][] function. + num_pc = len(pc_region_indices) + pc_block = f"qmmm=Amber\npointcharges\n{num_pc}\n" - Parameters - ---------- - embedded_adsorbed_cluster - The ASE Atoms object containing the atomic coordinates and atomic charges from the .pun file. This object can be created by the [quacc.atoms.skzcam.create_skzcam_clusters][] function. - quantum_cluster_indices - A list of lists containing the indices of the atoms in each quantum cluster. These indices are provided by the [quacc.atoms.skzcam.create_skzcam_clusters][] function. - ecp_region_indices - A list of lists containing the indices of the atoms in the ECP region for each quantum cluster. These indices are provided by the [quacc.atoms.skzcam.create_skzcam_clusters][] function. + # Add the ecp_region indices + for i in pc_region_indices: + position = self.adsorbate_slab_embedded_cluster[i].position + pc_block += f" {position[0]:-16.11f} {position[1]:-16.11f} {position[2]:-16.11f} {oxi_states[i]:-16.11f}\n" + + return pc_block - Returns - ------- - str - The point charge block for the MRCC input file. - """ - # Get the oxi_states arrays from the embedded_cluster - oxi_states = embedded_adsorbed_cluster.get_array("oxi_states") - - # Check that none of the indices in quantum_cluster_indices are in ecp_region_indices - if not np.all([x not in ecp_region_indices for x in quantum_cluster_indices]): - raise ValueError("An atom in the quantum cluster is also in the ECP region.") - - # Get the number of point charges for this system. There is a point charge associated with each capped ECP as well. - pc_region_indices = ecp_region_indices + [ - atom.index - for atom in embedded_adsorbed_cluster - if atom not in quantum_cluster_indices + ecp_region_indices - ] - - num_pc = len(pc_region_indices) - pc_block = f"qmmm=Amber\npointcharges\n{num_pc}\n" - - # Add the ecp_region indices - for i in pc_region_indices: - position = embedded_adsorbed_cluster[i].position - pc_block += f" {position[0]:-16.11f} {position[1]:-16.11f} {position[2]:-16.11f} {oxi_states[i]:-16.11f}\n" - - return pc_block - - -def create_orca_eint_blocks( - embedded_adsorbed_cluster: Atoms, - quantum_cluster_indices: list[int], - ecp_region_indices: list[int], - element_info: dict[ElementStr, ElementInfo] | None = None, - pal_nprocs_block: dict[str, int] | None = None, - method_block: dict[str, str] | None = None, - scf_block: dict[str, str] | None = None, - ecp_info: dict[ElementStr, str] | None = None, - include_cp: bool = True, - multiplicities: MultiplicityDict | None = None, -) -> BlockInfo: +class ORCAInputGenerator: """ - Creates the orcablocks input for the ORCA ASE calculator. + A class to generate the SKZCAM input for the ORCA ASE calculator. - Parameters + Attributes ---------- - embedded_adsorbed_cluster - The ASE Atoms object containing the atomic coordinates and atomic charges from the .pun file, as well as the atom type. This object is created by the [quacc.atoms.skzcam.create_skzcam_clusters][] function. + adsorbate_slab_embedded_cluster + The ASE Atoms object containing the atomic coordinates and atomic charges from the .pun file, as well as the atom type. This object is created by the [quacc.atoms.skzcam.CreateSKZCAMClusters][] class. quantum_cluster_indices - A list containing the indices of the atoms in each quantum cluster. These indices are provided by the [quacc.atoms.skzcam.create_skzcam_clusters][] function. + A list containing the indices of the atoms in one quantum cluster. These indices are provided by the [quacc.atoms.skzcam.CreateSKZCAMClusters][] class. ecp_region_indices - A list containing the indices of the atoms in each ECP region. These indices are provided by the [quacc.atoms.skzcam.create_skzcam_clusters][] function. + A list containing the indices of the atoms in the ECP region of one quantum cluster. These indices are provided by the [quacc.atoms.skzcam.CreateSKZCAMClusters][] class. element_info A dictionary with elements as keys which gives the (1) number of core electrons as 'core', (2) basis set as 'basis', (3) effective core potential as 'ecp', (4) resolution-of-identity/density-fitting auxiliary basis set for DFT/HF calculations as 'ri_scf_basis' and (5) resolution-of-identity/density-fitting for correlated wave-function methods as 'ri_cwft_basis'. + include_cp + If True, the coords strings will include the counterpoise correction (i.e., ghost atoms) for the adsorbate and slab. + multiplicities + The multiplicity of the adsorbate-slab complex, adsorbate and slab respectively, with the keys 'adsorbate_slab', 'adsorbate', and 'slab'. pal_nprocs_block A dictionary with the number of processors for the PAL block as 'nprocs' and the maximum memory-per-core in megabytes blocks as 'maxcore'. method_block @@ -442,823 +408,997 @@ def create_orca_eint_blocks( scf_block A dictionary that contains the SCF block for the ORCA input file. The key is the ORCA setting and the value is that setting's value. ecp_info - A dictionary with the ECP data (in ORCA format) for the cations in the ECP region. - include_cp - If True, the coords strings will include the counterpoise correction for the adsorbate and slab. - multiplicities - The multiplicity of the adsorbate-slab complex, adsorbate and slab respectively, with the keys 'adsorbate_slab', 'adsorbate', and 'slab'. - - Returns - ------- - BlockInfo + A dictionary with the ECP data (in ORCA format) for the cations in the ECP region. The keys are the element symbols and the values are the ECP data. + adsorbate_slab_cluster + The ASE Atoms object for the quantum cluster of the adsorbate-slab complex. + ecp_region + The ASE Atoms object for the ECP region. + adsorbate_indices + The indices of the adsorbates from the adsorbate_slab_cluster quantum cluster. + slab_indices + The indices of the slab from the adsorbate_slab_cluster quantum cluster. + adsorbate_cluster + The ASE Atoms object for the quantum cluster of the adsorbate. + slab_cluster + The ASE Atoms object for the quantum cluster of the slab. + orcablocks The ORCA input block (to be put in 'orcablocks' parameter) as a string for the adsorbate-slab complex, the adsorbate, and the slab in a dictionary with the keys 'adsorbate_slab', 'adsorbate', and 'slab' respectively. - """ - - # First generate the preamble block - preamble_block = generate_orca_input_preamble( - embedded_cluster=embedded_adsorbed_cluster, - quantum_cluster_indices=quantum_cluster_indices, - element_info=element_info, - pal_nprocs_block=pal_nprocs_block, - method_block=method_block, - scf_block=scf_block, - ) - - # Generate the coords block - coords_block = generate_coords_block( - embedded_adsorbed_cluster=embedded_adsorbed_cluster, - quantum_cluster_indices=quantum_cluster_indices, - ecp_region_indices=ecp_region_indices, - ecp_info=ecp_info, - include_cp=include_cp, - multiplicities=multiplicities, - ) - - # Combine the blocks - return { - "adsorbate_slab": preamble_block + coords_block["adsorbate_slab"], - "adsorbate": preamble_block + coords_block["adsorbate"], - "slab": preamble_block + coords_block["slab"], - } - - -def create_atom_coord_string( - atom: Atom, - is_ghost_atom: bool = False, - atom_ecp_info: str | None = None, - pc_charge: float | None = None, -) -> str: - """ - Creates a string containing the Atom symbol and coordinates in the ORCA input file format, with additional information for atoms in the ECP region as well as ghost atoms. - - Parameters - ---------- - atom - The ASE Atom (not Atoms) object containing the atomic coordinates. - is_ghost_atom - If True, then the atom is a ghost atom. - atom_ecp_info - If not None, then assume this is an atom in the ECP region and adds the ECP info. - pc_charge - The point charge value for the ECP region atom. - Returns - ------- - str - The atom symbol and coordinates in the ORCA input file format. """ - # If ecp_info is not None and ghost_atom is True, raise an error - if atom_ecp_info and is_ghost_atom: - raise ValueError("ECP info cannot be provided for ghost atoms.") - - # Check that pc_charge is a float if atom_ecp_info is not None - if atom_ecp_info and pc_charge is None: - raise ValueError("Point charge value must be given for atoms with ECP info.") - - if is_ghost_atom: - atom_coord_str = f"{(atom.symbol + ':').ljust(3)} {' '*16} {atom.position[0]:-16.11f} {atom.position[1]:-16.11f} {atom.position[2]:-16.11f}\n" - elif atom_ecp_info is not None: - atom_coord_str = f"{(atom.symbol + '>').ljust(3)} {pc_charge:-16.11f} {atom.position[0]:-16.11f} {atom.position[1]:-16.11f} {atom.position[2]:-16.11f}\n{atom_ecp_info}" - else: - atom_coord_str = f"{atom.symbol.ljust(3)} {' '*16} {atom.position[0]:-16.11f} {atom.position[1]:-16.11f} {atom.position[2]:-16.11f}\n" - - return atom_coord_str - - -def generate_coords_block( - embedded_adsorbed_cluster: Atoms, - quantum_cluster_indices: list[int], - ecp_region_indices: list[int], - ecp_info: dict[ElementStr, str] | None = None, - include_cp: bool = True, - multiplicities: MultiplicityDict | None = None, -) -> BlockInfo: - """ - Generates the coordinates block for the ORCA input file. This includes the coordinates of the quantum cluster, the ECP region, and the point charges. It will return three strings for the adsorbate-slab complex, adsorbate and slab. + def __init__( + self, + adsorbate_slab_embedded_cluster: Atoms, + quantum_cluster_indices: list[int], + ecp_region_indices: list[int], + element_info: dict[ElementStr, ElementInfo] | None = None, + include_cp: bool = True, + multiplicities: MultiplicityDict | None = None, + pal_nprocs_block: dict[str, int] | None = None, + method_block: dict[str, str] | None = None, + scf_block: dict[str, str] | None = None, + ecp_info: dict[ElementStr, str] | None = None, + ) -> None: + """ + Parameters + ---------- + adsorbate_slab_embedded_cluster + The ASE Atoms object containing the atomic coordinates and atomic charges from the .pun file, as well as the atom type. This object is created by the [quacc.atoms.skzcam.CreateSKZCAMClusters][] class. + quantum_cluster_indices + A list containing the indices of the atoms in each quantum cluster. These indices are provided by the [quacc.atoms.skzcam.CreateSKZCAMClusters][] class. + ecp_region_indices + A list containing the indices of the atoms in each ECP region. These indices are provided by the [quacc.atoms.skzcam.CreateSKZCAMClusters][] class. + element_info + A dictionary with elements as keys which gives the (1) number of core electrons as 'core', (2) basis set as 'basis', (3) effective core potential as 'ecp', (4) resolution-of-identity/density-fitting auxiliary basis set for DFT/HF calculations as 'ri_scf_basis' and (5) resolution-of-identity/density-fitting for correlated wave-function methods as 'ri_cwft_basis'. + include_cp + If True, the coords strings will include the counterpoise correction (i.e., ghost atoms) for the adsorbate and slab. + multiplicities + The multiplicity of the adsorbate-slab complex, adsorbate and slab respectively, with the keys 'adsorbate_slab', 'adsorbate', and 'slab'. + pal_nprocs_block + A dictionary with the number of processors for the PAL block as 'nprocs' and the maximum memory-per-core in megabytes blocks as 'maxcore'. + method_block + A dictionary that contains the method block for the ORCA input file. The key is the ORCA setting and the value is that setting's value. + scf_block + A dictionary that contains the SCF block for the ORCA input file. The key is the ORCA setting and the value is that setting's value. + ecp_info + A dictionary with the ECP data (in ORCA format) for the cations in the ECP region. + + Returns + ------- + None + """ + + self.adsorbate_slab_embedded_cluster = adsorbate_slab_embedded_cluster + self.quantum_cluster_indices = quantum_cluster_indices + self.ecp_region_indices = ecp_region_indices + self.element_info = element_info + self.include_cp = include_cp + self.multiplicities = ( + {"adsorbate_slab": 1, "adsorbate": 1, "slab": 1} + if multiplicities is None + else multiplicities + ) + self.pal_nprocs_block = pal_nprocs_block + self.method_block = method_block + self.scf_block = scf_block + self.ecp_info = ecp_info + + # Check that none of the indices in quantum_cluster_indices are in ecp_region_indices + if not np.all( + [x not in self.ecp_region_indices for x in self.quantum_cluster_indices] + ): + raise ValueError( + "An atom in the quantum cluster is also in the ECP region." + ) - Parameters - ---------- - embedded_adsorbed_cluster - The ASE Atoms object containing the atomic coordinates and atomic charges from the .pun file, as well as the atom type. This Atoms object is typically produced from [quacc.atoms.skzcam.create_skzcam_clusters][]. - quantum_cluster_indices - A list containing the indices of the atoms in each quantum cluster. These indices are provided by the [quacc.atoms.skzcam.create_skzcam_clusters][] function. - ecp_region_indices - A list containing the indices of the atoms in each ECP region. These indices are provided by the [quacc.atoms.skzcam.create_skzcam_clusters][] function. - ecp_info - A dictionary with the ECP data (in ORCA format) for the cations in the ECP region. - include_cp - If True, the coords strings will include the counterpoise correction for the adsorbate and slab. - multiplicities - The multiplicity of the adsorbate-slab complex, adsorbate and slab respectively, with the keys 'adsorbate_slab', 'adsorbate', and 'slab'. + # Create the adsorbate-slab complex quantum cluster and ECP region cluster + self.adsorbate_slab_cluster: Atoms = self.adsorbate_slab_embedded_cluster[ + self.quantum_cluster_indices + ] + self.ecp_region: Atoms = self.adsorbate_slab_embedded_cluster[ + self.ecp_region_indices + ] - Returns - ------- - BlockInfo - The coordinates block for the ORCA input block (to be put in 'orcablocks' parameter) as a string for the adsorbate-slab complex, the adsorbate, and the slab in a dictionary with the keys 'adsorbate_slab', 'adsorbate', and 'slab' respectively. - """ + # Get the indices of the adsorbates from the quantum cluster + self.adsorbate_indices: list[int] = [ + i + for i in range(len(self.adsorbate_slab_cluster)) + if self.adsorbate_slab_cluster.get_array("atom_type")[i] == "adsorbate" + ] + # Get the indices of the slab from the quantum cluster + self.slab_indices: list[int] = [ + i + for i in range(len(self.adsorbate_slab_cluster)) + if self.adsorbate_slab_cluster.get_array("atom_type")[i] != "adsorbate" + ] - # Create the quantum cluster and ECP region cluster - if multiplicities is None: - multiplicities = {"adsorbate_slab": 1, "adsorbate": 1, "slab": 1} - quantum_cluster = embedded_adsorbed_cluster[quantum_cluster_indices] - ecp_region = embedded_adsorbed_cluster[ecp_region_indices] - - # Get the indices of the adsorbates from the quantum cluster - adsorbate_indices = [ - i - for i in range(len(quantum_cluster)) - if quantum_cluster.get_array("atom_type")[i] == "adsorbate" - ] - - # Get the indices of the slab from the quantum cluster - slab_indices = [ - i - for i in range(len(quantum_cluster)) - if quantum_cluster.get_array("atom_type")[i] != "adsorbate" - ] - - # Get the charge of the quantum cluster - charge = int(sum(quantum_cluster.get_array("oxi_states"))) - - # Create the coords strings for the adsorbate-slab complex, adsorbate, and slab - coords_block = { - "adsorbate_slab": f"""%coords + # Create the adsorbate and slab quantum clusters + self.adsorbate_cluster: Atoms = self.adsorbate_slab_cluster[ + self.adsorbate_indices + ] + self.slab_cluster: Atoms = self.adsorbate_slab_cluster[self.slab_indices] + + # Initialize the orcablocks input strings for the adsorbate-slab complex, adsorbate, and slab + self.orcablocks: BlockInfo = {"adsorbate_slab": "", "adsorbate": "", "slab": ""} + + def generate_input(self) -> BlockInfo: + """ + Creates the orcablocks input for the ORCA ASE calculator. + + Returns + ------- + BlockInfo + The ORCA input block (to be put in 'orcablocks' parameter) as a string for the adsorbate-slab complex, the adsorbate, and the slab in a dictionary with the keys 'adsorbate_slab', 'adsorbate', and 'slab' respectively. + """ + + # First generate the preamble block + self._generate_preamble_block() + + # Create the blocks for the coordinates + self._generate_coords_block() + + # Combine the blocks + return self.orcablocks + + def create_point_charge_file(self, pc_file: str | Path) -> None: + """ + Create a point charge file that can be read by ORCA. This requires the embedded_cluster Atoms object containing both atom_type and oxi_states arrays, as well as the indices of the quantum cluster and ECP region. + + Parameters + ---------- + pc_file + A file containing the point charges to be written by ORCA. + + Returns + ------- + None + """ + + # Get the oxi_states arrays from the embedded_cluster + oxi_states = self.adsorbate_slab_embedded_cluster.get_array("oxi_states") + + # Get the number of point charges for this system + total_indices = self.quantum_cluster_indices + self.ecp_region_indices + num_pc = len(self.adsorbate_slab_embedded_cluster) - len(total_indices) + counter = 0 + with Path.open(pc_file, "w") as f: + # Write the number of point charges first + f.write(f"{num_pc}\n") + for i in range(len(self.adsorbate_slab_embedded_cluster)): + if i not in total_indices: + counter += 1 + position = self.adsorbate_slab_embedded_cluster[i].position + if counter != num_pc: + f.write( + f"{oxi_states[i]:-16.11f} {position[0]:-16.11f} {position[1]:-16.11f} {position[2]:-16.11f}\n" + ) + else: + f.write( + f"{oxi_states[i]:-16.11f} {position[0]:-16.11f} {position[1]:-16.11f} {position[2]:-16.11f}" + ) + + def _generate_coords_block(self) -> None: + """ + Generates the coordinates block for the ORCA input file. This includes the coordinates of the quantum cluster, the ECP region, and the point charges. It will return three strings for the adsorbate-slab complex, adsorbate and slab. + + Returns + ------- + None + """ + + # Get the charge of the adsorbate_slab cluster + charge = int(sum(self.adsorbate_slab_cluster.get_array("oxi_states"))) + + # Add the coords strings for the adsorbate-slab complex, adsorbate, and slab + self.orcablocks["adsorbate_slab"] += f"""%coords CTyp xyz -Mult {multiplicities['adsorbate_slab']} +Mult {self.multiplicities['adsorbate_slab']} Units angs Charge {charge} coords -""", - "adsorbate": f"""%coords +""" + self.orcablocks["adsorbate"] += f"""%coords CTyp xyz -Mult {multiplicities['adsorbate']} +Mult {self.multiplicities['adsorbate']} Units angs Charge 0 coords -""", - "slab": f"""%coords +""" + self.orcablocks["slab"] += f"""%coords CTyp xyz -Mult {multiplicities['slab']} +Mult {self.multiplicities['slab']} Units angs Charge {charge} coords -""", - } - - for i, atom in enumerate(quantum_cluster): - # Create the coords section for the adsorbate-slab complex - coords_block["adsorbate_slab"] += create_atom_coord_string(atom=atom) - - # Create the coords section for the adsorbate and slab - if i in adsorbate_indices: - coords_block["adsorbate"] += create_atom_coord_string(atom=atom) - if include_cp: - coords_block["slab"] += create_atom_coord_string( - atom=atom, is_ghost_atom=True - ) - elif i in slab_indices: - coords_block["slab"] += create_atom_coord_string(atom=atom) - if include_cp: - coords_block["adsorbate"] += create_atom_coord_string( - atom=atom, is_ghost_atom=True - ) - - # Create the coords section for the ECP region - ecp_region_coords_section = "" - for i, atom in enumerate(ecp_region): - atom_ecp_info = format_ecp_info(atom_ecp_info=ecp_info[atom.symbol]) - ecp_region_coords_section += create_atom_coord_string( - atom=atom, - atom_ecp_info=atom_ecp_info, - pc_charge=ecp_region.get_array("oxi_states")[i], - ) - - # Add the ECP region coords section to the ads_slab_coords string - coords_block["adsorbate_slab"] += f"{ecp_region_coords_section}end\nend\n" - coords_block["slab"] += f"{ecp_region_coords_section}end\nend\n" - coords_block["adsorbate"] += "end\nend\n" - - return coords_block - - -def format_ecp_info(atom_ecp_info: str) -> str: - """ - Formats the ECP info so that it can be inputted to ORCA without problems. - - Parameters - ---------- - atom_ecp_info - The ECP info for a single atom. - - Returns - ------- - str - The formatted ECP info. - """ - # Find the starting position of "NewECP" and "end" - start_pos = atom_ecp_info.lower().find("newecp") - end_pos = atom_ecp_info.lower().find("end", start_pos) - - start_pos += len("NewECP") - - # If "NewECP" or "end" is not found, then we assume that ecp_info has been given without these lines but in the correct format - if start_pos == -1 or end_pos == -1: - raise ValueError("ECP info does not contain 'NewECP' or 'end' keyword.") - - # Extract content between "NewECP" and "end", exclusive of "end", then add correctly formatted "NewECP" and "end" - return f"NewECP\n{atom_ecp_info[start_pos:end_pos].strip()}\nend\n" +""" + for i, atom in enumerate(self.adsorbate_slab_cluster): + # Create the coords section for the adsorbate-slab complex + self.orcablocks["adsorbate_slab"] += create_atom_coord_string(atom=atom) -def generate_orca_input_preamble( - embedded_cluster: Atoms, - quantum_cluster_indices: list[int], - element_info: dict[ElementStr, ElementInfo] | None = None, - pal_nprocs_block: dict[str, int] | None = None, - method_block: dict[str, str] | None = None, - scf_block: dict[str, str] | None = None, -) -> str: - """ - From the quantum cluster Atoms object, generate the ORCA input preamble for the basis, method, pal, and scf blocks. - - Parameters - ---------- - embedded_cluster - The ASE Atoms object containing the atomic coordinates and atomic charges from the .pun ChemShell file. This object is created by the [quacc.atoms.skzcam.create_skzcam_clusters][] function. - quantum_cluster_indices - A list containing the indices of the atoms of embedded_cluster that form a quantum cluster. These indices are provided by the [quacc.atoms.skzcam.create_skzcam_clusters][] function. - element_info - A dictionary with elements as keys which gives the (1) number of core electrons as 'core', (2) basis set as 'basis', (3) effective core potential as 'ecp', (4) resolution-of-identity/density-fitting auxiliary basis set for DFT/HF calculations as 'ri_scf_basis' and (5) resolution-of-identity/density-fitting for correlated wave-function methods as 'ri_cwft_basis'. - pal_nprocs_block - A dictionary with the number of processors for the PAL block as 'nprocs' and the maximum memory-per-core blocks as 'maxcore'. - method_block - A dictionary that contains the method block for the ORCA input file. The key is the ORCA setting and the value is that setting's value. - scf_block - A dictionary that contains the SCF block for the ORCA input file. The key is the ORCA setting and the value is that setting's value. + # Create the coords section for the adsorbate and slab + if i in self.adsorbate_indices: + self.orcablocks["adsorbate"] += create_atom_coord_string(atom=atom) + if self.include_cp: + self.orcablocks["slab"] += create_atom_coord_string( + atom=atom, is_ghost_atom=True + ) + elif i in self.slab_indices: + self.orcablocks["slab"] += create_atom_coord_string(atom=atom) + if self.include_cp: + self.orcablocks["adsorbate"] += create_atom_coord_string( + atom=atom, is_ghost_atom=True + ) - Returns - ------- - str - The ORCA input preamble. - """ + # Create the coords section for the ECP region + ecp_region_coords_section = "" + for i, atom in enumerate(self.ecp_region): + atom_ecp_info = self._format_ecp_info( + atom_ecp_info=self.ecp_info[atom.symbol] + ) + ecp_region_coords_section += create_atom_coord_string( + atom=atom, + atom_ecp_info=atom_ecp_info, + pc_charge=self.ecp_region.get_array("oxi_states")[i], + ) - # Create the quantum cluster - quantum_cluster = embedded_cluster[quantum_cluster_indices] + # Add the ECP region coords section to the ads_slab_coords string + self.orcablocks["adsorbate_slab"] += f"{ecp_region_coords_section}end\nend\n" + self.orcablocks["slab"] += f"{ecp_region_coords_section}end\nend\n" + self.orcablocks["adsorbate"] += "end\nend\n" + + def _format_ecp_info(self, atom_ecp_info: str) -> str: + """ + Formats the ECP info so that it can be inputted to ORCA without problems. + + Parameters + ---------- + atom_ecp_info + The ECP info for a single atom. + + Returns + ------- + str + The formatted ECP info. + """ + # Find the starting position of "NewECP" and "end" + start_pos = atom_ecp_info.lower().find("newecp") + end_pos = atom_ecp_info.lower().find("end", start_pos) + + start_pos += len("NewECP") + + # If "NewECP" or "end" is not found, then we assume that ecp_info has been given without these lines but in the correct format + if start_pos == -1 or end_pos == -1: + raise ValueError("ECP info does not contain 'NewECP' or 'end' keyword.") + + # Extract content between "NewECP" and "end", exclusive of "end", then add correctly formatted "NewECP" and "end" + return f"NewECP\n{atom_ecp_info[start_pos:end_pos].strip()}\nend\n" + + def _generate_preamble_block(self) -> str: + """ + From the quantum cluster Atoms object, generate the ORCA input preamble for the basis, method, pal, and scf blocks. + + Returns + ------- + None + """ + + # Get the set of element symbols from the quantum cluster + element_symbols = list(set(self.adsorbate_slab_cluster.get_chemical_symbols())) + element_symbols.sort() + + # Check all element symbols are provided in element_info keys + if self.element_info is not None and not all( + element in self.element_info for element in element_symbols + ): + raise ValueError( + "Not all element symbols are provided in the element_info dictionary." + ) - # Get the set of element symbols from the quantum cluster - element_symbols = list(set(quantum_cluster.get_chemical_symbols())) - element_symbols.sort() + # Initialize preamble_input + preamble_input = "" + + # Add the pal_nprocs_block + if self.pal_nprocs_block is not None: + preamble_input += f"%pal nprocs {self.pal_nprocs_block['nprocs']} end\n" + preamble_input += f"%maxcore {self.pal_nprocs_block['maxcore']} end\n" + + # Add pointcharge file to read. It will be assumed that it is in the same folder as the input file + preamble_input += '%pointcharges "orca.pc"\n' + + # Make the method block + if self.method_block is not None and self.element_info is not None: + preamble_input += "%method\n" + # Iterate through the keys of method_block and add key value + if self.method_block is not None: + for key in self.method_block: + preamble_input += f"{key} {self.method_block[key]}\n" + # Iterate over the core value for each element (if it has been given) + if self.element_info is not None: + for element in element_symbols: + if "core" in self.element_info[element]: + preamble_input += ( + f"NewNCore {element} {self.element_info[element]['core']} end\n" + ) + if self.method_block is not None and self.element_info is not None: + preamble_input += "end\n" - # Check all element symbols are provided in element_info keys - if element_info is not None and not all( - element in element_info for element in element_symbols - ): - raise ValueError( - "Not all element symbols are provided in the element_info dictionary." - ) + # Make the basis block - # Initialize preamble_info - preamble_info = """""" - - # Add the pal_nprocs_block - if pal_nprocs_block is not None: - preamble_info += f"%pal nprocs {pal_nprocs_block['nprocs']} end\n" - preamble_info += f"%maxcore {pal_nprocs_block['maxcore']} end\n" - - # Add pointcharge file to read. It will be assumed that it is in the same folder as the input file - preamble_info += '%pointcharges "orca.pc"\n' - - # Make the method block - if method_block is not None and element_info is not None: - preamble_info += "%method\n" - # Iterate through the keys of method_block and add key value - if method_block is not None: - for key in method_block: - preamble_info += f"{key} {method_block[key]}\n" - # Iterate over the core value for each element (if it has been given) - if element_info is not None: - for element in element_symbols: - if "core" in element_info[element]: - preamble_info += ( - f"NewNCore {element} {element_info[element]['core']} end\n" + # First check if the basis key is the same for all elements. We use """ here because an option for these keys is "AutoAux" + if self.element_info is not None: + preamble_input += "%basis\n" + if ( + len( + {self.element_info[element]["basis"] for element in element_symbols} ) - if method_block is not None and element_info is not None: - preamble_info += "end\n" - - # Make the basis block - - # First check if the basis key is the same for all elements. We use """ here because an option for these keys is "AutoAux" - if element_info is not None: - preamble_info += "%basis\n" - if len({element_info[element]["basis"] for element in element_symbols}) == 1: - preamble_info += f"""Basis {element_info[element_symbols[0]]['basis']}\n""" - else: - for element in element_symbols: - element_basis = element_info[element]["basis"] - preamble_info += f"""NewGTO {element} "{element_basis}" end\n""" - - # Do the same for ri_scf_basis and ri_cwft_basis. - if ( - len({element_info[element]["ri_scf_basis"] for element in element_symbols}) - == 1 - ): - preamble_info += ( - f"""Aux {element_info[element_symbols[0]]['ri_scf_basis']}\n""" - ) - else: - for element in element_symbols: - element_basis = element_info[element]["ri_scf_basis"] - preamble_info += f'NewAuxJGTO {element} "{element_basis}" end\n' + == 1 + ): + preamble_input += ( + f"""Basis {self.element_info[element_symbols[0]]['basis']}\n""" + ) + else: + for element in element_symbols: + element_basis = self.element_info[element]["basis"] + preamble_input += f"""NewGTO {element} "{element_basis}" end\n""" - if ( - len( - list( + # Do the same for ri_scf_basis and ri_cwft_basis. + if ( + len( { - element_info[element]["ri_cwft_basis"] + self.element_info[element]["ri_scf_basis"] for element in element_symbols } ) - ) - == 1 - ): - preamble_info += ( - f"""AuxC {element_info[element_symbols[0]]['ri_cwft_basis']}\n""" - ) - else: - for element in element_symbols: - element_basis = element_info[element]["ri_cwft_basis"] - preamble_info += f"""NewAuxCGTO {element} "{element_basis}" end\n""" + == 1 + ): + preamble_input += ( + f"""Aux {self.element_info[element_symbols[0]]['ri_scf_basis']}\n""" + ) + else: + for element in element_symbols: + element_basis = self.element_info[element]["ri_scf_basis"] + preamble_input += f'NewAuxJGTO {element} "{element_basis}" end\n' - preamble_info += "end\n" + if ( + len( + list( + { + self.element_info[element]["ri_cwft_basis"] + for element in element_symbols + } + ) + ) + == 1 + ): + preamble_input += f"""AuxC {self.element_info[element_symbols[0]]['ri_cwft_basis']}\n""" + else: + for element in element_symbols: + element_basis = self.element_info[element]["ri_cwft_basis"] + preamble_input += ( + f"""NewAuxCGTO {element} "{element_basis}" end\n""" + ) - # Write the scf block - if scf_block is not None: - preamble_info += "%scf\n" - for key in scf_block: - preamble_info += f"""{key} {scf_block[key]}\n""" - preamble_info += "end\n" + preamble_input += "end\n" - return preamble_info + # Write the scf block + if self.scf_block is not None: + preamble_input += "%scf\n" + for key in self.scf_block: + preamble_input += f"""{key} {self.scf_block[key]}\n""" + preamble_input += "end\n" + # Add preamble_input to the orcablocks for the adsorbate-slab complex, adsorbate, and slab + self.orcablocks["adsorbate_slab"] += preamble_input + self.orcablocks["adsorbate"] += preamble_input + self.orcablocks["slab"] += preamble_input -def create_orca_point_charge_file( - embedded_cluster: Atoms, - quantum_cluster_indices: list[int], - ecp_region_indices: list[int], - pc_file: str | Path, -) -> None: - """ - Create a point charge file that can be read by ORCA. This requires the embedded_cluster Atoms object containing both atom_type and oxi_states arrays, as well as the indices of the quantum cluster and ECP region. - Parameters - ---------- - embedded_cluster - The ASE Atoms object containing the atomic coordinates and atomic charges from the .pun file. This object is created by the [quacc.atoms.skzcam.create_skzcam_clusters][] function. - quantum_cluster_indices - A list of lists containing the indices of the atoms in each quantum cluster. These indices are provided by the [quacc.atoms.skzcam.create_skzcam_clusters][] function. - ecp_region_indices - A list of lists containing the indices of the atoms in the ECP region for each quantum cluster. These indices are provided by the [quacc.atoms.skzcam.create_skzcam_clusters][] function. - pc_file - A file containing the point charges to be written by ORCA. - - Returns - ------- - None - """ - - # Get the oxi_states arrays from the embedded_cluster - oxi_states = embedded_cluster.get_array("oxi_states") - - # Check that none of the indices in quantum_cluster_indices are in ecp_region_indices - if not np.all([x not in ecp_region_indices for x in quantum_cluster_indices]): - raise ValueError("An atom in the quantum cluster is also in the ECP region.") - - # Get the number of point charges for this system - total_indices = quantum_cluster_indices + ecp_region_indices - num_pc = len(embedded_cluster) - len(total_indices) - counter = 0 - with Path.open(pc_file, "w") as f: - # Write the number of point charges first - f.write(f"{num_pc}\n") - for i in range(len(embedded_cluster)): - if i not in total_indices: - counter += 1 - position = embedded_cluster[i].position - if counter != num_pc: - f.write( - f"{oxi_states[i]:-16.11f} {position[0]:-16.11f} {position[1]:-16.11f} {position[2]:-16.11f}\n" - ) - else: - f.write( - f"{oxi_states[i]:-16.11f} {position[0]:-16.11f} {position[1]:-16.11f} {position[2]:-16.11f}" - ) - - -def get_cluster_info_from_slab( - adsorbate_slab_file: str | Path, - slab_center_indices: list[int], - adsorbate_indices: list[int], -) -> tuple[Atoms, Atoms, int, NDArray, NDArray]: +class CreateSKZCAMClusters: """ - Read the file containing the periodic slab and adsorbate (geometry optimized) and return the key information needed to create an embedded cluster in ChemShell. + A class to create the quantum clusters and ECP regions for the SKZCAM protocol. - Parameters + Attributes ---------- - adsorbate_slab_file - The path to the file containing the adsorbate molecule on the surface slab. It can be in any format that ASE can read. adsorbate_indices The indices of the atoms that make up the adsorbate molecule. slab_center_indices - The indices of the atoms that are at the 'center' of the slab right beneath the adsorbate. - - Returns - ------- - Atoms - The Atoms object of the adsorbate molecule. - Atoms - The Atoms object of the surface slab. - int - The index of the first atom of the slab as listed in slab_center_indices. - NDArray - The position of the center of the cluster. - NDArray - The vector from the center of the slab to the center of mass of the adsorbate. - """ - - # Get the necessary information for the cluster from a provided slab file (in any format that ASE can read) - adsorbate_slab = read(adsorbate_slab_file) - - # Find indices (within adsorbate_slab) of the slab - slab_indices = [ - i for i, _ in enumerate(adsorbate_slab) if i not in adsorbate_indices - ] - - # Create slab from adsorbate_slab - slab = adsorbate_slab[slab_indices] - - # Find index of the first center atom of the slab as listed in slab_center_indices - slab_center_idx = next( - index for index, x in enumerate(slab_indices) if x == slab_center_indices[0] - ) - - # Get the center of the cluster from the atom indices - slab_center_position = adsorbate_slab[slab_center_indices].get_positions().sum( - axis=0 - ) / len(slab_center_indices) - - adsorbate = adsorbate_slab[adsorbate_indices] - - # Get the relative distance of the adsorbate from the first center atom of the slab as defined in the slab_center_indices - adsorbate_com = adsorbate.get_center_of_mass() - adsorbate_vector_from_slab = ( - adsorbate[0].position - adsorbate_slab[slab_center_indices[0]].position - ) - - # Add the height of the adsorbate from the slab along the z-direction relative to the first center atom of the slab as defined in the slab_center_indices - adsorbate_com_z_disp = ( - adsorbate_com[2] - adsorbate_slab[slab_center_indices[0]].position[2] - ) - center_position = ( - np.array([0.0, 0.0, adsorbate_com_z_disp]) - + slab_center_position - - adsorbate_slab[slab_center_indices[0]].position - ) - - return ( - adsorbate, - slab, - slab_center_idx, - center_position, - adsorbate_vector_from_slab, - ) - - -@requires(has_chemshell, "ChemShell is not installed") -def generate_chemshell_cluster( - slab: Atoms, - slab_center_idx: int, - atom_oxi_states: dict[str, float], - filepath: str | Path, - chemsh_radius_active: float = 40.0, - chemsh_radius_cluster: float = 60.0, - chemsh_bq_layer: float = 6.0, - write_xyz_file: bool = False, -) -> None: - """ - Run ChemShell to create an embedded cluster from a slab. - - Parameters - ---------- - slab - The Atoms object of the slab. - slab_center_idx - The index of the (first) atom at the center of the slab, this index corresponds to the atom in the slab_center_idx list but adjusted for the slab (which does not contain the adsorbate atoms) + The indices of the atoms that make up the 'center' of the slab right beneath the adsorbate. + slab_indices + The indices of the atoms that make up the slab. atom_oxi_states - The oxidation states of the atoms in the slab as a dictionary - filepath - The location where the ChemShell output files will be written. - chemsh_radius_active - The radius of the active region in Angstroms. This 'active' region is simply region where the charge fitting is performed to ensure correct Madelung potential; it can be a relatively large value. - chemsh_radius_cluster - The radius of the total embedded cluster in Angstroms. - chemsh_bq_layer - The height above the surface to place some additional fitting point charges in Angstroms; simply for better reproduction of the electrostatic potential close to the adsorbate. - write_xyz_file - Whether to write an XYZ file of the cluster for visualisation. - - Returns - ------- - None + A dictionary with the element symbol as the key and its oxidation state as the value. + adsorbate_slab_file + The path to the file containing the adsorbate molecule on the surface slab. It can be in any format that ASE can read. + pun_file + The path to the .pun file containing the atomic coordinates and charges of the adsorbate-slab complex. This file should be generated by ChemShell. If it is None, then ChemShell wil be used to create this file. + adsorbate + The ASE Atoms object containing the atomic coordinates of the adsorbate. + slab + The ASE Atoms object containing the atomic coordinates of the slab. + adsorbate_slab + The ASE Atoms object containing the atomic coordinates of the adsorbate-slab complex. + adsorbate_slab_embedded_cluster + The ASE Atoms object containing the atomic coordinates, atomic charges and atom type (i.e., point charge or cation/anion) from the .pun file for the embedded cluster of the adsorbate-slab complex. + slab_embedded_cluster + The ASE Atoms object containing the atomic coordinates, atomic charges and atom type (i.e., point charge or cation/anion) from the .pun file for the embedded cluster of the slab. + quantum_cluster_indices_set + A list of lists of indices of the atoms in the set of quantum clusters created by the SKZCAM protocol + ecp_region_indices_set + A list of lists of indices of the atoms in the ECP region for the set of quantum clusters created by the SKZCAM protocol """ - from chemsh.io.tools import convert_atoms_to_frag - - # Translate slab such that first Mg atom is at 0,0,0 - slab.translate(-slab.get_positions()[slab_center_idx]) - - # Convert ASE Atoms to ChemShell Fragment object - slab_frag = convert_atoms_to_frag(slab, connect_mode="ionic", dim="2D") - - # Add the atomic charges to the fragment - slab_frag.addCharges(atom_oxi_states) - - # Create the chemshell cluster (i.e., add electrostatic fitting charges) from the fragment - chemsh_embedded_cluster = slab_frag.construct_cluster( - origin=slab_center_idx, - radius_cluster=chemsh_radius_cluster / Bohr, - radius_active=chemsh_radius_active / Bohr, - bq_layer=chemsh_bq_layer / Bohr, - adjust_charge="coordination_scaled", - ) - - # Save the final cluster to a .pun file - chemsh_embedded_cluster.save(filename=Path(filepath).with_suffix(".pun"), fmt="pun") - - if write_xyz_file: - # XYZ for visualisation - chemsh_embedded_cluster.save( - filename=Path(filepath).with_suffix(".xyz"), fmt="xyz" - ) + def __init__( + self, + adsorbate_indices: list[int], + slab_center_indices: list[int], + atom_oxi_states: dict[str, int], + adsorbate_slab_file: str | Path | None = None, + pun_file: str | Path | None = None, + ) -> None: + """ + Parameters + ---------- + adsorbate_indices + The indices of the atoms that make up the adsorbate molecule. + slab_center_indices + The indices of the atoms that make up the 'center' of the slab right beneath the adsorbate. + atom_oxi_states + A dictionary with the element symbol as the key and its oxidation state as the value. + adsorbate_slab_file + The path to the file containing the adsorbate molecule on the surface slab. It can be in any format that ASE can read. + pun_file + The path to the .pun file containing the atomic coordinates and charges of the adsorbate-slab complex. This file should be generated by ChemShell. If it is None, then ChemShell wil be used to create this file. + + Returns + ------- + None + """ + + self.adsorbate_indices = adsorbate_indices + self.slab_center_indices = slab_center_indices + self.slab_indices = None # This will be set later + self.atom_oxi_states = atom_oxi_states + self.adsorbate_slab_file = adsorbate_slab_file + self.pun_file = pun_file + + # Check that the adsorbate_indices and slab_center_indices are not the same + if any(x in self.adsorbate_indices for x in self.slab_center_indices): + raise ValueError( + "The adsorbate and slab center indices cannot be the same." + ) -def create_skzcam_clusters( - pun_file: str | Path, - center_position: NDArray, - atom_oxi_states: dict[str, float], - shell_max: int = 10, - shell_width: float = 0.1, - bond_dist: float = 2.5, - ecp_dist: float = 6.0, - write_clusters: bool = False, - write_clusters_path: str | Path = ".", - write_include_ecp: bool = False, -) -> tuple[Atoms, list[list[int]], list[list[int]]]: - """ - From a provided .pun file (generated by ChemShell), this function creates quantum clusters using the SKZCAM protocol. It will return the embedded cluster Atoms object and the indices of the atoms in the quantum clusters and the ECP region. The number of clusters created is controlled by the rdf_max parameter. + # Check that the adsorbate_slab_file and pun_file are not both None + if self.adsorbate_slab_file is None and self.pun_file is None: + raise ValueError( + "Either the adsorbate_slab_file or pun_file must be provided." + ) - Parameters - ---------- - pun_file - The path to the .pun file created by ChemShell to be read. - center_position - The position of the center of the embedded cluster (i.e., position of the adsorbate). - atom_oxi_states - A dictionary containing the atomic symbols as keys and the oxidation states as values. - shell_max - The maximum number of quantum clusters to be created. - shell_width - Defines the distance between atoms within shells; this is the maximum distance between any two atoms within the shell. - bond_dist - The distance within which an anion is considered to be coordinating a cation. - ecp_dist - The distance from edges of the quantum cluster to define the ECP region. - write_clusters - If True, the quantum clusters will be written to a file. - write_clusters_path - The path to the file where the quantum clusters will be written. - write_include_ecp - If True, the ECP region will be included in the quantum clusters. + # Initialize the adsorbate, slab and adsorbate_slab Atoms object which contains the adsorbate, slab and adsorbate-slab complex respectively + self.adsorbate: Atoms | None + self.slab: Atoms | None + self.adsorbate_slab: Atoms | None - Returns - ------- - Atoms - The ASE Atoms object containing the atomic coordinates and atomic charges from the .pun file. - list[list[int]] - A list of lists containing the indices of the atoms in each quantum cluster. - list[list[int]] - A list of lists containing the indices of the atoms in the ECP region for each quantum cluster. - """ + # Initialize the embedded_adsorbate_slab_cluster, and embedded_slab_cluster Atoms object which are the embedded cluster for the adsorbate-slab complex and slab respectively + self.adsorbate_slab_embedded_cluster: Atoms | None = None + self.slab_embedded_cluster: Atoms | None = None - # Read the .pun file and create the embedded_cluster Atoms object - embedded_cluster = convert_pun_to_atoms( - pun_file=pun_file, atom_oxi_states=atom_oxi_states - ) + # Initialize the quantum cluster indices and ECP region indices + self.quantum_cluster_indices_set: list[list[int]] | None = None + self.ecp_region_indices_set: list[list[int]] | None = None - # Get distances of all atoms from the cluster center - atom_center_distances = _get_atom_distances( - embedded_cluster=embedded_cluster, center_position=center_position - ) + def convert_slab_to_atoms(self) -> None: + """ + Read the file containing the periodic slab and adsorbate (geometry optimized) and format the resulting Atoms object to be used to create a .pun file in ChemShell. - # Determine the cation shells from the center of the embedded cluster - _, cation_shells_idx = _find_cation_shells( - embedded_cluster=embedded_cluster, - distances=atom_center_distances, - shell_width=shell_width, - ) + Returns + ------- + None + """ - # Create the distance matrix for the embedded cluster - embedded_cluster_all_dist = embedded_cluster.get_all_distances() + # Get the necessary information for the cluster from a provided slab file (in any format that ASE can read) + adsorbate_slab = read(self.adsorbate_slab_file) - # Create the anion coordination list for each cation shell - anion_coord_idx = [] - for shell_idx in range(shell_max): - cation_shell = cation_shells_idx[shell_idx] - anion_coord_idx += [ - _get_anion_coordination( - embedded_cluster, cation_shell, embedded_cluster_all_dist, bond_dist - ) + # Find indices (within adsorbate_slab) of the slab + slab_indices = self.slab_center_indices + [ + i + for i, _ in enumerate(adsorbate_slab) + if i not in (self.adsorbate_indices + self.slab_center_indices) ] - # Create the quantum clusters by summing up the indices of the cations and their coordinating anions - quantum_cluster_indices = [] - dummy_cation_indices = [] - dummy_anion_indices = [] - for shell_idx in range(shell_max): - dummy_cation_indices += cation_shells_idx[shell_idx] - dummy_anion_indices += anion_coord_idx[shell_idx] - quantum_cluster_indices += [ - list(set(dummy_cation_indices + dummy_anion_indices)) - ] + # Create adsorbate and slab from adsorbate_slab + slab = adsorbate_slab[slab_indices] + adsorbate = adsorbate_slab[self.adsorbate_indices] - # Get the ECP region for each quantum cluster - ecp_region_indices = _get_ecp_region( - embedded_cluster=embedded_cluster, - quantum_cluster_indices=quantum_cluster_indices, - dist_matrix=embedded_cluster_all_dist, - ecp_dist=ecp_dist, - ) + adsorbate.translate(-slab[0].position) + slab.translate(-slab[0].position) - # Write the quantum clusters to files - if write_clusters: - for idx in range(len(quantum_cluster_indices)): - quantum_atoms = embedded_cluster[quantum_cluster_indices[idx]] - if write_include_ecp: - ecp_atoms = embedded_cluster[ecp_region_indices[idx]] - ecp_atoms.set_chemical_symbols(np.array(["U"] * len(ecp_atoms))) - cluster_atoms = quantum_atoms + ecp_atoms - else: - cluster_atoms = quantum_atoms - write(Path(write_clusters_path, f"SKZCAM_cluster_{idx}.xyz"), cluster_atoms) + # Get the relative distance of the adsorbate from the first center atom of the slab as defined in the slab_center_indices + adsorbate_vector_from_slab = adsorbate[0].position - slab[0].position - return embedded_cluster, quantum_cluster_indices, ecp_region_indices + # Get the center of the cluster from the slab_center_indices + slab_center_position = slab[ + : len(self.slab_center_indices) + ].get_positions().sum(axis=0) / len(self.slab_center_indices) + # Add the height of the adsorbate from the slab along the z-direction relative to the slab_center + adsorbate_com_z_disp = ( + adsorbate.get_center_of_mass()[2] - slab_center_position[2] + ) -def convert_pun_to_atoms( - pun_file: str | Path, atom_oxi_states: dict[str, float] -) -> Atoms: - """ - Reads a .pun file and returns an ASE Atoms object containing the atomic coordinates, - point charges/oxidation states, and atom types. + center_position = ( + np.array([0.0, 0.0, adsorbate_com_z_disp]) + slab_center_position + ) - Parameters - ---------- - pun_file - The path to the .pun file created by ChemShell to be read. - atom_oxi_states - A dictionary containing the atomic symbols as keys and the oxidation states as values. + self.adsorbate = adsorbate + self.slab = slab + self.adsorbate_slab = adsorbate_slab + self.adsorbate_vector_from_slab = adsorbate_vector_from_slab + self.center_position = center_position + + @requires(has_chemshell, "ChemShell is not installed") + def run_chemshell( + self, + filepath: str | Path, + chemsh_radius_active: float = 40.0, + chemsh_radius_cluster: float = 60.0, + chemsh_bq_layer: float = 6.0, + write_xyz_file: bool = False, + ) -> None: + """ + Run ChemShell to create an embedded cluster from a slab. + + Parameters + ---------- + filepath + The location where the ChemShell output files will be written. + chemsh_radius_active + The radius of the active region in Angstroms. This 'active' region is simply region where the charge fitting is performed to ensure correct Madelung potential; it can be a relatively large value. + chemsh_radius_cluster + The radius of the total embedded cluster in Angstroms. + chemsh_bq_layer + The height above the surface to place some additional fitting point charges in Angstroms; simply for better reproduction of the electrostatic potential close to the adsorbate. + write_xyz_file + Whether to write an XYZ file of the cluster for visualisation. + + Returns + ------- + None + """ + from chemsh.io.tools import convert_atoms_to_frag + + # Convert ASE Atoms to ChemShell Fragment object + slab_frag = convert_atoms_to_frag(self.slab, connect_mode="ionic", dim="2D") + + # Add the atomic charges to the fragment + slab_frag.addCharges(self.atom_oxi_states) + + # Create the chemshell cluster (i.e., add electrostatic fitting charges) from the fragment + chemsh_slab_embedded_cluster = slab_frag.construct_cluster( + origin=0, + radius_cluster=chemsh_radius_cluster / Bohr, + radius_active=chemsh_radius_active / Bohr, + bq_layer=chemsh_bq_layer / Bohr, + adjust_charge="coordination_scaled", + ) - Returns - ------- - Atoms - The ASE Atoms object containing the atomic coordinates and atomic charges from the .pun file. - The `oxi_states` array contains the atomic charges, and the `atom_type` array contains the - atom types (cation, anion, neutral). - """ + # Save the final cluster to a .pun file + chemsh_slab_embedded_cluster.save( + filename=Path(filepath).with_suffix(".pun"), fmt="pun" + ) + self.pun_file = Path(filepath).with_suffix(".pun") - # Create a dictionary containing the atom types and whether they are cations or anions - atom_type_dict = { - atom: "cation" if oxi_state > 0 else "anion" if oxi_state < 0 else "neutral" - for atom, oxi_state in atom_oxi_states.items() - } - - # Load the pun file as a list of strings - with zopen(zpath(Path(pun_file))) as f: - raw_pun_file = [ - line.rstrip().decode("utf-8") if isinstance(line, bytes) else line.rstrip() - for line in f - ] + if write_xyz_file: + # XYZ for visualisation + chemsh_slab_embedded_cluster.save( + filename=Path(filepath).with_suffix(".xyz"), fmt="xyz" + ) - # Get the number of atoms and number of atomic charges in the .pun file - n_atoms = int(raw_pun_file[3].split()[-1]) - n_charges = int(raw_pun_file[4 + n_atoms - 1 + 3].split()[-1]) + def run_skzcam( + self, + shell_max: int = 10, + shell_width: float = 0.1, + bond_dist: float = 2.5, + ecp_dist: float = 6.0, + write_clusters: bool = False, + write_clusters_path: str | Path = ".", + write_include_ecp: bool = False, + ) -> SKZCAMOutput: + """ + From a provided .pun file (generated by ChemShell), this function creates quantum clusters using the SKZCAM protocol. It will return the embedded cluster Atoms object and the indices of the atoms in the quantum clusters and the ECP region. The number of clusters created is controlled by the rdf_max parameter. + + Parameters + ---------- + shell_max + The maximum number of quantum clusters to be created. + shell_width + Defines the distance between atoms within shells; this is the maximum distance between any two atoms within the shell. + bond_dist + The distance within which an anion is considered to be coordinating a cation. + ecp_dist + The distance from edges of the quantum cluster to define the ECP region. + write_clusters + If True, the quantum clusters will be written to a file. + write_clusters_path + The path to the file where the quantum clusters will be written. + write_include_ecp + If True, the ECP region will be included in the quantum clusters. + + Returns + ------- + None + """ + + # Read the .pun file and create the embedded_cluster Atoms object + self.slab_embedded_cluster = self._convert_pun_to_atoms(pun_file=self.pun_file) + + # Get distances of all atoms from the cluster center + atom_center_distances = _get_atom_distances( + atoms=self.slab_embedded_cluster, center_position=self.center_position + ) - # Check if number of atom charges same as number of atom positions - if n_atoms != n_charges: - raise ValueError( - "Number of atomic positions and atomic charges in the .pun file are not the same." + # Determine the cation shells from the center of the embedded cluster + _, cation_shells_idx = self._find_cation_shells( + slab_embedded_cluster=self.slab_embedded_cluster, + distances=atom_center_distances, + shell_width=shell_width, ) - raw_atom_positions = raw_pun_file[4 : 4 + n_atoms] - raw_charges = raw_pun_file[7 + n_atoms : 7 + 2 * n_atoms] - charges = [float(charge) for charge in raw_charges] - - # Add the atomic positions the embedded_cluster Atoms object (converting from Bohr to Angstrom) - atom_types = [] - atom_numbers = [] - atom_positions = [] - for _, line in enumerate(raw_atom_positions): - line_info = line.split() - - # Add the atom type to the atom_type_list - if line_info[0] in atom_type_dict: - atom_types.append(atom_type_dict[line_info[0]]) - elif line_info[0] == "F": - atom_types.append("pc") - else: - atom_types.append("unknown") - - # Add the atom number to the atom_number_list and position to the atom_position_list - atom_numbers += [atomic_numbers[line_info[0]]] - atom_positions += [ - [ - float(line_info[1]) * Bohr, - float(line_info[2]) * Bohr, - float(line_info[3]) * Bohr, + # Create the distance matrix for the embedded cluster + slab_embedded_cluster_all_dist = self.slab_embedded_cluster.get_all_distances() + + # Create the anion coordination list for each cation shell + anion_coord_idx = [] + for shell_idx in range(shell_max): + shell_indices = cation_shells_idx[shell_idx] + anion_coord_idx += [ + self._get_anion_coordination( + slab_embedded_cluster=self.slab_embedded_cluster, + cation_shell_indices=shell_indices, + dist_matrix=slab_embedded_cluster_all_dist, + bond_dist=bond_dist, + ) ] - ] - embedded_cluster = Atoms(numbers=atom_numbers, positions=atom_positions) + # Create the quantum clusters by summing up the indices of the cations and their coordinating anions + slab_quantum_cluster_indices_set = [] + dummy_cation_indices = [] + dummy_anion_indices = [] + for shell_idx in range(shell_max): + dummy_cation_indices += cation_shells_idx[shell_idx] + dummy_anion_indices += anion_coord_idx[shell_idx] + slab_quantum_cluster_indices_set += [ + list(set(dummy_cation_indices + dummy_anion_indices)) + ] - # Center the embedded cluster so that atom index 0 is at the [0, 0, 0] position - embedded_cluster.translate(-embedded_cluster[0].position) + # Get the ECP region for each quantum cluster + slab_ecp_region_indices_set = self._get_ecp_region( + slab_embedded_cluster=self.slab_embedded_cluster, + quantum_cluster_indices_set=slab_quantum_cluster_indices_set, + dist_matrix=slab_embedded_cluster_all_dist, + ecp_dist=ecp_dist, + ) - # Add the `oxi_states` and `atom_type` arrays to the Atoms object - embedded_cluster.set_array("oxi_states", np.array(charges)) - embedded_cluster.set_array("atom_type", np.array(atom_types)) + # Create the adsorbate_slab_embedded_cluster from slab_embedded_cluster and adsorbate atoms objects. This also sets the final quantum_cluster_indices_set and ecp_region_indices_set for the adsorbate_slab_embedded_cluster + self._create_adsorbate_slab_embedded_cluster( + quantum_cluster_indices_set=slab_quantum_cluster_indices_set, + ecp_region_indices_set=slab_ecp_region_indices_set, + ) - return embedded_cluster + # Write the quantum clusters to files + if write_clusters: + for idx in range(len(self.quantum_cluster_indices_set)): + quantum_atoms = self.adsorbate_slab_embedded_cluster[ + self.quantum_cluster_indices_set[idx] + ] + if write_include_ecp: + ecp_atoms = self.adsorbate_slab_embedded_cluster[ + self.ecp_region_indices_set[idx] + ] + ecp_atoms.set_chemical_symbols(np.array(["U"] * len(ecp_atoms))) + cluster_atoms = quantum_atoms + ecp_atoms + else: + cluster_atoms = quantum_atoms + write( + Path(write_clusters_path, f"SKZCAM_cluster_{idx}.xyz"), + cluster_atoms, + ) + return { + "adsorbate_slab_embedded_cluster": self.adsorbate_slab_embedded_cluster, + "quantum_cluster_indices_set": self.quantum_cluster_indices_set, + "ecp_region_indices_set": self.ecp_region_indices_set, + } -def insert_adsorbate_to_embedded_cluster( - embedded_cluster: Atoms, - adsorbate: Atoms, - adsorbate_vector_from_slab: NDArray, - quantum_cluster_indices: list[list[int]] | None = None, - ecp_region_indices: list[list[int]] | None = None, -) -> tuple[Atoms, list[list[int]], list[list[int]]]: - """ - Insert the adsorbate into the embedded cluster and update the quantum cluster and ECP region indices. + def _convert_pun_to_atoms(self, pun_file: str | Path) -> Atoms: + """ + Reads a .pun file and returns an ASE Atoms object containing the atomic coordinates, + point charges/oxidation states, and atom types. + + Parameters + ---------- + pun_file + The path to the .pun file created by ChemShell to be read. + + Returns + ------- + Atoms + The ASE Atoms object containing the atomic coordinates and atomic charges from the .pun file. + The `oxi_states` array contains the atomic charges, and the `atom_type` array contains the + atom types (cation, anion, neutral). + """ + + # Create a dictionary containing the atom types and whether they are cations or anions + atom_type_dict = { + atom: "cation" if oxi_state > 0 else "anion" if oxi_state < 0 else "neutral" + for atom, oxi_state in self.atom_oxi_states.items() + } - Parameters - ---------- - embedded_cluster - The ASE Atoms object containing the atomic coordinates and atomic charges from the .pun file. - adsorbate - The ASE Atoms object of the adsorbate molecule. - adsorbate_vector_from_slab - The vector from the first atom of the embedded cluster to the center of mass of the adsorbate. - quantum_cluster_indices - A list of lists containing the indices of the atoms in each quantum cluster. - ecp_region_indices - A list of lists containing the indices of the atoms in the ECP region for each quantum cluster. + # Load the pun file as a list of strings + with zopen(zpath(Path(pun_file))) as f: + raw_pun_file = [ + line.rstrip().decode("utf-8") + if isinstance(line, bytes) + else line.rstrip() + for line in f + ] - Returns - ------- - Atoms - The ASE Atoms object containing the adsorbate and embedded cluster - list[list[int]] - A list of lists containing the indices of the atoms in each quantum cluster. - list[list[int]] - A list of lists containing the indices of the atoms in the ECP region for each quantum cluster. - """ + # Get the number of atoms and number of atomic charges in the .pun file + n_atoms = int(raw_pun_file[3].split()[-1]) + n_charges = int(raw_pun_file[4 + n_atoms - 1 + 3].split()[-1]) - # Remove PBC from the adsorbate - adsorbate.set_pbc(False) + # Check if number of atom charges same as number of atom positions + if n_atoms != n_charges: + raise ValueError( + "Number of atomic positions and atomic charges in the .pun file are not the same." + ) - # Translate the adsorbate to the correct position relative to the slab - adsorbate.translate(-adsorbate[0].position + adsorbate_vector_from_slab) + raw_atom_positions = raw_pun_file[4 : 4 + n_atoms] + raw_charges = raw_pun_file[7 + n_atoms : 7 + 2 * n_atoms] + charges = [float(charge) for charge in raw_charges] + + # Add the atomic positions the embedded_cluster Atoms object (converting from Bohr to Angstrom) + atom_types = [] + atom_numbers = [] + atom_positions = [] + for _, line in enumerate(raw_atom_positions): + line_info = line.split() + + # Add the atom type to the atom_type_list + if line_info[0] in atom_type_dict: + atom_types.append(atom_type_dict[line_info[0]]) + elif line_info[0] == "F": + atom_types.append("pc") + else: + atom_types.append("unknown") + + # Add the atom number to the atom_number_list and position to the atom_position_list + atom_numbers += [atomic_numbers[line_info[0]]] + atom_positions += [ + [ + float(line_info[1]) * Bohr, + float(line_info[2]) * Bohr, + float(line_info[3]) * Bohr, + ] + ] - # Set oxi_state and atom_type arrays for the adsorbate - adsorbate.set_array("oxi_states", np.array([0.0] * len(adsorbate))) - adsorbate.set_array("atom_type", np.array(["adsorbate"] * len(adsorbate))) + slab_embedded_cluster = Atoms(numbers=atom_numbers, positions=atom_positions) + + # Center the embedded cluster so that atom index 0 is at the [0, 0, 0] position + slab_embedded_cluster.translate(-slab_embedded_cluster[0].position) + + # Add the `oxi_states` and `atom_type` arrays to the Atoms object + slab_embedded_cluster.set_array("oxi_states", np.array(charges)) + slab_embedded_cluster.set_array("atom_type", np.array(atom_types)) + + return slab_embedded_cluster + + def _create_adsorbate_slab_embedded_cluster( + self, + quantum_cluster_indices_set: list[list[int]] | None = None, + ecp_region_indices_set: list[list[int]] | None = None, + ) -> None: + """ + Insert the adsorbate into the embedded cluster and update the quantum cluster and ECP region indices. + + Parameters + ---------- + quantum_cluster_indices_set + A list of lists containing the indices of the atoms in each quantum cluster. + ecp_region_indices_set + A list of lists containing the indices of the atoms in the ECP region for each quantum cluster. + + Returns + ------- + None + """ + + # Remove PBC from the adsorbate + self.adsorbate.set_pbc(False) + + # Translate the adsorbate to the correct position relative to the slab + self.adsorbate.translate( + self.slab_embedded_cluster[0].position + - self.adsorbate[0].position + + self.adsorbate_vector_from_slab + ) - # Add the adsorbate to the embedded cluster - embedded_adsorbate_cluster = adsorbate + embedded_cluster + # Set oxi_state and atom_type arrays for the adsorbate + self.adsorbate.set_array("oxi_states", np.array([0.0] * len(self.adsorbate))) + self.adsorbate.set_array( + "atom_type", np.array(["adsorbate"] * len(self.adsorbate)) + ) - # Update the quantum cluster and ECP region indices - if quantum_cluster_indices is not None: - quantum_cluster_indices = [ - list(range(len(adsorbate))) + [idx + len(adsorbate) for idx in cluster] - for cluster in quantum_cluster_indices - ] - if ecp_region_indices is not None: - ecp_region_indices = [ - [idx + len(adsorbate) for idx in cluster] for cluster in ecp_region_indices - ] + # Add the adsorbate to the embedded cluster + self.adsorbate_slab_embedded_cluster = ( + self.adsorbate + self.slab_embedded_cluster + ) - return embedded_adsorbate_cluster, quantum_cluster_indices, ecp_region_indices + # Update the quantum cluster and ECP region indices + if quantum_cluster_indices_set is not None: + quantum_cluster_indices_set = [ + list(range(len(self.adsorbate))) + + [idx + len(self.adsorbate) for idx in cluster] + for cluster in quantum_cluster_indices_set + ] + if ecp_region_indices_set is not None: + ecp_region_indices_set = [ + [idx + len(self.adsorbate) for idx in cluster] + for cluster in ecp_region_indices_set + ] + self.quantum_cluster_indices_set = quantum_cluster_indices_set + self.ecp_region_indices_set = ecp_region_indices_set + + def _find_cation_shells( + self, slab_embedded_cluster: Atoms, distances: NDArray, shell_width: float = 0.1 + ) -> tuple[list[list[int]], list[list[int]]]: + """ + Returns a list of lists containing the indices of the cations in each shell, based on distance from the embedded cluster center. + This is achieved by clustering the data based on the DBSCAN clustering algorithm. + + Parameters + ---------- + slab_embedded_cluster + The ASE Atoms object containing the atomic coordinates AND the atom types (i.e. cation or anion). + distances + The distance of atoms from the cluster center. + shell_width + Defines the distance between atoms within shells; this is the maximum distance between any two atoms within the shell + + Returns + ------- + list[list[int]] + A list of lists containing the distance of the cation in each shell from the adsorbate. + list[list[int]] + A list of lists containing the indices of the cations in each shell. + """ + + # Define the empty list to store the cation shells + shells_distances = [] + shells_indices = [] + + # Sort the points by distance from the cluster center for the cations only + distances_sorted = [] + distances_sorted_indices = [] + for i in np.argsort(distances): + if slab_embedded_cluster.get_array("atom_type")[i] == "cation": + distances_sorted.append(distances[i]) + distances_sorted_indices.append(i) + + current_point = distances_sorted[0] + current_shell = [current_point] + current_shell_idx = [distances_sorted_indices[0]] + + for idx, point in enumerate(distances_sorted[1:]): + if point <= current_point + shell_width: + current_shell.append(point) + current_shell_idx.append(distances_sorted_indices[idx + 1]) + else: + shells_distances.append(current_shell) + shells_indices.append(current_shell_idx) + current_shell = [point] + current_shell_idx = [distances_sorted_indices[idx + 1]] + current_point = point + shells_distances.append(current_shell) + shells_indices.append(current_shell_idx) + + return shells_distances, shells_indices + + def _get_anion_coordination( + self, + slab_embedded_cluster: Atoms, + cation_shell_indices: list[int], + dist_matrix: NDArray, + bond_dist: float = 2.5, + ) -> list[int]: + """ + Returns a list of lists containing the indices of the anions coordinating the cation indices provided. + + Parameters + ---------- + slab_embedded_cluster + The ASE Atoms object containing the atomic coordinates AND the atom types (i.e. cation or anion). + cation_shell_indices + A list of the indices of the cations in the cluster. + dist_matrix + A matrix containing the distances between each pair of atoms in the embedded cluster. + bond_dist + The distance within which an anion is considered to be coordinating a cation. + + Returns + ------- + list[int] + A list containing the indices of the anions coordinating the cation indices. + """ + + # Define the empty list to store the anion coordination + anion_coord_indices = [] + + # Iterate over the cation shell indices and find the atoms within the bond distance of each cation + for atom_idx in cation_shell_indices: + anion_coord_indices += [ + idx + for idx, dist in enumerate(dist_matrix[atom_idx]) + if ( + dist < bond_dist + and slab_embedded_cluster.get_array("atom_type")[idx] == "anion" + ) + ] -def _get_atom_distances(embedded_cluster: Atoms, center_position: NDArray) -> NDArray: + return list(set(anion_coord_indices)) + + def _get_ecp_region( + self, + slab_embedded_cluster: Atoms, + quantum_cluster_indices_set: list[int], + dist_matrix: NDArray, + ecp_dist: float = 6.0, + ) -> list[list[int]]: + """ + Returns a list of lists containing the indices of the atoms in the ECP region of the embedded cluster for each quantum cluster + + Parameters + ---------- + slab_embedded_cluster + The ASE Atoms object containing the atomic coordinates AND the atom types (i.e. cation or anion). + quantum_cluster_indices_set + A list of lists containing the indices of the atoms in each quantum cluster. + dist_matrix + A matrix containing the distances between each pair of atoms in the embedded cluster. + ecp_dist + The distance from edges of the quantum cluster to define the ECP region. + + Returns + ------- + list[list[int]] + A list of lists containing the indices of the atoms in the ECP region for each quantum cluster. + """ + + ecp_region_indices_set = [] + dummy_cation_indices = [] + + # Iterate over the quantum clusters and find the atoms within the ECP distance of each quantum cluster + for cluster in quantum_cluster_indices_set: + dummy_cation_indices += cluster + cluster_ecp_region_idx = [] + for atom_idx in dummy_cation_indices: + for idx, dist in enumerate(dist_matrix[atom_idx]): + # Check if the atom is within the ecp_dist region and is not in the quantum cluster and is a cation + if ( + dist < ecp_dist + and idx not in dummy_cation_indices + and slab_embedded_cluster.get_array("atom_type")[idx] + == "cation" + ): + cluster_ecp_region_idx += [idx] + + ecp_region_indices_set += [list(set(cluster_ecp_region_idx))] + + return ecp_region_indices_set + + +def _get_atom_distances(atoms: Atoms, center_position: NDArray) -> NDArray: """ Returns the distance of all atoms from the center position of the embedded cluster @@ -1275,151 +1415,48 @@ def _get_atom_distances(embedded_cluster: Atoms, center_position: NDArray) -> ND An array containing the distances of each atom in the Atoms object from the cluster center. """ - return np.array( - [np.linalg.norm(atom.position - center_position) for atom in embedded_cluster] - ) - - -def _find_cation_shells( - embedded_cluster: Atoms, distances: NDArray, shell_width: float = 0.1 -) -> list[list[int]]: - """ - Returns a list of lists containing the indices of the cations in each shell, based on distance from the embedded cluster center. - This is achieved by clustering the data based on the DBSCAN clustering algorithm. - - Parameters - ---------- - embedded_cluster - The ASE Atoms object containing the atomic coordinates AND the atom types (i.e. cation or anion). - distances - The distance of atoms from the cluster center. - shell_width - Defines the distance between atoms within shells; this is the maximum distance between any two atoms within the shell + return np.array([np.linalg.norm(atom.position - center_position) for atom in atoms]) - Returns - ------- - list[list[int]] - A list of lists containing the indices of the cations in each shell. - """ - - # Define the empty list to store the cation shells - shells = [] - shells_indices = [] - - # Sort the points by distance from the cluster center for the cations only - distances_sorted = [] - distances_sorted_indices = [] - for i in np.argsort(distances): - if embedded_cluster.get_array("atom_type")[i] == "cation": - distances_sorted.append(distances[i]) - distances_sorted_indices.append(i) - - current_point = distances_sorted[0] - current_shell = [current_point] - current_shell_idx = [distances_sorted_indices[0]] - - for idx, point in enumerate(distances_sorted[1:]): - if point <= current_point + shell_width: - current_shell.append(point) - current_shell_idx.append(distances_sorted_indices[idx + 1]) - else: - shells.append(current_shell) - shells_indices.append(current_shell_idx) - current_shell = [point] - current_shell_idx = [distances_sorted_indices[idx + 1]] - current_point = point - shells.append(current_shell) - shells_indices.append(current_shell_idx) - - return shells, shells_indices - - -def _get_anion_coordination( - embedded_cluster: Atoms, - cation_shell_indices: list[int], - dist_matrix: NDArray, - bond_dist: float = 2.5, -) -> list[int]: - """ - Returns a list of lists containing the indices of the anions coordinating the cation indices provided. - - Parameters - ---------- - embedded_cluster - The ASE Atoms object containing the atomic coordinates AND the atom types (i.e. cation or anion). - cation_shell_indices - A list of the indices of the cations in the cluster. - dist_matrix - A matrix containing the distances between each pair of atoms in the embedded cluster. - bond_dist - The distance within which an anion is considered to be coordinating a cation. - Returns - ------- - list[int] - A list containing the indices of the anions coordinating the cation indices. - """ - - # Define the empty list to store the anion coordination - anion_coord_indices = [] - - # Iterate over the cation shell indices and find the atoms within the bond distance of each cation - for atom_idx in cation_shell_indices: - anion_coord_indices += [ - idx - for idx, dist in enumerate(dist_matrix[atom_idx]) - if ( - dist < bond_dist - and embedded_cluster.get_array("atom_type")[idx] == "anion" - ) - ] - - return list(set(anion_coord_indices)) - - -def _get_ecp_region( - embedded_cluster: Atoms, - quantum_cluster_indices: list[int], - dist_matrix: NDArray, - ecp_dist: float = 6.0, -) -> list[list[int]]: +def create_atom_coord_string( + atom: Atom, + is_ghost_atom: bool = False, + atom_ecp_info: str | None = None, + pc_charge: float | None = None, +) -> str: """ - Returns a list of lists containing the indices of the atoms in the ECP region of the embedded cluster for each quantum cluster + Creates a string containing the Atom symbol and coordinates for both MRCC and ORCA, with additional information for atoms in the ECP region as well as ghost atoms. Parameters ---------- - embedded_cluster - The ASE Atoms object containing the atomic coordinates AND the atom types (i.e. cation or anion). - quantum_cluster_indices - A list of lists containing the indices of the atoms in each quantum cluster. - dist_matrix - A matrix containing the distances between each pair of atoms in the embedded cluster. - ecp_dist - The distance from edges of the quantum cluster to define the ECP region. + atom + The ASE Atom (not Atoms) object containing the atomic coordinates. + is_ghost_atom + If True, then the atom is a ghost atom. + atom_ecp_info + If not None, then assume this is an atom in the ECP region and adds the ECP info. + pc_charge + The point charge value for the ECP region atom. Returns ------- - list[list[int]] - A list of lists containing the indices of the atoms in the ECP region for each quantum cluster. + str + The atom symbol and coordinates in the ORCA input file format. """ - ecp_region_indices = [] - dummy_cation_indices = [] + # If ecp_info is not None and ghost_atom is True, raise an error + if atom_ecp_info and is_ghost_atom: + raise ValueError("ECP info cannot be provided for ghost atoms.") - # Iterate over the quantum clusters and find the atoms within the ECP distance of each quantum cluster - for cluster in quantum_cluster_indices: - dummy_cation_indices += cluster - cluster_ecp_region_idx = [] - for atom_idx in dummy_cation_indices: - for idx, dist in enumerate(dist_matrix[atom_idx]): - # Check if the atom is within the ecp_dist region and is not in the quantum cluster and is a cation - if ( - dist < ecp_dist - and idx not in dummy_cation_indices - and embedded_cluster.get_array("atom_type")[idx] == "cation" - ): - cluster_ecp_region_idx += [idx] + # Check that pc_charge is a float if atom_ecp_info is not None + if atom_ecp_info and pc_charge is None: + raise ValueError("Point charge value must be given for atoms with ECP info.") - ecp_region_indices += [list(set(cluster_ecp_region_idx))] + if is_ghost_atom: + atom_coord_str = f"{(atom.symbol + ':').ljust(3)} {' '*16} {atom.position[0]:-16.11f} {atom.position[1]:-16.11f} {atom.position[2]:-16.11f}\n" + elif atom_ecp_info is not None: + atom_coord_str = f"{(atom.symbol + '>').ljust(3)} {pc_charge:-16.11f} {atom.position[0]:-16.11f} {atom.position[1]:-16.11f} {atom.position[2]:-16.11f}\n{atom_ecp_info}" + else: + atom_coord_str = f"{atom.symbol.ljust(3)} {' '*16} {atom.position[0]:-16.11f} {atom.position[1]:-16.11f} {atom.position[2]:-16.11f}\n" - return ecp_region_indices + return atom_coord_str diff --git a/src/quacc/types.py b/src/quacc/types.py index 2556f37c30..e93be954f2 100644 --- a/src/quacc/types.py +++ b/src/quacc/types.py @@ -101,7 +101,6 @@ class MaxwellBoltzmanDistributionKwargs(TypedDict, total=False): # ----------- Atoms handling type hints ----------- - ElementStr = Literal[ "H", "He", @@ -248,6 +247,11 @@ class FindAdsSitesKwargs(TypedDict, total=False): # ----------- Atoms (skzcam) handling type hints ----------- + class SKZCAMOutput(TypedDict): + adsorbate_slab_embedded_cluster: Atoms + quantum_cluster_indices_set: list[list[int]] + ecp_region_indices_set: list[list[int]] + class ElementInfo(TypedDict): core: int basis: str diff --git a/tests/core/atoms/conftest.py b/tests/core/atoms/conftest.py index aec15d8d58..7ac9abca26 100644 --- a/tests/core/atoms/conftest.py +++ b/tests/core/atoms/conftest.py @@ -9,22 +9,27 @@ FILE_DIR = Path(__file__).parent -def mock_generate_chemshell_cluster( - slab, slab_center_idx, atom_oxi_states, filepath, **kwargs -): - with ( - gzip.open( - Path(FILE_DIR, "skzcam_files", "REF_ChemShell_cluster.xyz.gz"), "rb" - ) as f_in, - Path(filepath, "ChemShell_cluster.xyz").open(mode="wb") as f_out, - ): - shutil.copyfileobj(f_in, f_out) +def mock_run_chemshell(*args, filepath=".", write_xyz_file=False, **kwargs): + if write_xyz_file: + with ( + gzip.open( + Path(FILE_DIR, "skzcam_files", "REF_ChemShell_Cluster.xyz.gz"), "rb" + ) as f_in, + Path(filepath).with_suffix(".xyz").open(mode="wb") as f_out, + ): + shutil.copyfileobj(f_in, f_out) + else: + with ( + gzip.open( + Path(FILE_DIR, "skzcam_files", "ChemShell_Cluster.pun.gz"), "rb" + ) as f_in, + Path(filepath).with_suffix(".pun").open(mode="wb") as f_out, + ): + shutil.copyfileobj(f_in, f_out) @pytest.fixture(autouse=True) -def patch_generate_chemshell_cluster(monkeypatch): - from quacc.atoms import skzcam +def patch_run_chemshell(monkeypatch): + from quacc.atoms.skzcam import CreateSKZCAMClusters - monkeypatch.setattr( - skzcam, "generate_chemshell_cluster", mock_generate_chemshell_cluster - ) + monkeypatch.setattr(CreateSKZCAMClusters, "run_chemshell", mock_run_chemshell) diff --git a/tests/core/atoms/skzcam_files/CO_MgO.poscar.gz b/tests/core/atoms/skzcam_files/CO_MgO.poscar.gz new file mode 100644 index 0000000000..09b71f8989 Binary files /dev/null and b/tests/core/atoms/skzcam_files/CO_MgO.poscar.gz differ diff --git a/tests/core/atoms/skzcam_files/mgo_shells_cluster.pun.gz b/tests/core/atoms/skzcam_files/ChemShell_Cluster.pun.gz similarity index 100% rename from tests/core/atoms/skzcam_files/mgo_shells_cluster.pun.gz rename to tests/core/atoms/skzcam_files/ChemShell_Cluster.pun.gz diff --git a/tests/core/atoms/skzcam_files/REF_ChemShell_Cluster.xyz.gz b/tests/core/atoms/skzcam_files/REF_ChemShell_Cluster.xyz.gz new file mode 100644 index 0000000000..dfc72b8cb2 Binary files /dev/null and b/tests/core/atoms/skzcam_files/REF_ChemShell_Cluster.xyz.gz differ diff --git a/tests/core/atoms/skzcam_files/REF_ChemShell_cluster.xyz.gz b/tests/core/atoms/skzcam_files/REF_ChemShell_cluster.xyz.gz deleted file mode 100644 index eb83aef23a..0000000000 Binary files a/tests/core/atoms/skzcam_files/REF_ChemShell_cluster.xyz.gz and /dev/null differ diff --git a/tests/core/atoms/skzcam_files/adsorbate_slab_embedded_cluster.npy.gz b/tests/core/atoms/skzcam_files/adsorbate_slab_embedded_cluster.npy.gz new file mode 100644 index 0000000000..6fbc5fc72c Binary files /dev/null and b/tests/core/atoms/skzcam_files/adsorbate_slab_embedded_cluster.npy.gz differ diff --git a/tests/core/atoms/test_skzcam.py b/tests/core/atoms/test_skzcam.py index 319e9d5be0..d3a145401f 100644 --- a/tests/core/atoms/test_skzcam.py +++ b/tests/core/atoms/test_skzcam.py @@ -1,72 +1,113 @@ from __future__ import annotations +import gzip +import os +from copy import deepcopy from pathlib import Path import numpy as np import pytest from ase import Atoms +from ase.calculators.calculator import compare_atoms from ase.io import read from numpy.testing import assert_allclose, assert_equal from quacc.atoms.skzcam import ( - _find_cation_shells, - _get_anion_coordination, + CreateSKZCAMClusters, + MRCCInputGenerator, + ORCAInputGenerator, _get_atom_distances, - _get_ecp_region, - convert_pun_to_atoms, create_atom_coord_string, - create_mrcc_atomtype_basis, - create_mrcc_eint_blocks, - create_orca_eint_blocks, - create_orca_point_charge_file, - create_skzcam_clusters, - format_ecp_info, - generate_coords_block, - generate_mrcc_basis_ecp_block, - generate_mrcc_coords_block, - generate_mrcc_point_charge_block, - generate_orca_input_preamble, - get_cluster_info_from_slab, - insert_adsorbate_to_embedded_cluster, ) FILE_DIR = Path(__file__).parent @pytest.fixture() -def embedded_cluster(): - return convert_pun_to_atoms( - Path(FILE_DIR, "skzcam_files", "mgo_shells_cluster.pun.gz"), - {"Mg": 2.0, "O": -2.0}, +def skzcam_clusters(): + return CreateSKZCAMClusters( + adsorbate_indices=[0, 1], + slab_center_indices=[32], + atom_oxi_states={"Mg": 2.0, "O": -2.0}, + adsorbate_slab_file=Path(FILE_DIR, "skzcam_files", "CO_MgO.poscar.gz"), + pun_file=None, ) @pytest.fixture() -def embedded_adsorbed_cluster(): - embedded_cluster, quantum_cluster_indices, ecp_region_indices = ( - create_skzcam_clusters( - Path(FILE_DIR, "skzcam_files", "mgo_shells_cluster.pun.gz"), - [0, 0, 2], - {"Mg": 2.0, "O": -2.0}, - shell_max=2, - ecp_dist=3, - write_clusters=False, - ) +def slab_embedded_cluster(skzcam_clusters): + return skzcam_clusters._convert_pun_to_atoms( + pun_file=Path(FILE_DIR, "skzcam_files", "ChemShell_Cluster.pun.gz") ) - adsorbate = Atoms( - "CO", positions=[[0.0, 0.0, 0.0], [0.0, 0.0, 1.128]], pbc=[False, False, False] + + +@pytest.fixture() +def distance_matrix(slab_embedded_cluster): + return slab_embedded_cluster.get_all_distances() + + +@pytest.fixture() +def adsorbate_slab_embedded_cluster(): + with gzip.open( + Path(FILE_DIR, "skzcam_files", "adsorbate_slab_embedded_cluster.npy.gz"), "r" + ) as file: + return np.load(file, allow_pickle=True).item()["atoms"] + + +@pytest.fixture() +def mrcc_input_generator(adsorbate_slab_embedded_cluster, element_info): + return MRCCInputGenerator( + adsorbate_slab_embedded_cluster=adsorbate_slab_embedded_cluster, + quantum_cluster_indices=[0, 1, 2, 3, 4, 5, 6, 7], + ecp_region_indices=[8, 9, 10, 11, 12, 13, 14, 15, 20, 21, 22, 23, 24], + element_info=element_info, + include_cp=True, + multiplicities={"adsorbate_slab": 3, "adsorbate": 1, "slab": 2}, ) - embedded_adsorbed_cluster, quantum_cluster_indices, ecp_region_indices = ( - insert_adsorbate_to_embedded_cluster( - embedded_cluster=embedded_cluster, - adsorbate=adsorbate, - adsorbate_vector_from_slab=[0.0, 0.0, 2.0], - quantum_cluster_indices=quantum_cluster_indices, - ecp_region_indices=ecp_region_indices, - ) + +@pytest.fixture() +def orca_input_generator(adsorbate_slab_embedded_cluster, element_info): + pal_nprocs_block = {"nprocs": 1, "maxcore": 5000} + + method_block = {"Method": "hf", "RI": "on", "RunTyp": "Energy"} + + scf_block = { + "HFTyp": "rhf", + "Guess": "MORead", + "MOInp": '"orca_svp_start.gbw"', + "SCFMode": "Direct", + "sthresh": "1e-6", + "AutoTRAHIter": 60, + "MaxIter": 1000, + } + + ecp_info = { + "Mg": """NewECP +N_core 0 +lmax f +s 1 +1 1.732000000 14.676000000 2 +p 1 +1 1.115000000 5.175700000 2 +d 1 +1 1.203000000 -1.816000000 2 +f 1 +1 1.000000000 0.000000000 2 +end""" + } + return ORCAInputGenerator( + adsorbate_slab_embedded_cluster=adsorbate_slab_embedded_cluster, + quantum_cluster_indices=[0, 1, 2, 3, 4, 5, 6, 7], + ecp_region_indices=[8, 9, 10, 11, 12, 13, 14, 15, 20, 21, 22, 23, 24], + element_info=element_info, + include_cp=True, + multiplicities={"adsorbate_slab": 3, "adsorbate": 1, "slab": 2}, + pal_nprocs_block=pal_nprocs_block, + method_block=method_block, + scf_block=scf_block, + ecp_info=ecp_info, ) - return embedded_adsorbed_cluster @pytest.fixture() @@ -93,43 +134,87 @@ def element_info(): } -@pytest.fixture() -def distance_matrix(embedded_cluster): - return embedded_cluster.get_all_distances() - - -def test_create_mrcc_eint_blocks(embedded_adsorbed_cluster, element_info): - mrcc_blocks = create_mrcc_eint_blocks( - embedded_adsorbed_cluster=embedded_adsorbed_cluster, +def test_MRCCInputGenerator_init(adsorbate_slab_embedded_cluster, element_info): + # Check what happens if multiplicities is not provided + mrcc_input_generator = MRCCInputGenerator( + adsorbate_slab_embedded_cluster=adsorbate_slab_embedded_cluster, quantum_cluster_indices=[0, 1, 2, 3, 4, 5, 6, 7], ecp_region_indices=[8, 9, 10, 11, 12, 13, 14, 15, 20, 21, 22, 23, 24], element_info=element_info, include_cp=True, ) - mrcc_blocks_nocp = create_mrcc_eint_blocks( - embedded_adsorbed_cluster=embedded_adsorbed_cluster, + assert mrcc_input_generator.multiplicities == { + "adsorbate_slab": 1, + "adsorbate": 1, + "slab": 1, + } + + mrcc_input_generator = MRCCInputGenerator( + adsorbate_slab_embedded_cluster=adsorbate_slab_embedded_cluster, quantum_cluster_indices=[0, 1, 2, 3, 4, 5, 6, 7], ecp_region_indices=[8, 9, 10, 11, 12, 13, 14, 15, 20, 21, 22, 23, 24], element_info=element_info, - include_cp=False, + include_cp=True, + multiplicities={"adsorbate_slab": 3, "adsorbate": 1, "slab": 2}, + ) + + assert not compare_atoms( + mrcc_input_generator.adsorbate_slab_embedded_cluster, + adsorbate_slab_embedded_cluster, ) + assert_equal(mrcc_input_generator.quantum_cluster_indices, [0, 1, 2, 3, 4, 5, 6, 7]) + assert_equal(mrcc_input_generator.adsorbate_indices, [0, 1]) + assert_equal(mrcc_input_generator.slab_indices, [2, 3, 4, 5, 6, 7]) + assert_equal( + mrcc_input_generator.ecp_region_indices, + [8, 9, 10, 11, 12, 13, 14, 15, 20, 21, 22, 23, 24], + ) + assert mrcc_input_generator.element_info == element_info + assert mrcc_input_generator.include_cp is True + assert mrcc_input_generator.multiplicities == { + "adsorbate_slab": 3, + "adsorbate": 1, + "slab": 2, + } + + # Check if error raise if quantum_cluster_indices and ecp_region_indices overlap + + with pytest.raises( + ValueError, match="An atom in the quantum cluster is also in the ECP region." + ): + mrcc_input_generator = MRCCInputGenerator( + adsorbate_slab_embedded_cluster=adsorbate_slab_embedded_cluster, + quantum_cluster_indices=[0, 1, 2, 3, 4, 5, 6, 7], + ecp_region_indices=[7, 9, 10, 11, 12, 13, 14, 15, 20, 21, 22, 23, 24], + element_info=element_info, + include_cp=True, + ) + + +def test_MRCCInputGenerator_generate_input(mrcc_input_generator): + mrcc_input_generator_nocp = deepcopy(mrcc_input_generator) + + mrcc_input_generator_nocp.include_cp = False + mrcc_input_generator_nocp.generate_input() + + mrcc_input_generator.generate_input() reference_block_collated = { "adsorbate_slab": { - "float": [21.0, 2.0, -2.0, -2.0, 2.0, -8.9536039173], + "float": [21.0, -2.0, 2.0, 2.0, 2.0, 0.1474277671], "string": ["basis_sm=atomtype", "def2/JK", "O"], }, "adsorbate": {"float": [8.0], "string": ["basis_sm=atomtype", "3,4,5,6,7,8"]}, "slab": { - "float": [21.0, 2.0, -2.0, -2.0, 2.0, -8.9536039173], + "float": [21.0, -2.0, 2.0, 2.0, 2.0, 0.1474277671], "string": ["basis_sm=atomtype", "def2/JK", "O"], }, } reference_block_nocp_collated = { "adsorbate_slab": { - "float": [21.0, 2.0, -2.0, -2.0, 2.0, -8.9536039173], + "float": [21.0, -2.0, 2.0, 2.0, 2.0, 0.1474277671], "string": ["basis_sm=atomtype", "def2/JK", "O"], }, "adsorbate": {"float": [2.0], "string": ["basis_sm=atomtype"]}, @@ -137,10 +222,10 @@ def test_create_mrcc_eint_blocks(embedded_adsorbed_cluster, element_info): "float": [ 19.0, -4.22049352791, - 0.0, - -6.33028254133, - 10.55242967836, - 30.92617453977, + 4.22049352791, + 4.22049352791, + 2.11024676395, + -0.0, ], "string": ["basis_sm=atomtype", "no-basis-set", "Mg"], }, @@ -158,23 +243,23 @@ def test_create_mrcc_eint_blocks(embedded_adsorbed_cluster, element_info): for system in ["adsorbate_slab", "adsorbate", "slab"]: generated_block_collated[system]["float"] = [ float(x) - for x in mrcc_blocks[system].split() + for x in mrcc_input_generator.mrccblocks[system].split() if x.replace(".", "", 1).replace("-", "", 1).isdigit() ][::300] generated_block_collated[system]["string"] = [ x - for x in mrcc_blocks[system].split() + for x in mrcc_input_generator.mrccblocks[system].split() if not x.replace(".", "", 1).replace("-", "", 1).isdigit() ][::50] generated_block_nocp_collated[system]["float"] = [ float(x) - for x in mrcc_blocks_nocp[system].split() + for x in mrcc_input_generator_nocp.mrccblocks[system].split() if x.replace(".", "", 1).replace("-", "", 1).isdigit() ][::300] generated_block_nocp_collated[system]["string"] = [ x - for x in mrcc_blocks_nocp[system].split() + for x in mrcc_input_generator_nocp.mrccblocks[system].split() if not x.replace(".", "", 1).replace("-", "", 1).isdigit() ][::50] @@ -201,22 +286,13 @@ def test_create_mrcc_eint_blocks(embedded_adsorbed_cluster, element_info): ) -def test_generate_mrcc_basis_ecp_block(embedded_adsorbed_cluster, element_info): - generated_mrcc_blocks = generate_mrcc_basis_ecp_block( - embedded_adsorbed_cluster=embedded_adsorbed_cluster, - quantum_cluster_indices=[0, 1, 2, 3, 4, 5, 6, 7], - ecp_region_indices=[8, 9, 10, 11, 12, 13, 14, 15, 20, 21, 22, 23, 24], - element_info=element_info, - include_cp=True, - ) +def test_MRCCInputGenerator_generate_basis_ecp_block(mrcc_input_generator): + mrcc_input_generator_nocp = deepcopy(mrcc_input_generator) - generated_mrcc_blocks_nocp = generate_mrcc_basis_ecp_block( - embedded_adsorbed_cluster=embedded_adsorbed_cluster, - quantum_cluster_indices=[0, 1, 2, 3, 4, 5, 6, 7], - ecp_region_indices=[8, 9, 10, 11, 12, 13, 14, 15, 20, 21, 22, 23, 24], - element_info=element_info, - include_cp=False, - ) + mrcc_input_generator_nocp.include_cp = False + mrcc_input_generator_nocp._generate_basis_ecp_block() + + mrcc_input_generator._generate_basis_ecp_block() reference_mrcc_blocks_collated = { "adsorbate_slab": [ @@ -276,12 +352,12 @@ def test_generate_mrcc_basis_ecp_block(embedded_adsorbed_cluster, element_info): system: [] for system in ["adsorbate_slab", "slab", "adsorbate"] } for system in ["adsorbate_slab", "adsorbate", "slab"]: - generated_mrcc_blocks_collated[system] = generated_mrcc_blocks[system].split()[ - ::10 - ] - generated_mrcc_blocks_nocp_collated[system] = generated_mrcc_blocks_nocp[ + generated_mrcc_blocks_collated[system] = mrcc_input_generator.mrccblocks[ system ].split()[::10] + generated_mrcc_blocks_nocp_collated[system] = ( + mrcc_input_generator_nocp.mrccblocks[system].split()[::10] + ) assert_equal( generated_mrcc_blocks_collated[system], @@ -293,24 +369,21 @@ def test_generate_mrcc_basis_ecp_block(embedded_adsorbed_cluster, element_info): ) -def test_create_mrcc_atomtype_basis(embedded_adsorbed_cluster, element_info): - quantum_region = embedded_adsorbed_cluster[[0, 1, 2, 3, 4, 5, 6, 7]] - ecp_region = embedded_adsorbed_cluster[ - [8, 9, 10, 11, 12, 13, 14, 15, 20, 21, 22, 23, 24] - ] - - generated_basis_block_without_ecp = create_mrcc_atomtype_basis( - quantum_region=quantum_region, +def test_MRCCInputGenerator_create_atomtype_basis(mrcc_input_generator): + generated_basis_block_without_ecp = mrcc_input_generator._create_atomtype_basis( + quantum_region=mrcc_input_generator.adsorbate_slab_cluster, element_basis_info={ - element: element_info[element]["ri_cwft_basis"] for element in element_info + element: mrcc_input_generator.element_info[element]["ri_cwft_basis"] + for element in mrcc_input_generator.element_info }, ) - generated_basis_block_with_ecp = create_mrcc_atomtype_basis( - quantum_region=quantum_region, + generated_basis_block_with_ecp = mrcc_input_generator._create_atomtype_basis( + quantum_region=mrcc_input_generator.adsorbate_slab_cluster, element_basis_info={ - element: element_info[element]["ri_cwft_basis"] for element in element_info + element: mrcc_input_generator.element_info[element]["ri_cwft_basis"] + for element in mrcc_input_generator.element_info }, - ecp_region=ecp_region, + ecp_region=mrcc_input_generator.ecp_region, ) reference_basis_block_without_ecp = "aug-cc-pVDZ/C\naug-cc-pVDZ/C\ncc-pVDZ/C\naug-cc-pVDZ/C\naug-cc-pVDZ/C\naug-cc-pVDZ/C\naug-cc-pVDZ/C\naug-cc-pVDZ/C\n" @@ -320,37 +393,13 @@ def test_create_mrcc_atomtype_basis(embedded_adsorbed_cluster, element_info): assert generated_basis_block_with_ecp == reference_basis_block_with_ecp -def test_generate_mrcc_coords_block(embedded_adsorbed_cluster, element_info): - # Check if multiplicity is read - - mrcc_blocks = generate_mrcc_coords_block( - embedded_adsorbed_cluster=embedded_adsorbed_cluster, - quantum_cluster_indices=[0, 1, 2, 3, 4, 5, 6, 7], - ecp_region_indices=[8, 9, 10, 11, 12, 13, 14, 15, 20, 21, 22, 23, 24], - element_info=element_info, - include_cp=True, - multiplicities={"adsorbate_slab": 3, "adsorbate": 1, "slab": 2}, - ) +def test_MRCCInputGenerator_generate_coords_block(mrcc_input_generator): + mrcc_input_generator_nocp = deepcopy(mrcc_input_generator) - assert mrcc_blocks["adsorbate"].split()[1][-1] == "1" - assert mrcc_blocks["adsorbate_slab"].split()[1][-1] == "3" - assert mrcc_blocks["slab"].split()[1][-1] == "2" - - mrcc_blocks = generate_mrcc_coords_block( - embedded_adsorbed_cluster=embedded_adsorbed_cluster, - quantum_cluster_indices=[0, 1, 2, 3, 4, 5, 6, 7], - ecp_region_indices=[8, 9, 10, 11, 12, 13, 14, 15, 20, 21, 22, 23, 24], - element_info=element_info, - include_cp=True, - ) + mrcc_input_generator_nocp.include_cp = False + mrcc_input_generator_nocp._generate_coords_block() - mrcc_blocks_nocp = generate_mrcc_coords_block( - embedded_adsorbed_cluster=embedded_adsorbed_cluster, - quantum_cluster_indices=[0, 1, 2, 3, 4, 5, 6, 7], - ecp_region_indices=[8, 9, 10, 11, 12, 13, 14, 15, 20, 21, 22, 23, 24], - element_info=element_info, - include_cp=False, - ) + mrcc_input_generator._generate_coords_block() reference_block_collated = { "adsorbate_slab": { @@ -415,23 +464,23 @@ def test_generate_mrcc_coords_block(embedded_adsorbed_cluster, element_info): for system in ["adsorbate_slab", "adsorbate", "slab"]: generated_block_collated[system]["float"] = [ float(x) - for x in mrcc_blocks[system].split() + for x in mrcc_input_generator.mrccblocks[system].split() if x.replace(".", "", 1).replace("-", "", 1).isdigit() ][::10] generated_block_collated[system]["string"] = [ x - for x in mrcc_blocks[system].split() + for x in mrcc_input_generator.mrccblocks[system].split() if not x.replace(".", "", 1).replace("-", "", 1).isdigit() ][::5] generated_block_nocp_collated[system]["float"] = [ float(x) - for x in mrcc_blocks_nocp[system].split() + for x in mrcc_input_generator_nocp.mrccblocks[system].split() if x.replace(".", "", 1).replace("-", "", 1).isdigit() ][::10] generated_block_nocp_collated[system]["string"] = [ x - for x in mrcc_blocks_nocp[system].split() + for x in mrcc_input_generator_nocp.mrccblocks[system].split() if not x.replace(".", "", 1).replace("-", "", 1).isdigit() ][::5] @@ -458,51 +507,23 @@ def test_generate_mrcc_coords_block(embedded_adsorbed_cluster, element_info): ) -def test_generate_mrcc_point_charge_block(embedded_adsorbed_cluster): - with pytest.raises( - ValueError, match="An atom in the quantum cluster is also in the ECP region." - ): - generate_mrcc_point_charge_block( - embedded_adsorbed_cluster=embedded_adsorbed_cluster, - quantum_cluster_indices=[0, 1, 2, 3, 4, 5, 6, 7], - ecp_region_indices=[7, 9, 10, 11, 12, 13, 14, 15, 20, 21, 22, 23, 24], - ) - - generated_point_charge_block = generate_mrcc_point_charge_block( - embedded_adsorbed_cluster=embedded_adsorbed_cluster, - quantum_cluster_indices=[0, 1, 2, 3, 4, 5, 6, 7], - ecp_region_indices=[8, 9, 10, 11, 12, 13, 14, 15, 20, 21, 22, 23, 24], - ).split() +def test_MRCCInputGenerator_generate_point_charge_block(mrcc_input_generator): + generated_point_charge_block = mrcc_input_generator._generate_point_charge_block() generated_point_charge_block_shortened = [ - float(x) for x in generated_point_charge_block[5::70] + float(x) for x in generated_point_charge_block.split()[5::180] ] reference_point_charge_block_shortened = [ -0.04367284424, - 2.12018425659, - -0.04269731856, - 0.0, - -4.26789528527, - -2.11024676395, - -6.37814204923, - -6.32889565859, - -2.14445912302, - 4.22049352791, - -2.14129966123, - 6.32954443328, + -0.03992370948, -2.14923989662, - 0.0, - -4.26789528527, - 8.44098705582, -6.37814204923, - 8.44098705582, + -2.1415520695, -4.26789528527, - -54.86641586281, - -57.14411935233, - 49.48825903601, + -2.1415520695, + -0.03992370948, 0.0, - 43.73621546646, ] assert_allclose( @@ -513,7 +534,7 @@ def test_generate_mrcc_point_charge_block(embedded_adsorbed_cluster): ) -def test_create_orca_eint_blocks(embedded_adsorbed_cluster, element_info): +def test_ORCAInputGenerator_init(adsorbate_slab_embedded_cluster, element_info): pal_nprocs_block = {"nprocs": 1, "maxcore": 5000} method_block = {"Method": "hf", "RI": "on", "RunTyp": "Energy"} @@ -542,752 +563,469 @@ def test_create_orca_eint_blocks(embedded_adsorbed_cluster, element_info): 1 1.000000000 0.000000000 2 end""" } - - orca_blocks = create_orca_eint_blocks( - embedded_adsorbed_cluster=embedded_adsorbed_cluster, + orca_input_generator = ORCAInputGenerator( + adsorbate_slab_embedded_cluster=adsorbate_slab_embedded_cluster, quantum_cluster_indices=[0, 1, 2, 3, 4, 5, 6, 7], ecp_region_indices=[8, 9, 10, 11, 12, 13, 14, 15, 20, 21, 22, 23, 24], element_info=element_info, + include_cp=True, pal_nprocs_block=pal_nprocs_block, method_block=method_block, scf_block=scf_block, ecp_info=ecp_info, - include_cp=True, - multiplicities={"adsorbate_slab": 1, "slab": 2, "adsorbate": 3}, - ) - - adsorbate_slab_block_float = [ - float(x) - for x in orca_blocks["adsorbate_slab"].split()[::10] - if x.replace(".", "", 1).replace("-", "", 1).isdigit() - ] - adsorbate_slab_block_string = [ - x - for x in orca_blocks["adsorbate_slab"].split()[::5] - if not x.replace(".", "", 1).replace("-", "", 1).isdigit() - ] - adsorbate_block_float = [ - float(x) - for x in orca_blocks["adsorbate"].split()[::2] - if x.replace(".", "", 1).replace("-", "", 1).isdigit() - ] - adsorbate_block_string = [ - x - for x in orca_blocks["adsorbate"].split()[::2] - if not x.replace(".", "", 1).replace("-", "", 1).isdigit() - ] - slab_block_float = [ - float(x) - for x in orca_blocks["slab"].split()[::10] - if x.replace(".", "", 1).replace("-", "", 1).isdigit() - ] - slab_block_string = [ - x - for x in orca_blocks["slab"].split()[::5] - if not x.replace(".", "", 1).replace("-", "", 1).isdigit() - ] - - # Check that the strings and floats in adsorbate_slab_coords matches reference - assert_allclose( - adsorbate_slab_block_float, - [ - 3.128, - 0.0, - 0.00567209089, - 2.0, - 1.203, - 5.1757, - 1.0, - 2.0, - 1.203, - 5.1757, - 1.0, - 2.0, - 1.203, - 5.1757, - 1.0, - 2.0, - 1.203, - 5.1757, - 1.0, - 2.0, - 1.203, - 5.1757, - 1.0, - 2.0, - 1.203, - 5.1757, - 1.0, - 2.0, - 1.203, - ], - rtol=1e-05, - atol=1e-07, - ) - - assert_equal( - adsorbate_slab_block_string, - [ - "%pal", - "Method", - "Energy", - "NewNCore", - "O", - "NewGTO", - "Mg", - '"aug-cc-pVDZ"', - "end", - "NewAuxJGTO", - "C", - '"cc-pVDZ/C"', - "end", - "Guess", - "Direct", - "MaxIter", - "xyz", - "Charge", - "O", - "Mg>", - "NewECP", - "s", - "Mg>", - "NewECP", - "s", - "Mg>", - "NewECP", - "s", - "Mg>", - "NewECP", - "s", - "Mg>", - "NewECP", - "s", - "Mg>", - "NewECP", - "s", - "Mg>", - "NewECP", - "s", - "Mg>", - "NewECP", - "s", - "Mg>", - "NewECP", - "s", - "Mg>", - "NewECP", - "s", - "Mg>", - "NewECP", - "s", - "Mg>", - "NewECP", - "s", - "Mg>", - "NewECP", - "s", - "end", - ], ) - # Check that the strings and floats in ad_coords matches reference - assert_allclose( - adsorbate_block_float, - [ - 1.0, - 2.0, - 2.0, - 2.0, - 0.0, - 2.0, - 0.0, - 3.128, - 0.0, - 0.0, - -2.12018425659, - 0.00567209089, - 0.0, - 0.00567209089, - 2.12018425659, - 0.00567209089, - 0.0, - 0.00567209089, - 0.0, - -2.14129966123, - ], - rtol=1e-05, - atol=1e-07, - ) + # Check when multiplicities is not provided + assert orca_input_generator.multiplicities == { + "adsorbate_slab": 1, + "adsorbate": 1, + "slab": 1, + } - assert_equal( - adsorbate_block_string, - [ - "%pal", - "%maxcore", - "end", - '"orca.pc"', - "Method", - "RI", - "RunTyp", - "NewNCore", - "NewNCore", - "NewNCore", - "end", - "NewGTO", - '"aug-cc-pVDZ"', - "NewGTO", - '"cc-pVDZ"', - "NewGTO", - '"aug-cc-pVDZ"', - "NewAuxJGTO", - '"def2/J"', - "NewAuxJGTO", - '"def2/J"', - "NewAuxJGTO", - '"def2/JK"', - "NewAuxCGTO", - '"aug-cc-pVDZ/C"', - "NewAuxCGTO", - '"cc-pVDZ/C"', - "NewAuxCGTO", - '"aug-cc-pVDZ/C"', - "end", - "HFTyp", - "Guess", - "MOInp", - "SCFMode", - "sthresh", - "AutoTRAHIter", - "MaxIter", - "end", - "CTyp", - "Mult", - "Units", - "Charge", - "coords", - "end", - ], + orca_input_generator = ORCAInputGenerator( + adsorbate_slab_embedded_cluster=adsorbate_slab_embedded_cluster, + quantum_cluster_indices=[0, 1, 2, 3, 4, 5, 6, 7], + ecp_region_indices=[8, 9, 10, 11, 12, 13, 14, 15, 20, 21, 22, 23, 24], + element_info=element_info, + include_cp=True, + multiplicities={"adsorbate_slab": 3, "adsorbate": 1, "slab": 2}, + pal_nprocs_block=pal_nprocs_block, + method_block=method_block, + scf_block=scf_block, + ecp_info=ecp_info, ) - # Check that the strings and floats in slab_coords matches reference - assert_allclose( - slab_block_float, - [ - 3.128, - 0.0, - 0.00567209089, - 2.0, - 1.203, - 5.1757, - 1.0, - 2.0, - 1.203, - 5.1757, - 1.0, - 2.0, - 1.203, - 5.1757, - 1.0, - 2.0, - 1.203, - 5.1757, - 1.0, - 2.0, - 1.203, - 5.1757, - 1.0, - 2.0, - 1.203, - 5.1757, - 1.0, - 2.0, - 1.203, - ], - rtol=1e-05, - atol=1e-07, + assert not compare_atoms( + orca_input_generator.adsorbate_slab_embedded_cluster, + adsorbate_slab_embedded_cluster, ) - + assert_equal(orca_input_generator.quantum_cluster_indices, [0, 1, 2, 3, 4, 5, 6, 7]) + assert_equal(orca_input_generator.adsorbate_indices, [0, 1]) + assert_equal(orca_input_generator.slab_indices, [2, 3, 4, 5, 6, 7]) assert_equal( - slab_block_string, - [ - "%pal", - "Method", - "Energy", - "NewNCore", - "O", - "NewGTO", - "Mg", - '"aug-cc-pVDZ"', - "end", - "NewAuxJGTO", - "C", - '"cc-pVDZ/C"', - "end", - "Guess", - "Direct", - "MaxIter", - "xyz", - "Charge", - "O", - "Mg>", - "NewECP", - "s", - "Mg>", - "NewECP", - "s", - "Mg>", - "NewECP", - "s", - "Mg>", - "NewECP", - "s", - "Mg>", - "NewECP", - "s", - "Mg>", - "NewECP", - "s", - "Mg>", - "NewECP", - "s", - "Mg>", - "NewECP", - "s", - "Mg>", - "NewECP", - "s", - "Mg>", - "NewECP", - "s", - "Mg>", - "NewECP", - "s", - "Mg>", - "NewECP", - "s", - "Mg>", - "NewECP", - "s", - "end", - ], - ) - + orca_input_generator.ecp_region_indices, + [8, 9, 10, 11, 12, 13, 14, 15, 20, 21, 22, 23, 24], + ) + assert orca_input_generator.element_info == element_info + assert orca_input_generator.include_cp is True + assert orca_input_generator.multiplicities == { + "adsorbate_slab": 3, + "adsorbate": 1, + "slab": 2, + } -def test_create_atom_coord_string(embedded_adsorbed_cluster): - atom = embedded_adsorbed_cluster[0] + assert orca_input_generator.pal_nprocs_block == pal_nprocs_block + assert orca_input_generator.method_block == method_block + assert orca_input_generator.scf_block == scf_block + assert orca_input_generator.ecp_info == ecp_info - # First let's try the case where it's a normal atom. - atom_coord_string = create_atom_coord_string(atom=atom) + # Check if error raise if quantum_cluster_indices and ecp_region_indices overlap with pytest.raises( - ValueError, match="ECP info cannot be provided for ghost atoms." + ValueError, match="An atom in the quantum cluster is also in the ECP region." ): - create_atom_coord_string( - atom, atom_ecp_info="NewECP\nECP_info1\nECP_info2\n", is_ghost_atom=True + orca_input_generator = ORCAInputGenerator( + adsorbate_slab_embedded_cluster=adsorbate_slab_embedded_cluster, + quantum_cluster_indices=[0, 1, 2, 3, 4, 5, 6, 7], + ecp_region_indices=[7, 9, 10, 11, 12, 13, 14, 15, 20, 21, 22, 23, 24], + element_info=element_info, + include_cp=True, + multiplicities={"adsorbate_slab": 3, "adsorbate": 1, "slab": 2}, + pal_nprocs_block=pal_nprocs_block, + method_block=method_block, + scf_block=scf_block, + ecp_info=ecp_info, ) - with pytest.raises( - ValueError, match="Point charge value must be given for atoms with ECP info." - ): - create_atom_coord_string(atom, atom_ecp_info="NewECP\nECP_info1\nECP_info2\n") - - assert ( - atom_coord_string - == "C 0.00000000000 0.00000000000 2.00000000000\n" - ) - # Let's now try the case where it is a ghost atom. - atom_coord_string = create_atom_coord_string(atom=atom, is_ghost_atom=True) - assert ( - atom_coord_string - == "C: 0.00000000000 0.00000000000 2.00000000000\n" - ) +def test_ORCAInputGenerator_generate_input(orca_input_generator): + orca_input_generator_nocp = deepcopy(orca_input_generator) - # Let's now try the case where it is an atom in the ECP region. - atom_coord_string = create_atom_coord_string( - atom=atom, atom_ecp_info="NewECP\nECP_info1\nECP_info2\n", pc_charge=2.0 - ) - assert ( - atom_coord_string - == "C> 2.00000000000 0.00000000000 0.00000000000 2.00000000000\nNewECP\nECP_info1\nECP_info2\n" - ) + orca_input_generator_nocp.include_cp = False + orca_input_generator_nocp.generate_input() + orca_input_generator.generate_input() -def test_generate_coords_block(embedded_adsorbed_cluster): - ecp_info = { - "Mg": """NewECP -N_core 0 -lmax f -s 1 -1 1.732000000 14.676000000 2 -p 1 -1 1.115000000 5.175700000 2 -d 1 -1 1.203000000 -1.816000000 2 -f 1 -1 1.000000000 0.000000000 2 -end""" + reference_block_collated = { + "adsorbate_slab": { + "float": [1.0, 2.0, 1.0, 0.0, 2.0], + "string": [ + "%pal", + "NewNCore", + "O", + '"aug-cc-pVDZ/C"', + '"orca_svp_start.gbw"', + "O", + "Mg>", + "d", + "f", + "NewECP", + "f", + "s", + "N_core", + "end", + ], + }, + "adsorbate": { + "float": [1.0], + "string": [ + "%pal", + "NewNCore", + "O", + '"aug-cc-pVDZ/C"', + '"orca_svp_start.gbw"', + "O", + ], + }, + "slab": { + "float": [1.0, 2.0, 1.0, 0.0, 2.0], + "string": [ + "%pal", + "NewNCore", + "O", + '"aug-cc-pVDZ/C"', + '"orca_svp_start.gbw"', + "O:", + "Mg>", + "d", + "f", + "NewECP", + "f", + "s", + "N_core", + "end", + ], + }, } - # Confirm that multiplicity given as 1 if not provided - coords_block = generate_coords_block( - embedded_adsorbed_cluster=embedded_adsorbed_cluster, - quantum_cluster_indices=[0, 1, 2, 3, 4, 5, 6, 7], - ecp_region_indices=[8, 9, 10, 11, 12, 13, 14, 15, 20, 21, 22, 23, 24], - ecp_info=ecp_info, - ) - - assert coords_block["adsorbate_slab"].split()[4] == "1" - assert coords_block["slab"].split()[4] == "1" - assert coords_block["adsorbate"].split()[4] == "1" + reference_block_nocp_collated = { + "adsorbate_slab": { + "float": [1.0, 2.0, 1.0, 0.0, 2.0], + "string": [ + "%pal", + "NewNCore", + "O", + '"aug-cc-pVDZ/C"', + '"orca_svp_start.gbw"', + "O", + "Mg>", + "d", + "f", + "NewECP", + "f", + "s", + "N_core", + "end", + ], + }, + "adsorbate": { + "float": [1.0], + "string": [ + "%pal", + "NewNCore", + "O", + '"aug-cc-pVDZ/C"', + '"orca_svp_start.gbw"', + "O", + ], + }, + "slab": { + "float": [1.0, 2.0, 2.10705287155, 0.0, 1.0], + "string": [ + "%pal", + "NewNCore", + "O", + '"aug-cc-pVDZ/C"', + '"orca_svp_start.gbw"', + "O", + "N_core", + "end", + "p", + "lmax", + "Mg>", + "d", + "f", + "end", + ], + }, + } - coords_block = generate_coords_block( - embedded_adsorbed_cluster=embedded_adsorbed_cluster, - quantum_cluster_indices=[0, 1, 2, 3, 4, 5, 6, 7], - ecp_region_indices=[8, 9, 10, 11, 12, 13, 14, 15, 20, 21, 22, 23, 24], - ecp_info=ecp_info, - include_cp=True, - multiplicities={"adsorbate_slab": 1, "slab": 2, "adsorbate": 3}, - ) + generated_block_collated = { + system: {"float": [], "string": []} + for system in ["adsorbate_slab", "adsorbate", "slab"] + } + generated_block_nocp_collated = { + system: {"float": [], "string": []} + for system in ["adsorbate_slab", "adsorbate", "slab"] + } - # Check that the strings and floats in adsorbate_slab_coords matches reference - adsorbate_slab_coords_shortened_list_floats = [ - float(x) - for x in coords_block["adsorbate_slab"].split()[::10] - if x.replace(".", "", 1).replace("-", "", 1).isdigit() - ] - adsorbate_slab_coords_shortened_list_str = [ - x - for x in coords_block["adsorbate_slab"].split()[::5] - if not x.replace(".", "", 1).replace("-", "", 1).isdigit() - ] - assert_allclose( - adsorbate_slab_coords_shortened_list_floats, - [ - 0.0, - 0.0, - 1.0, - 2.11144262254, - 1.732, - 1.0, - 2.0, - 1.0, - -2.11144262254, - 1.732, - 1.0, - 2.0, - 1.0, - 2.10705287155, - 1.732, - 1.0, - 2.0, - 1.0, - -2.10705287155, - 1.732, - 1.0, - 2.0, - 1.0, - 4.22049352791, - 1.732, - 1.0, - 2.0, - 1.0, - -4.22049352791, - 1.732, - 1.0, - 2.0, - 1.0, - ], - rtol=1e-05, - atol=1e-07, - ) + for system in ["adsorbate_slab", "adsorbate", "slab"]: + generated_block_collated[system]["float"] = [ + float(x) + for x in orca_input_generator.orcablocks[system].split() + if x.replace(".", "", 1).replace("-", "", 1).isdigit() + ][::77] + generated_block_collated[system]["string"] = [ + x + for x in orca_input_generator.orcablocks[system].split() + if not x.replace(".", "", 1).replace("-", "", 1).isdigit() + ][::17] - assert_equal( - adsorbate_slab_coords_shortened_list_str, - [ - "%coords", - "Units", - "C", - "O", - "lmax", - "f", - "lmax", - "f", - "lmax", - "f", - "lmax", - "f", - "lmax", - "f", - "lmax", - "f", - "lmax", - "f", - "lmax", - "f", - "lmax", - "f", - "lmax", - "f", - "lmax", - "f", - "lmax", - "f", - "lmax", - "f", - ], - ) + assert_equal( + reference_block_collated[system]["string"], + generated_block_collated[system]["string"], + ) + assert_allclose( + generated_block_collated[system]["float"], + reference_block_collated[system]["float"], + rtol=1e-05, + atol=1e-07, + ) - # Check that the strings and floats in ad_coords matches reference + generated_block_nocp_collated[system]["float"] = [ + float(x) + for x in orca_input_generator_nocp.orcablocks[system].split() + if x.replace(".", "", 1).replace("-", "", 1).isdigit() + ][::77] + generated_block_nocp_collated[system]["string"] = [ + x + for x in orca_input_generator_nocp.orcablocks[system].split() + if not x.replace(".", "", 1).replace("-", "", 1).isdigit() + ][::17] - adsorbate_coords_shortened_list_floats = [ - float(x) - for x in coords_block["adsorbate"].split()[::2] - if x.replace(".", "", 1).replace("-", "", 1).isdigit() - ] - adsorbate_coords_shortened_list_str = [ - x - for x in coords_block["adsorbate"].split()[::2] - if not x.replace(".", "", 1).replace("-", "", 1).isdigit() - ] + assert_equal( + reference_block_nocp_collated[system]["string"], + generated_block_nocp_collated[system]["string"], + ) + assert_allclose( + generated_block_nocp_collated[system]["float"], + reference_block_nocp_collated[system]["float"], + rtol=1e-05, + atol=1e-07, + ) - assert_allclose( - adsorbate_coords_shortened_list_floats, - [3.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.12018425659, 0.0, -2.12018425659, 0.0], - rtol=1e-05, - atol=1e-07, - ) - assert_equal( - adsorbate_coords_shortened_list_str, - [ - "%coords", - "xyz", - "angs", - "C", - "O", - "Mg:", - "O:", - "O:", - "O:", - "O:", - "O:", - "end", - ], - ) +def test_create_atom_coord_string(adsorbate_slab_embedded_cluster): + atom = adsorbate_slab_embedded_cluster[0] - # Check that the strings and floats in slab_coords matches reference - slab_coords_shortened_list_floats = [ - float(x) - for x in coords_block["slab"].split()[::10] - if x.replace(".", "", 1).replace("-", "", 1).isdigit() - ] - slab_coords_shortened_list_str = [ - x - for x in coords_block["slab"].split()[::5] - if not x.replace(".", "", 1).replace("-", "", 1).isdigit() - ] - assert_allclose( - slab_coords_shortened_list_floats, - [ - 0.0, - 0.0, - 1.0, - 2.11144262254, - 1.732, - 1.0, - 2.0, - 1.0, - -2.11144262254, - 1.732, - 1.0, - 2.0, - 1.0, - 2.10705287155, - 1.732, - 1.0, - 2.0, - 1.0, - -2.10705287155, - 1.732, - 1.0, - 2.0, - 1.0, - 4.22049352791, - 1.732, - 1.0, - 2.0, - 1.0, - -4.22049352791, - 1.732, - 1.0, - 2.0, - 1.0, - ], - rtol=1e-05, - atol=1e-07, - ) + # First let's try the case where it's a normal atom. + atom_coord_string = create_atom_coord_string(atom=atom) - assert_equal( - slab_coords_shortened_list_str, - [ - "%coords", - "Units", - "C:", - "O", - "lmax", - "f", - "lmax", - "f", - "lmax", - "f", - "lmax", - "f", - "lmax", - "f", - "lmax", - "f", - "lmax", - "f", - "lmax", - "f", - "lmax", - "f", - "lmax", - "f", - "lmax", - "f", - "lmax", - "f", - "lmax", - "f", - ], - ) + with pytest.raises( + ValueError, match="ECP info cannot be provided for ghost atoms." + ): + create_atom_coord_string( + atom, atom_ecp_info="NewECP\nECP_info1\nECP_info2\n", is_ghost_atom=True + ) - # Also check the case where include_cp is False - coords_block = generate_coords_block( - embedded_adsorbed_cluster=embedded_adsorbed_cluster, - quantum_cluster_indices=[0, 1, 2, 3, 4, 5, 6, 7], - ecp_region_indices=[8, 9, 10, 11, 12, 13, 14, 15, 20, 21, 22, 23, 24], - ecp_info=ecp_info, - include_cp=False, - multiplicities={"adsorbate_slab": 1, "slab": 2, "adsorbate": 3}, - ) + with pytest.raises( + ValueError, match="Point charge value must be given for atoms with ECP info." + ): + create_atom_coord_string(atom, atom_ecp_info="NewECP\nECP_info1\nECP_info2\n") - # Check that the strings and floats in ad_coords matches reference - adsorbate_coords_shortened_list_floats = [ - float(x) - for x in coords_block["adsorbate"].split()[::2] - if x.replace(".", "", 1).replace("-", "", 1).isdigit() - ] - adsorbate_coords_shortened_list_str = [ - x - for x in coords_block["adsorbate"].split()[::2] - if not x.replace(".", "", 1).replace("-", "", 1).isdigit() - ] - assert_allclose( - adsorbate_coords_shortened_list_floats, - [3.0, 0.0, 0.0, 0.0], - rtol=1e-05, - atol=1e-07, + assert ( + atom_coord_string + == "C 0.00000000000 0.00000000000 2.00000000000\n" ) - assert_equal( - adsorbate_coords_shortened_list_str, ["%coords", "xyz", "angs", "C", "O", "end"] + # Let's now try the case where it is a ghost atom. + atom_coord_string = create_atom_coord_string(atom=atom, is_ghost_atom=True) + assert ( + atom_coord_string + == "C: 0.00000000000 0.00000000000 2.00000000000\n" ) - # Check that the strings and float in slab_coords matches reference - slab_coords_shortened_list_floats = [ - float(x) - for x in coords_block["slab"].split()[::10] - if x.replace(".", "", 1).replace("-", "", 1).isdigit() - ] - slab_coords_shortened_list_str = [ - x - for x in coords_block["slab"].split()[::5] - if not x.replace(".", "", 1).replace("-", "", 1).isdigit() - ] - assert_allclose( - slab_coords_shortened_list_floats, - [ - 2.12018425659, - -1.816, - 2.0, - 1.0, - 2.0, - 1.0, - -1.816, - 2.0, - 1.0, - 2.0, - 1.0, - -1.816, - 2.0, - 1.0, - 2.0, - 1.0, - -1.816, - 2.0, - 1.0, - 2.0, - 1.0, - -1.816, - 2.0, - 1.0, - 2.0, - 1.0, - -1.816, - 2.0, - 1.0, - 2.0, - 1.0, - -1.816, - ], - rtol=1e-05, - atol=1e-07, + # Let's now try the case where it is an atom in the ECP region. + atom_coord_string = create_atom_coord_string( + atom=atom, atom_ecp_info="NewECP\nECP_info1\nECP_info2\n", pc_charge=2.0 ) - - assert_equal( - slab_coords_shortened_list_str, - [ - "%coords", - "Units", - "Mg", - "O", - "N_core", - "p", - "N_core", - "p", - "N_core", - "p", - "N_core", - "p", - "N_core", - "p", - "N_core", - "p", - "N_core", - "p", - "N_core", - "p", - "N_core", - "p", - "N_core", - "p", - "N_core", - "p", - "N_core", - "p", - "N_core", - "p", - "end", - ], + assert ( + atom_coord_string + == "C> 2.00000000000 0.00000000000 0.00000000000 2.00000000000\nNewECP\nECP_info1\nECP_info2\n" ) -def test_format_ecp_info(): +def test_ORCAInputGenerator_generate_coords_block(orca_input_generator): + orca_input_generator_nocp = deepcopy(orca_input_generator) + + orca_input_generator_nocp.include_cp = False + orca_input_generator_nocp._generate_coords_block() + + orca_input_generator._generate_coords_block() + + reference_block_collated = { + "adsorbate_slab": { + "float": [3.0, 1.0, 5.1757, 1.0, 0.0, 2.0, 1.0], + "string": [ + "%coords", + "coords", + "O", + "s", + "N_core", + "end", + "p", + "lmax", + "Mg>", + "d", + "f", + "NewECP", + "f", + "s", + "N_core", + "end", + "p", + "lmax", + "Mg>", + "d", + "f", + "end", + ], + }, + "adsorbate": {"float": [1.0], "string": ["%coords", "coords", "O:"]}, + "slab": { + "float": [2.0, 1.0, 5.1757, 1.0, 0.0, 2.0, 1.0], + "string": [ + "%coords", + "coords", + "O", + "s", + "N_core", + "end", + "p", + "lmax", + "Mg>", + "d", + "f", + "NewECP", + "f", + "s", + "N_core", + "end", + "p", + "lmax", + "Mg>", + "d", + "f", + "end", + ], + }, + } + + reference_block_nocp_collated = { + "adsorbate_slab": { + "float": [3.0, 1.0, 5.1757, 1.0, 0.0, 2.0, 1.0], + "string": [ + "%coords", + "coords", + "O", + "s", + "N_core", + "end", + "p", + "lmax", + "Mg>", + "d", + "f", + "NewECP", + "f", + "s", + "N_core", + "end", + "p", + "lmax", + "Mg>", + "d", + "f", + "end", + ], + }, + "adsorbate": {"float": [1.0], "string": ["%coords", "coords"]}, + "slab": { + "float": [2.0, 1.115, 2.0, 2.10705287155, 14.676, 1.0, 1.0], + "string": [ + "%coords", + "coords", + "Mg>", + "d", + "f", + "NewECP", + "f", + "s", + "N_core", + "end", + "p", + "lmax", + "Mg>", + "d", + "f", + "NewECP", + "f", + "s", + "N_core", + "end", + "p", + ], + }, + } + + generated_block_collated = { + system: {"float": [], "string": []} + for system in ["adsorbate_slab", "adsorbate", "slab"] + } + generated_block_nocp_collated = { + system: {"float": [], "string": []} + for system in ["adsorbate_slab", "adsorbate", "slab"] + } + + for system in ["adsorbate_slab", "adsorbate", "slab"]: + generated_block_collated[system]["float"] = [ + float(x) + for x in orca_input_generator.orcablocks[system].split() + if x.replace(".", "", 1).replace("-", "", 1).isdigit() + ][::57] + generated_block_collated[system]["string"] = [ + x + for x in orca_input_generator.orcablocks[system].split() + if not x.replace(".", "", 1).replace("-", "", 1).isdigit() + ][::7] + + assert_equal( + reference_block_collated[system]["string"], + generated_block_collated[system]["string"], + ) + assert_allclose( + generated_block_collated[system]["float"], + reference_block_collated[system]["float"], + rtol=1e-05, + atol=1e-07, + ) + + generated_block_nocp_collated[system]["float"] = [ + float(x) + for x in orca_input_generator_nocp.orcablocks[system].split() + if x.replace(".", "", 1).replace("-", "", 1).isdigit() + ][::57] + generated_block_nocp_collated[system]["string"] = [ + x + for x in orca_input_generator_nocp.orcablocks[system].split() + if not x.replace(".", "", 1).replace("-", "", 1).isdigit() + ][::7] + + assert_equal( + reference_block_nocp_collated[system]["string"], + generated_block_nocp_collated[system]["string"], + ) + assert_allclose( + generated_block_nocp_collated[system]["float"], + reference_block_nocp_collated[system]["float"], + rtol=1e-05, + atol=1e-07, + ) + + +def test_ORCAInputGenerator_format_ecp_info(orca_input_generator): with pytest.raises(ValueError): - format_ecp_info(atom_ecp_info="dummy_info\nN_core0\nend") + orca_input_generator._format_ecp_info(atom_ecp_info="dummy_info\nN_core0\nend") atom_ecp_info = """ NewECP @@ -1297,77 +1035,38 @@ def test_format_ecp_info(): 1 1.732000000 14.676000000 2 end """ - formatted_atom_ecp_info = format_ecp_info(atom_ecp_info=atom_ecp_info) + formatted_atom_ecp_info = orca_input_generator._format_ecp_info( + atom_ecp_info=atom_ecp_info + ) assert ( formatted_atom_ecp_info == "NewECP\nN_core 0\nlmax s\ns 1\n1 1.732000000 14.676000000 2\nend\n" ) -def test_generate_orca_input_preamble(embedded_adsorbed_cluster): - # Set-up some information needed for generating orca input - element_info = { - "C": { - "basis": "aug-cc-pVDZ", - "core": 2, - "ri_scf_basis": "def2/J", - "ri_cwft_basis": "aug-cc-pVDZ/C", - }, - "O": { - "basis": "aug-cc-pVDZ", - "core": 2, - "ri_scf_basis": "def2/JK", - "ri_cwft_basis": "aug-cc-pVDZ/C", - }, - "Mg": { - "basis": "cc-pVDZ", - "core": 2, - "ri_scf_basis": "def2/J", - "ri_cwft_basis": "cc-pVDZ/C", - }, - } - - pal_nprocs_block = {"nprocs": 1, "maxcore": 5000} - - method_block = {"Method": "hf", "RI": "on", "RunTyp": "Energy"} - - scf_block = { - "HFTyp": "rhf", - "Guess": "MORead", - "MOInp": '"orca_svp_start.gbw"', - "SCFMode": "Direct", - "sthresh": "1e-6", - "AutoTRAHIter": 60, - "MaxIter": 1000, - } - - # Check whether error raised if not all element_info is not provided - with pytest.raises(ValueError): - element_info_error = {"C": element_info["C"]} - generate_orca_input_preamble( - embedded_cluster=embedded_adsorbed_cluster, - quantum_cluster_indices=[0, 1, 2, 3, 4, 5, 6, 7], - element_info=element_info_error, - pal_nprocs_block=pal_nprocs_block, - method_block=method_block, - scf_block=scf_block, - ) +def test_ORCAInputGenerator_generate_preamble_block(orca_input_generator): + # Make copy of orca_input_generator for further tests + orca_input_generator_1 = deepcopy(orca_input_generator) + orca_input_generator_2 = deepcopy(orca_input_generator) + orca_input_generator_3 = deepcopy(orca_input_generator) # Generate the orca input preamble - preamble_input = generate_orca_input_preamble( - embedded_cluster=embedded_adsorbed_cluster, - quantum_cluster_indices=[0, 1, 2, 3, 4, 5, 6, 7], - element_info=element_info, - pal_nprocs_block=pal_nprocs_block, - method_block=method_block, - scf_block=scf_block, - ) + orca_input_generator_1._generate_preamble_block() assert ( - preamble_input + orca_input_generator_1.orcablocks["adsorbate_slab"] == '%pal nprocs 1 end\n%maxcore 5000 end\n%pointcharges "orca.pc"\n%method\nMethod hf\nRI on\nRunTyp Energy\nNewNCore C 2 end\nNewNCore Mg 2 end\nNewNCore O 2 end\nend\n%basis\nNewGTO C "aug-cc-pVDZ" end\nNewGTO Mg "cc-pVDZ" end\nNewGTO O "aug-cc-pVDZ" end\nNewAuxJGTO C "def2/J" end\nNewAuxJGTO Mg "def2/J" end\nNewAuxJGTO O "def2/JK" end\nNewAuxCGTO C "aug-cc-pVDZ/C" end\nNewAuxCGTO Mg "cc-pVDZ/C" end\nNewAuxCGTO O "aug-cc-pVDZ/C" end\nend\n%scf\nHFTyp rhf\nGuess MORead\nMOInp "orca_svp_start.gbw"\nSCFMode Direct\nsthresh 1e-6\nAutoTRAHIter 60\nMaxIter 1000\nend\n' ) + assert ( + orca_input_generator_1.orcablocks["adsorbate_slab"] + == orca_input_generator_1.orcablocks["adsorbate"] + ) + assert ( + orca_input_generator_1.orcablocks["adsorbate_slab"] + == orca_input_generator_1.orcablocks["slab"] + ) + # Check the case if the element_info has all of the same values element_info = { "C": { @@ -1379,382 +1078,174 @@ def test_generate_orca_input_preamble(embedded_adsorbed_cluster): "O": { "basis": "def2-SVP", "core": 2, - "ri_scf_basis": "def2/J", - "ri_cwft_basis": "def2-SVP/C", - }, - "Mg": { - "basis": "def2-SVP", - "core": 2, - "ri_scf_basis": "def2/J", - "ri_cwft_basis": "def2-SVP/C", - }, - } - - preamble_input = generate_orca_input_preamble( - embedded_cluster=embedded_adsorbed_cluster, - quantum_cluster_indices=[0, 1, 2, 3, 4, 5, 6, 7], - element_info=element_info, - pal_nprocs_block=pal_nprocs_block, - method_block=method_block, - scf_block=scf_block, - ) - - assert ( - preamble_input - == '%pal nprocs 1 end\n%maxcore 5000 end\n%pointcharges "orca.pc"\n%method\nMethod hf\nRI on\nRunTyp Energy\nNewNCore C 2 end\nNewNCore Mg 2 end\nNewNCore O 2 end\nend\n%basis\nBasis def2-SVP\nAux def2/J\nAuxC def2-SVP/C\nend\n%scf\nHFTyp rhf\nGuess MORead\nMOInp "orca_svp_start.gbw"\nSCFMode Direct\nsthresh 1e-6\nAutoTRAHIter 60\nMaxIter 1000\nend\n' - ) - - # Testing the case if we provide no blocks - preamble_input = generate_orca_input_preamble( - embedded_cluster=embedded_adsorbed_cluster, - quantum_cluster_indices=[0, 1, 2, 3, 4, 5, 6, 7], - ) - - assert preamble_input == '%pointcharges "orca.pc"\n' - - -def test_create_orca_point_charge_file(embedded_adsorbed_cluster, tmpdir): - # Test whether exception is raised if indices shared between quantum region and ecp region - with pytest.raises( - ValueError, match="An atom in the quantum cluster is also in the ECP region." - ): - create_orca_point_charge_file( - embedded_cluster=embedded_adsorbed_cluster, - quantum_cluster_indices=[0, 1, 2, 3, 4, 5, 6, 7], - ecp_region_indices=[7, 9, 10, 11, 12, 13, 14, 15, 20, 21, 22, 23, 24], - pc_file=Path(tmpdir, "orca.pc"), - ) - - # Create the point charge file - create_orca_point_charge_file( - embedded_cluster=embedded_adsorbed_cluster, - quantum_cluster_indices=[0, 1, 2, 3, 4, 5, 6, 7], - ecp_region_indices=[8, 9, 10, 11, 12, 13, 14, 15, 20, 21, 22, 23, 24], - pc_file=Path(tmpdir, "orca.pc"), - ) - - # Read the written file - orca_pc_file = np.loadtxt(Path(tmpdir, "orca.pc"), skiprows=1) - - # Check that the contents of the file match the reference - assert len(orca_pc_file) == 371 - - assert_allclose( - orca_pc_file[::30], - np.array( - [ - [-2.00000000e00, -2.11070451e00, 2.11070451e00, -2.14923990e00], - [2.00000000e00, 2.11024676e00, -2.11024676e00, -4.26789529e00], - [2.00000000e00, 6.32954443e00, 2.11144262e00, -4.36728442e-02], - [-2.00000000e00, -4.22049353e00, 6.32889566e00, 7.72802266e-03], - [2.00000000e00, -6.33074029e00, -2.11024676e00, -4.26789529e00], - [-2.00000000e00, 4.22049353e00, -6.33074029e00, -4.26789529e00], - [-2.00000000e00, 6.33074029e00, 2.11024676e00, -6.37814205e00], - [-2.00000000e00, 2.11024676e00, -8.44098706e00, -4.26789529e00], - [-2.00000000e00, -8.44098706e00, -6.32080280e00, 5.67209089e-03], - [2.00000000e00, -2.11024676e00, 8.44098706e00, -6.37814205e00], - [8.00000000e-01, -4.64254288e01, 3.79844418e01, -3.99237095e-02], - [3.12302613e00, -0.00000000e00, -5.71441194e01, -2.36698692e01], - [2.10472999e00, -2.36698692e01, 5.71441194e01, 2.59086514e01], - ] - ), - rtol=1e-05, - atol=1e-07, - ) - - -def test_get_cluster_info_from_slab(): - ( - adsorbate, - slab, - slab_first_atom_idx, - center_position, - adsorbate_vector_from_slab, - ) = get_cluster_info_from_slab( - adsorbate_slab_file=Path(FILE_DIR, "skzcam_files", "NO_MgO.poscar.gz"), - slab_center_indices=[32, 33], - adsorbate_indices=[0, 1], - ) - - # Check adsorbate matches reference - assert_allclose( - adsorbate.get_positions(), - np.array( - [ - [5.39130495, 4.07523845, 15.96981134], - [5.88635842, 4.84892196, 16.72270959], - ] - ), - rtol=1e-05, - atol=1e-07, - ) - assert_equal(adsorbate.get_atomic_numbers().tolist(), [7, 8]) - - # Check slab matches reference - assert_allclose( - slab.get_positions()[::10], - np.array( - [ - [0.0, 6.33073849, 7.5], - [0.0, 4.22049233, 9.61024616], - [4.2206809, 6.32743192, 11.73976183], - [4.2019821, 4.21892378, 13.89202884], - [0.0, 2.11024616, 9.61024616], - [4.22049233, 0.0, 7.5], - [4.22098271, 2.10239745, 13.86181098], - ] - ), - rtol=1e-05, - atol=1e-07, - ) - assert_equal( - slab.get_atomic_numbers().tolist(), - [ - 12, - 12, - 12, - 12, - 12, - 12, - 12, - 12, - 12, - 12, - 12, - 12, - 12, - 12, - 12, - 12, - 12, - 12, - 12, - 12, - 12, - 12, - 12, - 12, - 12, - 12, - 12, - 12, - 12, - 12, - 12, - 12, - 8, - 8, - 8, - 8, - 8, - 8, - 8, - 8, - 8, - 8, - 8, - 8, - 8, - 8, - 8, - 8, - 8, - 8, - 8, - 8, - 8, - 8, - 8, - 8, - 8, - 8, - 8, - 8, - 8, - 8, - 8, - 8, - ], - ) - - # Check first atom index of slab - assert slab_first_atom_idx == 30 - - # Check center_position matches reference - assert_allclose( - center_position, - np.array([1.06307888, -1.06176564, 2.4591779]), - rtol=1e-05, - atol=1e-07, - ) - - # Check vector distance of adsorbate from first center atom (corresponding to first atom index) of slab matches reference - assert_allclose( - adsorbate_vector_from_slab, - np.array([1.18932285, -0.14368533, 2.0777825]), - rtol=1e-05, - atol=1e-07, - ) - - -def test_generate_chemshell_cluster(tmp_path): - from quacc.atoms.skzcam import generate_chemshell_cluster - - # First create the slab - slab = read(Path(FILE_DIR, "skzcam_files", "NO_MgO.poscar.gz"))[2:] - - # Run ChemShell - generate_chemshell_cluster( - slab=slab, - slab_center_idx=30, - atom_oxi_states={"Mg": 2.0, "O": -2.0}, - filepath=tmp_path, - chemsh_radius_active=15.0, - chemsh_radius_cluster=25.0, - write_xyz_file=True, - ) - - # Read the output .xyz file - chemshell_embedded_cluster = read(tmp_path / "ChemShell_cluster.xyz") - - # Check that the positions and atomic numbers match reference - assert_allclose( - chemshell_embedded_cluster.get_positions()[::100], - np.array( - [ - [0.00000000e00, 0.00000000e00, 0.00000000e00], - [-2.09173593e00, -2.10867761e00, -6.39202884e00], - [2.12875640e00, -6.32916994e00, -6.39202884e00], - [-2.09273725e00, 1.05516878e01, -2.16301583e00], - [2.12875640e00, -1.05496623e01, -6.39202884e00], - [6.34924872e00, -1.05496623e01, -6.39202884e00], - [1.05725789e01, -1.05444085e01, -2.15965963e00], - [1.47875715e01, 6.33408913e00, -2.16464681e00], - [6.34924872e00, -1.47701546e01, -6.39202884e00], - [1.69010014e01, 6.33551965e00, -2.15224877e00], - [1.05697410e01, -1.47701546e01, -6.39202884e00], - [1.05637735e01, 1.68825241e01, -2.17052139e00], - [-1.68651820e01, 1.26649992e01, -5.68710477e-02], - [-1.89763671e01, -1.05478802e01, -2.16464681e00], - [1.05697410e01, -1.89906469e01, -6.39202884e00], - [-2.31906127e01, -4.21607826e00, -1.24998430e-02], - [1.47951600e01, 1.89994594e01, -5.11097275e-02], - [-2.31941976e01, -6.32916994e00, -6.39202884e00], - ] - ), - rtol=1e-05, - atol=1e-07, - ) - - assert_equal( - chemshell_embedded_cluster.get_atomic_numbers()[::20].tolist(), - [ - 12, - 12, - 12, - 8, - 12, - 8, - 8, - 12, - 8, - 12, - 8, - 8, - 12, - 8, - 12, - 8, - 8, - 12, - 8, - 12, - 8, - 8, - 12, - 8, - 8, - 8, - 8, - 12, - 8, - 12, - 8, - 8, - 12, - 12, - 12, - 8, - 8, - 12, - 8, - 12, - 8, - 8, - 12, - 8, - 12, - 12, - 8, - 12, - 8, - 12, - 8, - 8, - 12, - 8, - 8, - 12, - 8, - 12, - 8, - 12, - 12, - 8, - 12, - 12, - 8, - 8, - 8, - 12, - 12, - 8, - 8, - 8, - 12, - 8, - 12, - 8, - 8, - 12, - 12, - 8, - 12, - 8, - 12, - 12, - 8, - 8, - 12, - 9, - 9, - 9, - ], + "ri_scf_basis": "def2/J", + "ri_cwft_basis": "def2-SVP/C", + }, + "Mg": { + "basis": "def2-SVP", + "core": 2, + "ri_scf_basis": "def2/J", + "ri_cwft_basis": "def2-SVP/C", + }, + } + orca_input_generator_2.element_info = element_info + orca_input_generator_2._generate_preamble_block() + + assert ( + orca_input_generator_2.orcablocks["adsorbate_slab"] + == '%pal nprocs 1 end\n%maxcore 5000 end\n%pointcharges "orca.pc"\n%method\nMethod hf\nRI on\nRunTyp Energy\nNewNCore C 2 end\nNewNCore Mg 2 end\nNewNCore O 2 end\nend\n%basis\nBasis def2-SVP\nAux def2/J\nAuxC def2-SVP/C\nend\n%scf\nHFTyp rhf\nGuess MORead\nMOInp "orca_svp_start.gbw"\nSCFMode Direct\nsthresh 1e-6\nAutoTRAHIter 60\nMaxIter 1000\nend\n' + ) + + # Testing the case if we provide no blocks + orca_input_generator_3.scf_block = None + orca_input_generator_3.method_block = None + orca_input_generator_3.pal_nprocs_block = None + orca_input_generator_3.element_info = None + orca_input_generator_3._generate_preamble_block() + + assert ( + orca_input_generator_3.orcablocks["adsorbate_slab"] + == '%pointcharges "orca.pc"\n' + ) + + # Check whether error raised if not all element_info is provided + with pytest.raises(ValueError): + element_info_error = {"C": element_info["C"]} + orca_input_generator_3.element_info = element_info_error + orca_input_generator_3._generate_preamble_block() + + +def test_ORCAInputGenerator_create_point_charge_file(orca_input_generator, tmp_path): + # Create the point charge file + orca_input_generator.create_point_charge_file(pc_file=tmp_path / "orca.pc") + + # Read the written file + orca_pc_file = np.loadtxt(tmp_path / "orca.pc", skiprows=1) + + # Check that the contents of the file match the reference + assert len(orca_pc_file) == 371 + + assert_allclose( + orca_pc_file[::30], + np.array( + [ + [-2.00000000e00, -2.11070451e00, 2.11070451e00, -2.14923990e00], + [2.00000000e00, 2.11024676e00, -2.11024676e00, -4.26789529e00], + [2.00000000e00, 6.32954443e00, 2.11144262e00, -4.36728442e-02], + [-2.00000000e00, -4.22049353e00, 6.32889566e00, 7.72802266e-03], + [2.00000000e00, -6.33074029e00, -2.11024676e00, -4.26789529e00], + [-2.00000000e00, 4.22049353e00, -6.33074029e00, -4.26789529e00], + [-2.00000000e00, 6.33074029e00, 2.11024676e00, -6.37814205e00], + [-2.00000000e00, 2.11024676e00, -8.44098706e00, -4.26789529e00], + [-2.00000000e00, -8.44098706e00, -6.32080280e00, 5.67209089e-03], + [2.00000000e00, -2.11024676e00, 8.44098706e00, -6.37814205e00], + [8.00000000e-01, -4.64254288e01, 3.79844418e01, -3.99237095e-02], + [3.12302613e00, -0.00000000e00, -5.71441194e01, -2.36698692e01], + [2.10472999e00, -2.36698692e01, 5.71441194e01, 2.59086514e01], + ] + ), + rtol=1e-05, + atol=1e-07, ) -def test_convert_pun_to_atoms(): - embedded_cluster = convert_pun_to_atoms( - pun_file=Path(FILE_DIR, "skzcam_files", "mgo_shells_cluster.pun.gz"), +def test_CreateSKZCAMClusters_init(): + skzcam_clusters = CreateSKZCAMClusters( + adsorbate_indices=[0, 1], + slab_center_indices=[32], atom_oxi_states={"Mg": 2.0, "O": -2.0}, + adsorbate_slab_file=Path(FILE_DIR, "skzcam_files", "CO_MgO.poscar.gz"), + pun_file="test.pun", + ) + + assert_equal(skzcam_clusters.adsorbate_indices, [0, 1]) + assert skzcam_clusters.slab_center_indices == [32] + assert skzcam_clusters.atom_oxi_states == {"Mg": 2.0, "O": -2.0} + assert skzcam_clusters.adsorbate_slab_file == Path( + FILE_DIR, "skzcam_files", "CO_MgO.poscar.gz" + ) + assert skzcam_clusters.pun_file == "test.pun" + + # Check if error raised if adsorbate_indices and slab_center_indices overlap + with pytest.raises( + ValueError, match="The adsorbate and slab center indices cannot be the same." + ): + skzcam_clusters = CreateSKZCAMClusters( + adsorbate_indices=[0, 1], + slab_center_indices=[0], + atom_oxi_states={"Mg": 2.0, "O": -2.0}, + adsorbate_slab_file=Path(FILE_DIR, "skzcam_files", "CO_MgO.poscar.gz"), + pun_file="test.pun", + ) + + # Check if error raised if both adsorbate_slab_file and pun_file are None + with pytest.raises( + ValueError, match="Either the adsorbate_slab_file or pun_file must be provided." + ): + skzcam_clusters = CreateSKZCAMClusters( + adsorbate_indices=[0, 1], + slab_center_indices=[32], + atom_oxi_states={"Mg": 2.0, "O": -2.0}, + adsorbate_slab_file=None, + pun_file=None, + ) + + +def test_CreateSKZCAMClusters_run_chemshell(skzcam_clusters, tmp_path): + # Test if xyz file doesn't get written when write_xyz_file=False + skzcam_clusters_nowrite = deepcopy(skzcam_clusters) + skzcam_clusters_nowrite.convert_slab_to_atoms() + skzcam_clusters_nowrite.run_chemshell( + filepath=tmp_path / "ChemShell_Cluster.pun", + chemsh_radius_active=5.0, + chemsh_radius_cluster=10.0, + write_xyz_file=False, + ) + assert not os.path.isfile(tmp_path / "ChemShell_Cluster.xyz") + + skzcam_clusters.convert_slab_to_atoms() + skzcam_clusters.run_chemshell( + filepath=tmp_path / "ChemShell_Cluster.pun", + chemsh_radius_active=5.0, + chemsh_radius_cluster=10.0, + write_xyz_file=True, + ) + + # Read the output .xyz file + chemshell_embedded_cluster = read(tmp_path / "ChemShell_Cluster.xyz") + + # Check that the positions and atomic numbers match reference + assert_allclose( + chemshell_embedded_cluster.get_positions()[::100], + np.array( + [ + [0.00000000e00, 0.00000000e00, -7.72802046e-03], + [-2.11024616e00, 2.11024616e00, -6.38586825e00], + [6.33073849e00, -2.11024616e00, -6.38586825e00], + [-1.09499282e01, -4.53560876e00, 4.95687508e00], + ] + ), + rtol=1e-05, + atol=1e-07, + ) + + assert_equal( + chemshell_embedded_cluster.get_atomic_numbers()[::40].tolist(), + [12, 12, 12, 8, 8, 8, 12, 9, 9], + ) + + +def test_CreateSKZCAMClusters_convert_pun_to_atoms(skzcam_clusters): + slab_embedded_cluster = skzcam_clusters._convert_pun_to_atoms( + pun_file=Path(FILE_DIR, "skzcam_files", "ChemShell_Cluster.pun.gz") ) # Check that number of atoms matches our reference - assert len(embedded_cluster) == 390 + assert len(slab_embedded_cluster) == 390 # Check that last 10 elements of the oxi_state match our reference assert_allclose( - embedded_cluster.get_array("oxi_states")[-10:], + slab_embedded_cluster.get_array("oxi_states")[-10:], np.array( [ -0.80812511, @@ -1775,7 +1266,7 @@ def test_convert_pun_to_atoms(): # Check that first 10 elements of atom_type array match our reference assert_equal( - embedded_cluster.get_array("atom_type")[:10], + slab_embedded_cluster.get_array("atom_type")[:10], [ "cation", "anion", @@ -1792,110 +1283,155 @@ def test_convert_pun_to_atoms(): # Check that the positions of the atom matches assert_allclose( - embedded_cluster[200].position, + slab_embedded_cluster[200].position, np.array([6.33074029, -2.11024676, -6.37814205]), rtol=1e-05, atol=1e-07, ) -def test_insert_adsorbate_to_embedded_cluster(embedded_cluster): - # Create a CO molecule - adsorbate = Atoms( - "CO", positions=[[0.0, 0.0, 0.0], [0.0, 0.0, 1.128]], pbc=[False, False, False] +def test_CreateSKZCAMClusters_convert_slab_to_atoms(): + # Test for CO on MgO example + skzcam_clusters = CreateSKZCAMClusters( + adsorbate_indices=[0, 1], + slab_center_indices=[32], + atom_oxi_states={"Mg": 2.0, "O": -2.0}, + adsorbate_slab_file=Path(FILE_DIR, "skzcam_files", "CO_MgO.poscar.gz"), + pun_file=None, ) + skzcam_clusters.convert_slab_to_atoms() - # Insert the CO molecule to the embedded cluster - embedded_cluster, quantum_idx, ecp_idx = insert_adsorbate_to_embedded_cluster( - embedded_cluster=embedded_cluster, - adsorbate=adsorbate, - adsorbate_vector_from_slab=[0.0, 0.0, 2.0], - quantum_cluster_indices=[[0, 1, 3, 4], [5, 6, 7, 8]], - ecp_region_indices=[[0, 1, 3, 4], [5, 6, 7, 8]], + # Check adsorbate matches reference + assert_allclose( + skzcam_clusters.adsorbate.get_positions(), + np.array([[0.0, 0.0, 2.44102236], [0.0, 0.0, 3.58784217]]), + rtol=1e-05, + atol=1e-07, ) + assert_equal(skzcam_clusters.adsorbate.get_atomic_numbers().tolist(), [6, 8]) - # Check that the positions of the first 10 atoms of the embedded cluster matches the reference positions, oxi_states and atom_type + # Check slab matches reference assert_allclose( - embedded_cluster.get_positions()[:10], + skzcam_clusters.slab.get_positions()[::10], np.array( [ - [0.0, 0.0, 2.0], - [0.0, 0.0, 3.128], - [0.0, 0.0, 0.0], - [-2.12018426, 0.0, 0.00567209], - [0.0, 2.12018426, 0.00567209], - [2.12018426, 0.0, 0.00567209], - [0.0, -2.12018426, 0.00567209], - [0.0, 0.0, -2.14129966], - [-2.11144262, 2.11144262, -0.04367284], - [2.11144262, 2.11144262, -0.04367284], + [0.00000000e00, 0.00000000e00, 0.00000000e00], + [-2.11024616e00, 0.00000000e00, -6.37814023e00], + [2.11024616e00, 2.11024616e00, -4.26789407e00], + [2.10705227e00, 0.00000000e00, -2.14155146e00], + [-4.22049233e00, -2.11024616e00, -4.26789407e00], + [0.00000000e00, -4.22049233e00, -6.37814023e00], + [0.00000000e00, -2.12018365e00, 5.67208927e-03], ] ), rtol=1e-05, atol=1e-07, ) - assert_equal( - embedded_cluster.get_chemical_symbols()[:10], - ["C", "O", "Mg", "O", "O", "O", "O", "O", "Mg", "Mg"], + skzcam_clusters.slab.get_atomic_numbers().tolist()[::10], + [12, 12, 12, 12, 8, 8, 8], ) + + # Check center_position matches reference assert_allclose( - embedded_cluster.get_array("oxi_states")[:10], - np.array([0.0, 0.0, 2.0, -2.0, -2.0, -2.0, -2.0, -2.0, 2.0, 2.0]), + skzcam_clusters.center_position, + np.array([0.0, 0.0, 3.09607306]), rtol=1e-05, atol=1e-07, ) - assert_equal( - embedded_cluster.get_array("atom_type")[:10], - [ - "adsorbate", - "adsorbate", - "cation", - "anion", - "anion", - "anion", - "anion", - "anion", - "cation", - "cation", - ], + + # Check vector distance of adsorbate from first center atom (corresponding to first atom index) of slab matches reference + assert_allclose( + skzcam_clusters.adsorbate_vector_from_slab, + np.array([0.0, 0.0, 2.44102236]), + rtol=1e-05, + atol=1e-07, ) - # Check that the quantum_idx and ecp_idx match the reference - assert_equal(quantum_idx, [[0, 1, 2, 3, 5, 6], [0, 1, 7, 8, 9, 10]]) - assert_equal(ecp_idx, [[2, 3, 5, 6], [7, 8, 9, 10]]) + # Test for NO on MgO example + skzcam_clusters = CreateSKZCAMClusters( + adsorbate_indices=[0, 1], + slab_center_indices=[32, 33], + atom_oxi_states={"Mg": 2.0, "O": -2.0}, + adsorbate_slab_file=Path(FILE_DIR, "skzcam_files", "NO_MgO.poscar.gz"), + pun_file=None, + ) + skzcam_clusters.convert_slab_to_atoms() + # Check adsorbate matches reference + assert_allclose( + skzcam_clusters.adsorbate.get_positions(), + np.array( + [[1.18932285, -0.14368533, 2.0777825], [1.68437633, 0.62999818, 2.83068075]] + ), + rtol=1e-05, + atol=1e-07, + ) + assert_equal(skzcam_clusters.adsorbate.get_atomic_numbers().tolist(), [7, 8]) -def test_get_atom_distances(): - # Creating a H2 molecule as an Atoms object - h2_molecule = Atoms("H2", positions=[(0, 0, 0), (0, 0, 2)]) + # Check slab matches reference + assert_allclose( + skzcam_clusters.slab.get_positions()[::10], + np.array( + [ + [0.0, 0.0, 0.0], + [-4.2019821, -2.10867761, -6.39202884], + [0.01851023, -4.21892378, -4.28178268], + [0.01903204, -2.105465, -2.15224877], + [-4.2019821, -2.10867761, -4.28178268], + [0.01851023, -4.21892378, -6.39202884], + [0.01900061, -2.11652633, -0.03021786], + ] + ), + rtol=1e-05, + atol=1e-07, + ) + assert_equal( + skzcam_clusters.slab.get_atomic_numbers().tolist()[::10], + [12, 12, 12, 12, 8, 8, 8], + ) - # Run _get_atom_distances function to get distance of h2 molecule atoms from a center position - atom_distances = _get_atom_distances( - embedded_cluster=h2_molecule, center_position=[2, 0, 0] + # Check center_position matches reference + assert_allclose( + skzcam_clusters.center_position, + np.array([1.06307888, -1.06176564, 2.47922285]), + rtol=1e-05, + atol=1e-07, ) - assert_allclose(atom_distances, np.array([2.0, 2.82842712]), rtol=1e-05, atol=1e-07) + # Check vector distance of adsorbate from first center atom (corresponding to first atom index) of slab matches reference + assert_allclose( + skzcam_clusters.adsorbate_vector_from_slab, + np.array([1.18932285, -0.14368533, 2.0777825]), + rtol=1e-05, + atol=1e-07, + ) -def test_find_cation_shells(embedded_cluster): +def test_CreateSKZCAMClusters_find_cation_shells( + skzcam_clusters, slab_embedded_cluster +): # Get distance of atoms from the center distances = _get_atom_distances( - embedded_cluster=embedded_cluster, center_position=[0, 0, 2] + atoms=slab_embedded_cluster, center_position=[0, 0, 2] ) # Find the cation shells from the distances - cation_shells, cation_shells_idx = _find_cation_shells( - embedded_cluster=embedded_cluster, distances=distances, shell_width=0.005 + cation_shells_distances, cation_shells_idx = skzcam_clusters._find_cation_shells( + slab_embedded_cluster=slab_embedded_cluster, + distances=distances, + shell_width=0.005, ) # As these list of lists do not have the same length, we flatten first 5 lists into a 1D list for comparison - cation_shells_flatten = [item for row in cation_shells[:5] for item in row] + cation_shells_distances_flatten = [ + item for row in cation_shells_distances[:5] for item in row + ] cation_shells_idx_flatten = [item for row in cation_shells_idx[:5] for item in row] # Check that these lists are correct assert_allclose( - cation_shells_flatten, + cation_shells_distances_flatten, np.array( [ 2.0, @@ -1923,10 +1459,12 @@ def test_find_cation_shells(embedded_cluster): ) -def test_get_anion_coordination(embedded_cluster, distance_matrix): +def test_CreateSKZCAMClusters_get_anion_coordination( + skzcam_clusters, slab_embedded_cluster, distance_matrix +): # Get the anions for the second SKZCAM shell - anion_shell_idx = _get_anion_coordination( - embedded_cluster=embedded_cluster, + anion_shell_idx = skzcam_clusters._get_anion_coordination( + slab_embedded_cluster=slab_embedded_cluster, cation_shell_indices=[9, 8, 6, 7], dist_matrix=distance_matrix, ) @@ -1937,11 +1475,13 @@ def test_get_anion_coordination(embedded_cluster, distance_matrix): ) -def test_get_ecp_region(embedded_cluster, distance_matrix): +def test_CreateSKZCAMClusters_get_ecp_region( + skzcam_clusters, slab_embedded_cluster, distance_matrix +): # Find the ECP region for the first cluster - ecp_region_idx = _get_ecp_region( - embedded_cluster=embedded_cluster, - quantum_cluster_indices=[[0, 1, 2, 3, 4, 5]], + ecp_region_idx = skzcam_clusters._get_ecp_region( + slab_embedded_cluster=slab_embedded_cluster, + quantum_cluster_indices_set=[[0, 1, 2, 3, 4, 5]], dist_matrix=distance_matrix, ecp_dist=3, ) @@ -1950,12 +1490,87 @@ def test_get_ecp_region(embedded_cluster, distance_matrix): assert_equal(ecp_region_idx[0], [6, 7, 8, 9, 10, 11, 12, 13, 18, 19, 20, 21, 22]) -def test_create_skzcam_clusters(tmp_path): +def test_CreateSKZCAMClusters_create_adsorbate_slab_embedded_cluster( + skzcam_clusters, slab_embedded_cluster +): + skzcam_clusters.slab_embedded_cluster = slab_embedded_cluster + skzcam_clusters.adsorbate = Atoms( + "CO", positions=[[0.0, 0.0, 0.0], [0.0, 0.0, 1.128]], pbc=[False, False, False] + ) + skzcam_clusters.adsorbate_vector_from_slab = [0.0, 0.0, 2.0] + + skzcam_clusters._create_adsorbate_slab_embedded_cluster( + quantum_cluster_indices_set=[[0, 1, 3, 4], [5, 6, 7, 8]], + ecp_region_indices_set=[[0, 1, 3, 4], [5, 6, 7, 8]], + ) + + # Check that the positions of the first 10 atoms of the embedded cluster matches the reference positions, oxi_states and atom_type + assert_allclose( + skzcam_clusters.adsorbate_slab_embedded_cluster.get_positions()[:10], + np.array( + [ + [0.0, 0.0, 2.0], + [0.0, 0.0, 3.128], + [0.0, 0.0, 0.0], + [-2.12018426, 0.0, 0.00567209], + [0.0, 2.12018426, 0.00567209], + [2.12018426, 0.0, 0.00567209], + [0.0, -2.12018426, 0.00567209], + [0.0, 0.0, -2.14129966], + [-2.11144262, 2.11144262, -0.04367284], + [2.11144262, 2.11144262, -0.04367284], + ] + ), + rtol=1e-05, + atol=1e-07, + ) + + assert_equal( + skzcam_clusters.adsorbate_slab_embedded_cluster.get_chemical_symbols()[:10], + ["C", "O", "Mg", "O", "O", "O", "O", "O", "Mg", "Mg"], + ) + assert_allclose( + skzcam_clusters.adsorbate_slab_embedded_cluster.get_array("oxi_states")[:10], + np.array([0.0, 0.0, 2.0, -2.0, -2.0, -2.0, -2.0, -2.0, 2.0, 2.0]), + rtol=1e-05, + atol=1e-07, + ) + assert_equal( + skzcam_clusters.adsorbate_slab_embedded_cluster.get_array("atom_type")[:10], + [ + "adsorbate", + "adsorbate", + "cation", + "anion", + "anion", + "anion", + "anion", + "anion", + "cation", + "cation", + ], + ) + + # Check that the quantum_idx and ecp_idx match the reference + assert_equal( + skzcam_clusters.quantum_cluster_indices_set, + [[0, 1, 2, 3, 5, 6], [0, 1, 7, 8, 9, 10]], + ) + assert_equal(skzcam_clusters.ecp_region_indices_set, [[2, 3, 5, 6], [7, 8, 9, 10]]) + + +def test_CreateSKZCAMClusters_run_skzcam(skzcam_clusters, tmp_path): # Get quantum cluster and ECP region indices - _, quantum_cluster_idx, ecp_region_idx = create_skzcam_clusters( - pun_file=Path(FILE_DIR, "skzcam_files", "mgo_shells_cluster.pun.gz"), - center_position=[0, 0, 2], - atom_oxi_states={"Mg": 2.0, "O": -2.0}, + skzcam_clusters.center_position = [0, 0, 2] + skzcam_clusters.pun_file = Path( + FILE_DIR, "skzcam_files", "ChemShell_Cluster.pun.gz" + ) + skzcam_clusters.adsorbate = Atoms( + "CO", positions=[[0.0, 0.0, 0.0], [0.0, 0.0, 1.128]], pbc=[False, False, False] + ) + skzcam_clusters.adsorbate_vector_from_slab = [0.0, 0.0, 2.0] + + skzcam_clusters.run_skzcam( shell_max=2, ecp_dist=3.0, shell_width=0.005, @@ -1965,25 +1580,48 @@ def test_create_skzcam_clusters(tmp_path): # Check quantum cluster indices match with reference assert_equal( - quantum_cluster_idx[1], - [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 14, 15, 16, 17, 23, 24, 25, 26, 27, 28, 29, 30], + skzcam_clusters.quantum_cluster_indices_set[1], + [ + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10, + 11, + 16, + 17, + 18, + 19, + 25, + 26, + 27, + 28, + 29, + 30, + 31, + 32, + ], ) # Check ECP region indices match with reference assert_equal( - ecp_region_idx[1], + skzcam_clusters.ecp_region_indices_set[1], [ - 10, - 11, 12, 13, - 18, - 19, + 14, + 15, 20, 21, 22, - 39, - 40, + 23, + 24, 41, 42, 43, @@ -1998,23 +1636,27 @@ def test_create_skzcam_clusters(tmp_path): 52, 53, 54, - 76, - 77, + 55, + 56, 78, 79, 80, 81, 82, 83, + 84, + 85, ], ) # Read the written output and check that it matches with the reference positions and atomic numbers - skzcam_cluster = read(tmp_path / "SKZCAM_cluster_0.xyz") + skzcam_cluster_xyz = read(tmp_path / "SKZCAM_cluster_0.xyz") assert_allclose( - skzcam_cluster.get_positions(), + skzcam_cluster_xyz.get_positions(), np.array( [ + [0.0, 0.0, 2.0], + [0.0, 0.0, 3.128], [0.0, 0.0, 0.0], [-2.12018426, 0.0, 0.00567209], [0.0, 2.12018426, 0.00567209], @@ -2027,4 +1669,16 @@ def test_create_skzcam_clusters(tmp_path): atol=1e-07, ) - assert_equal(skzcam_cluster.get_atomic_numbers().tolist(), [12, 8, 8, 8, 8, 8]) + assert_equal( + skzcam_cluster_xyz.get_atomic_numbers().tolist(), [6, 8, 12, 8, 8, 8, 8, 8] + ) + + +def test_get_atom_distances(): + # Creating a H2 molecule as an Atoms object + h2_molecule = Atoms("H2", positions=[(0, 0, 0), (0, 0, 2)]) + + # Run _get_atom_distances function to get distance of h2 molecule atoms from a center position + atom_distances = _get_atom_distances(atoms=h2_molecule, center_position=[2, 0, 0]) + + assert_allclose(atom_distances, np.array([2.0, 2.82842712]), rtol=1e-05, atol=1e-07) diff --git a/tests/core/calculators/mrcc/test_mrcc.py b/tests/core/calculators/mrcc/test_mrcc.py index 77da885842..fda354de02 100644 --- a/tests/core/calculators/mrcc/test_mrcc.py +++ b/tests/core/calculators/mrcc/test_mrcc.py @@ -3,7 +3,7 @@ import pytest from ase.atoms import Atoms -from quacc import SETTINGS +from quacc import get_settings from quacc.calculators.mrcc.mrcc import MRCC, MrccProfile, _get_version_from_mrcc_header @@ -20,7 +20,7 @@ def test_mrcc_version_from_string(): def test_mrcc_singlepoint(tmp_path): calc = MRCC( - profile=MrccProfile(command=SETTINGS.MRCC_CMD), + profile=MrccProfile(command=get_settings().MRCC_CMD), mrccinput={"calc": "PBE", "basis": "STO-3G"}, mrccblocks="symm=off", directory=tmp_path,