Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Changes for tackling the LevelOfTheory errors #970

Merged
merged 65 commits into from
Mar 20, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
65 commits
Select commit Hold shift + click to select a range
f91ceb5
Adding qc_tasks and calculation.py
rdguha1995 Aug 11, 2023
72ee7dc
big_commit for implementing the drone functionality of atomate(qchem)…
rdguha1995 Aug 18, 2023
f7272fc
Merge branch 'materialsproject:main' into main
rdguha1995 Aug 18, 2023
a721e22
ran pre-commit locally and some minor changes
rdguha1995 Aug 23, 2023
65cae25
Merge branch 'main' of github.com:rdguha1995/emmet
rdguha1995 Aug 23, 2023
64d923f
writing_unit_tests_for_sp_and_opt
rdguha1995 Oct 6, 2023
d294b32
ran pre-commit on test files
rdguha1995 Oct 6, 2023
97b31aa
corrected Union type error
rdguha1995 Oct 6, 2023
c664c4d
added numpy custom validators
rdguha1995 Oct 6, 2023
ade17a3
Merge remote-tracking branch 'upstream/main'
rdguha1995 Oct 6, 2023
df0a857
datetime import problem
rdguha1995 Oct 6, 2023
c18ab41
allowing arbitrary types
rdguha1995 Oct 6, 2023
96e84ff
further tests
rdguha1995 Oct 6, 2023
4548d7c
further tests
rdguha1995 Oct 6, 2023
5246bf0
change in io file convention
rdguha1995 Oct 7, 2023
fc7c3aa
Merge remote-tracking branch 'upstream/main'
rdguha1995 Oct 11, 2023
e29c98a
checking qcinput
rdguha1995 Oct 11, 2023
7d7ee29
checking qcinput
rdguha1995 Oct 11, 2023
e6cc06c
Incorporating all the pydantic 2 changes
rdguha1995 Oct 11, 2023
126bf5a
changes in lot, task_type, calc_type
rdguha1995 Oct 11, 2023
a899b1d
removing circular dependency
rdguha1995 Oct 12, 2023
3cf7691
calc_doc issue
rdguha1995 Oct 12, 2023
829f774
Make more fields optional in accordance with pydantic 2
rdguha1995 Oct 13, 2023
0c6bfbf
Corrected the Input Doc problems
rdguha1995 Oct 13, 2023
3306de5
CalcInput smx attribute issue
rdguha1995 Oct 13, 2023
5535e14
correcting input to qcinput and qcoutput
rdguha1995 Oct 13, 2023
11862f0
changes in the Optimization test doc for inputs
rdguha1995 Oct 13, 2023
1b9742d
molecule -> initial_molecule
rdguha1995 Oct 13, 2023
715a47b
changes to the sp valid task_schema
rdguha1995 Oct 13, 2023
28011d0
test_output breakdowns
rdguha1995 Oct 13, 2023
d9c79d4
test_output breakdowns OutputDoc
rdguha1995 Oct 13, 2023
a194c45
test_output breakdowns OutputDoc
rdguha1995 Oct 14, 2023
bbf9f45
test_output breakdowns OutputDoc
rdguha1995 Oct 14, 2023
626b184
test_output breakdowns OutputDoc
rdguha1995 Oct 14, 2023
4a7d529
test_output breakdowns OutputDoc
rdguha1995 Oct 14, 2023
fe7bfc4
test_output breakdowns OutputDoc
rdguha1995 Oct 14, 2023
9d1f87c
Changes to the TaskDoc
rdguha1995 Oct 19, 2023
98fa420
Changes to the TaskDoc np.array
rdguha1995 Oct 19, 2023
334068b
Changes to the conftest
rdguha1995 Oct 19, 2023
7c12f3e
Changes to the conftest arrays
rdguha1995 Oct 19, 2023
76944ba
Changes to the conftest arrays
rdguha1995 Oct 19, 2023
b4bf19b
Changes to test code
rdguha1995 Oct 19, 2023
b70ce6c
Changes to test code
rdguha1995 Oct 19, 2023
01173ea
Changes to test code
rdguha1995 Oct 19, 2023
9791d37
Changes to test code
rdguha1995 Oct 19, 2023
6a64691
Changes to test code
rdguha1995 Oct 19, 2023
65793d0
Changes to test code
rdguha1995 Oct 19, 2023
60ce203
Changes to test code
rdguha1995 Oct 19, 2023
828d5b0
Changes to test code
rdguha1995 Oct 19, 2023
b26b6fa
Changes to test code
rdguha1995 Oct 19, 2023
5c71bab
Changes to test code
rdguha1995 Oct 19, 2023
8640186
Merge branch 'materialsproject:main' into main
rdguha1995 Dec 11, 2023
3dee8d2
fixing bug where solvent field was being accessed as a dict
rdguha1995 Dec 13, 2023
4759da9
forgot pre-commit
rdguha1995 Dec 13, 2023
1028eab
Changed the default args for initial_molecule and optimized_molecule …
rdguha1995 Dec 13, 2023
e68bd20
deleted the superfluous FW files
rdguha1995 Dec 14, 2023
da6656f
making the TaskDoc.from_directory functionality for generalized to ha…
rdguha1995 Dec 21, 2023
ce1e962
Merge branch 'main' into main
rdguha1995 Jan 9, 2024
60aacd3
resolved the bugs with enthalpy, entropy and parsing frequencies
rdguha1995 Feb 6, 2024
8f92dcb
Merge branch 'main' into main
rdguha1995 Feb 6, 2024
c9cc045
Merge remote-tracking branch 'upstream/main'
rdguha1995 Feb 6, 2024
72e3a56
added the validate_lot flag to allow users flexibility in TaskDoc cre…
rdguha1995 Mar 19, 2024
1941f43
corrected the str errors in level_of_theory
rdguha1995 Mar 19, 2024
83ea3fb
Merge branch 'materialsproject:main' into dev_branch
rdguha1995 Mar 19, 2024
04233f2
corrected the downstream errors introduced due to the validate_lot flag
rdguha1995 Mar 19, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 20 additions & 3 deletions emmet-core/emmet/core/qc_tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -186,17 +186,26 @@ def from_qchem_calc_doc(cls, calc_doc: Calculation) -> "InputDoc":
InputDoc
A summary of the input molecule and corresponding calculation parameters
"""
try:
lot_val = calc_doc.level_of_theory.value
except AttributeError:
lot_val = calc_doc.level_of_theory

try:
ct_val = calc_doc.calc_type.value
except AttributeError:
ct_val = calc_doc.calc_type
# TODO : modify this to get the different variables from the task doc.
return cls(
initial_molecule=calc_doc.input.initial_molecule,
rem=calc_doc.input.rem,
level_of_theory=calc_doc.level_of_theory.value,
level_of_theory=lot_val,
task_type=calc_doc.task_type.value,
tags=calc_doc.input.tags,
solvation_lot_info=calc_doc.solvation_lot_info,
# special_run_type = calc_doc.input.special_run_type,
# smiles = calc_doc.input.smiles,
calc_type=calc_doc.calc_type.value,
calc_type=ct_val,
)


Expand Down Expand Up @@ -281,6 +290,7 @@ class TaskDoc(MoleculeMetadata):
def from_directory(
cls: Type[_T],
dir_name: Union[Path, str],
validate_lot: bool = True,
store_additional_json: bool = True,
additional_fields: Dict[str, Any] = None,
**qchem_calculation_kwargs,
Expand All @@ -292,6 +302,9 @@ def from_directory(
----------
dir_name
The path to the folder containing the calculation outputs.
validate_lot
Flag for matching the basis and functional with the list of functionals consistent with MPCules.
Defaults to True. Change to False if you want to create a TaskDoc with other basis sets and functionals.
store_additional_json
Whether to store additional json files in the calculation directory.
additional_fields
Expand Down Expand Up @@ -322,7 +335,11 @@ def from_directory(
continue
else:
calc_doc = Calculation.from_qchem_files(
dir_name, task_name, **files, **qchem_calculation_kwargs
dir_name,
task_name,
**files,
**qchem_calculation_kwargs,
validate_lot=validate_lot,
)
calcs_reversed.append(calc_doc)
# all_qchem_objects.append(qchem_objects)
Expand Down
99 changes: 68 additions & 31 deletions emmet-core/emmet/core/qchem/calculation.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
from typing import Any, Dict, List, Optional, Union

import numpy as np
import warnings
from pydantic import field_validator, BaseModel, Field, ConfigDict
from datetime import datetime
from pymatgen.io.qchem.inputs import QCInput
Expand Down Expand Up @@ -316,7 +317,7 @@ class Calculation(BaseModel):
None,
description="Paths (relative to dir_name) of the QChem output files associated with this calculation",
)
level_of_theory: LevelOfTheory = Field(
level_of_theory: Union[LevelOfTheory, str] = Field(
None,
description="Levels of theory used for the QChem calculation: For instance, B97-D/6-31g*",
)
Expand All @@ -328,7 +329,7 @@ class Calculation(BaseModel):
None,
description="Calculation task type like Single Point, Geometry Optimization. Frequency...",
)
calc_type: CalcType = Field(
calc_type: Union[CalcType, str] = Field(
None,
description="Combination dict of LOT + TaskType: B97-D/6-31g*/VACUUM Geometry Optimization",
)
Expand All @@ -340,6 +341,7 @@ def from_qchem_files(
task_name: str,
qcinput_file: Union[Path, str],
qcoutput_file: Union[Path, str],
validate_lot: bool = True,
store_energy_trajectory: bool = False,
qcinput_kwargs: Optional[Dict] = None,
qcoutput_kwargs: Optional[Dict] = None,
Expand Down Expand Up @@ -410,10 +412,10 @@ def from_qchem_files(
else {k2: Path(v2) for k2, v2 in v.items()}
for k, v in output_file_paths.items()
},
level_of_theory=level_of_theory(input_doc),
solvation_lot_info=lot_solvent_string(input_doc),
level_of_theory=level_of_theory(input_doc, validate_lot=validate_lot),
solvation_lot_info=lot_solvent_string(input_doc, validate_lot=validate_lot),
task_type=task_type(input_doc),
calc_type=calc_type(input_doc),
calc_type=calc_type(input_doc, validate_lot=validate_lot),
)


Expand Down Expand Up @@ -501,7 +503,9 @@ def _find_qchem_files(
return task_files


def level_of_theory(parameters: CalculationInput) -> LevelOfTheory:
def level_of_theory(
parameters: CalculationInput, validate_lot: bool = True
) -> LevelOfTheory:
"""

Returns the level of theory for a calculation,
Expand Down Expand Up @@ -532,19 +536,8 @@ def level_of_theory(parameters: CalculationInput) -> LevelOfTheory:

basis_lower = basis_raw.lower()

functional = [f for f in FUNCTIONALS if f.lower() == funct_lower]
if not functional:
raise ValueError(f"Unexpected functional {funct_lower}!")

functional = functional[0]

basis = [b for b in BASIS_SETS if b.lower() == basis_lower]
if not basis:
raise ValueError(f"Unexpected basis set {basis_lower}!")

basis = basis[0]

solvent_method = parameters.rem.get("solvent_method", "").lower()

if solvent_method == "":
solvation = "VACUUM"
elif solvent_method in ["pcm", "cosmo"]:
Expand All @@ -560,12 +553,44 @@ def level_of_theory(parameters: CalculationInput) -> LevelOfTheory:
else:
raise ValueError(f"Unexpected implicit solvent method {solvent_method}!")

lot = f"{functional}/{basis}/{solvation}"
if validate_lot:
functional = [f for f in FUNCTIONALS if f.lower() == funct_lower]
if not functional:
raise ValueError(f"Unexpected functional {funct_lower}!")

return LevelOfTheory(lot)
functional = functional[0]

basis = [b for b in BASIS_SETS if b.lower() == basis_lower]
if not basis:
raise ValueError(f"Unexpected basis set {basis_lower}!")

basis = basis[0]

lot = f"{functional}/{basis}/{solvation}"

return LevelOfTheory(lot)
else:
warnings.warn(
"User has turned the validate flag off."
"This can have downstream effects if the chosen functional and basis "
"is not in the available sets of MP employed functionals and the user"
"wants to include the TaskDoc in the MP infrastructure."
"Users should ignore this warning if their objective is just to create TaskDocs",
UserWarning,
stacklevel=2,
)
functional = funct_lower
basis = basis_lower
lot = f"{functional}/{basis}/{solvation}"

def solvent(parameters: CalculationInput, custom_smd: Optional[str] = None) -> str:
return lot


def solvent(
parameters: CalculationInput,
validate_lot: bool = True,
custom_smd: Optional[str] = None,
) -> str:
"""
Returns the solvent used for this calculation.

Expand All @@ -574,9 +599,11 @@ def solvent(parameters: CalculationInput, custom_smd: Optional[str] = None) -> s
custom_smd: (Optional) string representing SMD parameters for a
non-standard solvent
"""

lot = level_of_theory(parameters)
solvation = lot.value.split("/")[-1]
lot = level_of_theory(parameters, validate_lot=validate_lot)
if validate_lot:
solvation = lot.value.split("/")[-1]
else:
solvation = lot.split("/")[-1]

if solvation == "PCM":
# dielectric = float(parameters.get("solvent", {}).get("dielectric", 78.39))
Expand Down Expand Up @@ -631,7 +658,9 @@ def solvent(parameters: CalculationInput, custom_smd: Optional[str] = None) -> s


def lot_solvent_string(
parameters: CalculationInput, custom_smd: Optional[str] = None
parameters: CalculationInput,
validate_lot: bool = True,
custom_smd: Optional[str] = None,
) -> str:
"""
Returns a string representation of the level of theory and solvent used for this calculation.
Expand All @@ -641,9 +670,11 @@ def lot_solvent_string(
custom_smd: (Optional) string representing SMD parameters for a
non-standard solvent
"""

lot = level_of_theory(parameters).value
solv = solvent(parameters, custom_smd=custom_smd)
if validate_lot:
lot = level_of_theory(parameters, validate_lot=validate_lot).value
else:
lot = level_of_theory(parameters, validate_lot=validate_lot)
solv = solvent(parameters, custom_smd=custom_smd, validate_lot=validate_lot)
return f"{lot}({solv})"


Expand All @@ -670,14 +701,20 @@ def task_type(


def calc_type(
    parameters: CalculationInput,
    validate_lot: bool = True,
    special_run_type: Optional[str] = None,
) -> Union[CalcType, str]:
    """
    Determines the calc type

    Args:
        parameters: CalculationInput parameters
        validate_lot: Passed through to ``level_of_theory``. When True (the
            default) the level of theory is a ``LevelOfTheory`` enum member and
            the result is wrapped in ``CalcType``. When False the level of
            theory is a plain string and the result is returned as a plain
            string, without the ``CalcType`` lookup.
        special_run_type: Passed through to ``task_type``.

    Returns:
        A ``CalcType`` member when ``validate_lot`` is True, otherwise the
        string "<level of theory> <task type>".
    """
    tt = task_type(parameters, special_run_type=special_run_type).value
    lot = level_of_theory(parameters, validate_lot=validate_lot)

    if validate_lot:
        return CalcType(f"{lot.value} {tt}")

    # An unvalidated level of theory is already a plain string, so there is no
    # ``.value`` to read and the result is not looked up in ``CalcType``.
    return f"{lot} {tt}"
4 changes: 2 additions & 2 deletions emmet-core/tests/conftest_qchem.py
Original file line number Diff line number Diff line change
Expand Up @@ -145,7 +145,7 @@ class SinglePointTest(SchemaTestData):
"level_of_theory": "wB97M-V/def2-QZVPPD/SMD",
"task_type": "Single Point",
"calc_type": "wB97M-V/def2-QZVPPD/SMD Single Point",
"solvation_lot_nfo": "wB97M-V/def2-QZVPPD/SMD(SOLVENT=WATER)",
"solvation_lot_info": "wB97M-V/def2-QZVPPD/SMD(SOLVENT=WATER)",
},
"output": {
"mulliken": [np.array([-0.713178, 0.357278, 0.3559])],
Expand Down Expand Up @@ -301,7 +301,7 @@ class OptimizationTest(SchemaTestData):
"level_of_theory": "wB97M-V/def2-SVPD/SMD",
"task_type": "Geometry Optimization",
"calc_type": "wB97M-V/def2-SVPD/SMD Geometry Optimization",
"solvation_lot_nfo": "wB97M-V/def2-SVPD/SMD(SOLVENT=WATER)",
"solvation_lot_info": "wB97M-V/def2-SVPD/SMD(SOLVENT=WATER)",
},
"output": {
"initial_molecule": {
Expand Down
30 changes: 30 additions & 0 deletions emmet-core/tests/test_qc_task.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,3 +88,33 @@ def test_task_doc(test_dir, object_name):
# Test that additional_fields works
test_doc = TaskDoc.from_directory(dir_name, additional_fields={"foo": "bar"})
assert test_doc.model_dump()["additional_fields"] == {"foo": "bar"}


@pytest.mark.parametrize(
    "object_name",
    [
        pytest.param("SinglePointTest", id="SinglePointTest"),
        pytest.param("OptimizationTest", id="OptimizationTest"),
    ],
)
def test_task_doc_val_flag(test_dir, object_name):
    """Build a TaskDoc with ``validate_lot=False`` and check it against the reference data."""
    # Imported inside the test, as written in the original diff.
    from monty.json import MontyDecoder, jsanitize
    from emmet.core.qc_tasks import TaskDoc

    test_object = get_test_object(object_name)
    dir_name = test_dir / "qchem" / test_object.folder
    test_doc = TaskDoc.from_directory(dir_name, validate_lot=False)
    assert_schemas_equal(test_doc, test_object.task_doc)

    # test document can be jsanitized
    d = jsanitize(test_doc, strict=True, enum_values=True, allow_bson=True)

    # and decoded
    MontyDecoder().process_decoded(d)

    # Test that additional_fields works
    test_doc = TaskDoc.from_directory(
        dir_name, validate_lot=False, additional_fields={"foo": "bar"}
    )
    assert test_doc.model_dump()["additional_fields"] == {"foo": "bar"}
Loading