Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add new POTCAR validation to VASP validation plus tests #892

Merged
merged 10 commits into from
Nov 16, 2023
2 changes: 1 addition & 1 deletion emmet-builders/emmet/builders/vasp/task_validator.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ def __init__(
potcar = PotcarSingle.from_symbol_and_functional(
symbol=potcar_symbol, functional=functional
)
hashes[calc_type][potcar_symbol] = potcar.md5_header_hash
hashes[calc_type][potcar_symbol] = potcar._summary_stats

self.potcar_hashes = potcar_hashes
else:
Expand Down
2 changes: 1 addition & 1 deletion emmet-core/emmet/core/tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -224,7 +224,7 @@ class CustodianDoc(BaseModel):
)
job: Optional[Any] = Field(
None,
title="Cusotodian Job Data",
title="Custodian Job Data",
description="Job data logged by custodian.",
)

Expand Down
47 changes: 33 additions & 14 deletions emmet-core/emmet/core/vasp/validation.py
Original file line number Diff line number Diff line change
Expand Up @@ -111,7 +111,7 @@ def from_task_doc(
valid_input_set = None

if valid_input_set:
# Checking POTCAR hashes if a directory is supplied
# Checking POTCAR summary_stats if a directory is supplied
if potcar_hashes:
if _potcar_hash_check(task_doc, potcar_hashes):
if task_type in [
Expand Down Expand Up @@ -313,28 +313,47 @@ def _kspacing_warnings(input_set, inputs, data, warnings, kspacing_tolerance):

def _potcar_hash_check(task_doc, potcar_hashes):
    """
    Check that the POTCAR summary stats of a task match the reference values.

    For every entry in the ``potcar_spec`` of the most recent calculation
    (``task_doc.calcs_reversed[0]``), the keyword sets and the numeric
    statistics of the ``header`` and ``data`` sections are compared against
    the reference summary stats stored for the same POTCAR symbol.

    Args:
        task_doc: Task document exposing ``calcs_reversed`` and ``calc_type``.
        potcar_hashes: Mapping ``{str(calc_type): {symbol: summary_stats}}``
            holding the reference summary stats.

    Returns:
        bool: ``True`` if the check FAILS (a symbol has no reference, or its
        keywords / statistics differ); ``False`` if every POTCAR matches or
        if the data required for the comparison is missing.
    """
    # Absolute tolerance used when comparing the numeric statistics.
    data_tol = 1.0e-6

    try:
        potcar_details = task_doc.calcs_reversed[0]["input"]["potcar_spec"]
        if not potcar_details:
            # potcar_spec stored as None (or empty): nothing to compare, so
            # handle it like a calculation without potcar_spec data.
            return False

        # Loop-invariant: reference stats for this calculation type.
        ref_by_symbol = potcar_hashes[str(task_doc.calc_type)]

        for entry in potcar_details:
            symbol = entry["titel"].split(" ")[1]
            ref_summ_stats = ref_by_symbol.get(symbol, None)
            if not ref_summ_stats:
                # No reference for this symbol: the check fails.
                return True

            entry_summ_stats = entry["summary_stats"]
            if entry_summ_stats is None:
                # summary_stats present but unset; treated the same as a
                # missing key, i.e. as an old calculation that passes.
                return False

            key_match = all(
                set(ref_summ_stats["keywords"][key])
                == set(entry_summ_stats["keywords"][key])
                for key in ["header", "data"]
            )

            data_match = all(
                abs(
                    ref_summ_stats["stats"][key][stat]
                    - entry_summ_stats["stats"][key][stat]
                )
                < data_tol
                for stat in ["MEAN", "ABSMEAN", "VAR", "MIN", "MAX"]
                for key in ["header", "data"]
            )

            if (not key_match) or (not data_match):
                return True

    except KeyError:
        # Assume it is an old calculation without potcar_spec data and treat
        # it as passing the POTCAR check
        return False

    return False


def _magmom_check(task_doc, chemsys):
Expand Down
27 changes: 27 additions & 0 deletions emmet-core/tests/test_calculation.py
Original file line number Diff line number Diff line change
Expand Up @@ -161,3 +161,30 @@ def test_calculation(test_dir, object_name, task_name):

# and decoded
MontyDecoder().process_decoded(d)


def test_PotcarSpec(test_dir):
    """Check that PotcarSpec mirrors the symbol, hash and summary stats of its source POTCAR(s)."""
    from emmet.core.vasp.calculation import PotcarSpec
    from pymatgen.io.vasp import PotcarSingle, Potcar

    try:
        # Case 1: spec built from a lone PotcarSingle
        single = PotcarSingle.from_symbol_and_functional(symbol="Si", functional="PBE")
        single_spec = PotcarSpec.from_potcar_single(potcar_single=single)

        assert single_spec.titel == single.symbol
        assert single_spec.hash == single.md5_header_hash
        assert single_spec.summary_stats == single._summary_stats

        # Case 2: specs built from a Potcar holding multiple PotcarSingle objects
        multi = Potcar(symbols=["Ga_d", "As"], functional="PBE")
        multi_specs = PotcarSpec.from_potcar(potcar=multi)

        for idx, spec in enumerate(multi_specs):
            source = multi[idx]
            assert spec.titel == source.symbol
            assert spec.hash == source.md5_header_hash
            assert spec.summary_stats == source._summary_stats

    except (OSError, ValueError):
        # Pymatgen POTCARs are not available, so the test cannot be performed
        pass
2 changes: 1 addition & 1 deletion emmet-core/tests/test_ml.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@
("calculator", "prop_kwargs"),
[
(get_universal_calculator("chgnet"), None),
("m3gnet", {"ElasticityCalc": {"relax_structure": False}}),
("M3GNet-MP-2021.2.8-PES", {"ElasticityCalc": {"relax_structure": False}}),
],
)
def test_ml_doc(calculator: Union[str, "Calculator"], prop_kwargs: dict) -> None:
Expand Down
56 changes: 55 additions & 1 deletion emmet-core/tests/vasp/test_vasp.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@

from emmet.core.vasp.calc_types import RunType, TaskType, run_type, task_type
from emmet.core.vasp.task_valid import TaskDocument
from emmet.core.vasp.validation import ValidationDoc
from emmet.core.vasp.validation import ValidationDoc, _potcar_hash_check


def test_task_type():
Expand Down Expand Up @@ -85,3 +85,57 @@ def test_ldau_validation(test_dir):
valid = ValidationDoc.from_task_doc(task)

assert valid.valid


def test_potcar_hash_check(test_dir):
    """
    Exercise _potcar_hash_check with matching, missing and altered references.

    The check returns a falsy value when the POTCARs match and a truthy value
    when validation fails, so the first assertion is negated.
    """
    from copy import deepcopy

    from pymatgen.io.vasp import PotcarSingle

    with zopen(test_dir / "CoF_TaskDoc.json") as f:
        data = json.load(f)

    # NB: seems like TaskDoc is not fully compatible with TaskDocument;
    # excluding the `last_updated` key ensures TaskDocument can be built.
    #
    # Similarly, after a TaskDoc is dumped to a file, using
    #     json.dump(jsanitize(<Task Doc>.model_dump()), <filename>)
    # the TaskDoc cannot be rebuilt without excluding the `orig_inputs` key.
    task_doc = TaskDocument(**{key: data[key] for key in data if key != "last_updated"})

    # First check: generate hashes from POTCARs in TaskDoc, check should pass
    calc_type = str(task_doc.calc_type)
    expected_hashes = {calc_type: {}}
    try:
        for spec in task_doc.calcs_reversed[0]["input"]["potcar_spec"]:
            symbol = spec["titel"].split(" ")[1]
            expected_hashes[calc_type][symbol] = PotcarSingle.from_symbol_and_functional(
                symbol=symbol, functional="PBE"
            )._summary_stats

        assert not _potcar_hash_check(task_doc, expected_hashes)

        # Second check: remove POTCAR from expected_hashes, check should fail
        # (a shallow copy is enough here: only the top-level mapping changes)
        missing_hashes = {calc_type: {**expected_hashes[calc_type]}}
        first_element = list(missing_hashes[calc_type])[0]
        missing_hashes[calc_type].pop(first_element)
        assert _potcar_hash_check(task_doc, missing_hashes)

        # Third check: change data in expected hashes, check should fail.
        # Deep copy so the in-place edits below do not leak into
        # expected_hashes or the summary stats objects it references.
        wrong_hashes = deepcopy(expected_hashes)
        for key in wrong_hashes[calc_type][first_element]["stats"]["data"]:
            wrong_hashes[calc_type][first_element]["stats"]["data"][key] *= 1.1

        assert _potcar_hash_check(task_doc, wrong_hashes)

    except (OSError, ValueError):
        # missing Pymatgen POTCARs, cannot perform test
        assert True
1 change: 1 addition & 0 deletions test_files/CoF_TaskDoc.json

Large diffs are not rendered by default.

Loading