Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Assyst stability #159

Merged
merged 10 commits into from
Oct 17, 2024
36 changes: 25 additions & 11 deletions pyiron_potentialfit/assyst/calculations/workflow.py
Original file line number Diff line number Diff line change
Expand Up @@ -118,17 +118,25 @@ def run_container(pr: Project, cont: "StructureContainer", config: CalculationCo

def if_new(train):
if config.min_dist is not None:
if isinstance(config.min_dist, float):
dfilter = DistanceFilter(
{el: config.min_dist / 2 for el in cont._container.get_elements()}
)
elif isinstance(config.min_dist, dict):
dfilter = DistanceFilter(config.min_dist)
else:
assert False, f"min_dist cannot by of type {type(config.min_dist)}: {config.min_dist}!"
match config.min_dist:
case float():
dfilter = DistanceFilter(
{
el: config.min_dist / 2
for el in cont._container.get_elements()
}
)
case dict():
dfilter = DistanceFilter(config.min_dist)
case _:
assert (
False
), f"min_dist cannot by of type {type(config.min_dist)}: {config.min_dist}!"
filtered_cont = cont._container.sample(
lambda f, i: dfilter(f.get_structure(i))
)
else:
dfilter = DistanceFilter()
filtered_cont = cont._container.sample(lambda f, i: dfilter(f.get_structure(i)))
filtered_cont = cont._container.copy()

if train.input.read_only:
train.input.unlock()
Expand Down Expand Up @@ -176,7 +184,7 @@ def run(
config: CalculationConfig,
*containers: "StructureContainer",
tries: int = 10,
wait: float = 60
wait: float = 60,
):
"""
Run high quality DFT on all structures in `containers`.
Expand Down Expand Up @@ -256,6 +264,7 @@ def combine(
containers: Iterable[TrainingContainer],
name="Everything",
min_dist=None,
reference_energies=None,
force_cap=None,
energy_cap=None,
check_duplicates=True,
Expand All @@ -269,7 +278,10 @@ def combine(
containers (iterable of TrainingContainer): containers to combine
min_dist (float or dict of str to float, optional): if given, filter structures that have atoms closer than this;
if a dict it specifies the minimal allowed radii of each element
reference_energies (dict of str to float): energies of the isolated atoms; if present will be subtracted from
the raw energies present in all containers; do not pass when input energies are already corrected
force_cap (float): filter structures that have atomic forces larger than this value
energy_cap (float): filter structures that have larger energy (uncorrected) than this value
check_duplicates (bool): discard duplicated structures; some care has been taken to optimize this, but it can be
costly for large datasets
delete_existing_job (bool): combine containers again, even if `pr[name]` exists already
Expand Down Expand Up @@ -331,6 +343,8 @@ def larger_than_min_dist(a):
every.save()
every.status.finished = True
every = deduplicate(every, replace=True)
if reference_energies is not None:
every.subtract_reference(reference_energies)
every.input.save_neighbors = True
every.input.num_neighbors = 150
every.server.queue = "cmti"
Expand Down
1 change: 1 addition & 0 deletions pyiron_potentialfit/assyst/projectflow.py
Original file line number Diff line number Diff line change
Expand Up @@ -431,6 +431,7 @@ def _run(self, delete_existing_job=False, delete_aborted_job=True):
for i, structure in tqdm(
enumerate(self.input.structures.iter_structures()),
total=len(self.input.structures),
desc="Submitting structures",
):

def modify(job):
Expand Down
20 changes: 18 additions & 2 deletions pyiron_potentialfit/assyst/structures/minimize.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
from dataclasses import asdict
from logging import getLogger

import numpy as np

from ..util import ServerConfig
from ..vasp import VaspConfig
from ..projectflow import (
Expand Down Expand Up @@ -67,10 +69,12 @@ def _run(self, delete_existing_job=False, delete_aborted_job=True):
vasp = VaspFactory()
# AlH specific hack, VaspFactory ignores this for other structures automatically
vasp.enable_nband_hack({"Al": 2, "H": 2}) # = 3/2 + 1/2 VASP default
vasp_config.configure_vasp_job(vasp)
server_config.configure_server_on_job(vasp)

if self.input.degrees_of_freedom == "volume":
ediffg = vasp_config.incar.get("EDIFFG", 10 * vasp_config.incar["EDIFF"])
if ediffg < 0:
# user tries to set force tolerance which won't work for volume minimization!
del vasp_config.incar["EDIFFG"]
vasp.minimize_volume()
elif self.input.degrees_of_freedom == "all":
vasp.minimize_all()
Expand All @@ -81,6 +85,8 @@ def _run(self, delete_existing_job=False, delete_aborted_job=True):
False
), f"DoF cannot be {self.input.degrees_of_freedom}, traitlets broken?"

server_config.configure_server_on_job(vasp)
vasp_config.configure_vasp_job(vasp)
sflow.input.job = vasp
if vasp_config.magmoms is not None and len(vasp_config.magmoms) > 0:

Expand Down Expand Up @@ -182,7 +188,10 @@ def minimize(
minf = MinimizeVaspFlow(pr, f"{cont.name}{n}")

vasp.incar.setdefault("ISYM", 0)
vasp.incar.setdefault("IBRION", 2)
vasp.incar.setdefault("POTIM", 0.1)
vasp.incar.setdefault("EDIFF", 1e-6)

if server.queue is None:
server.queue = "cmti"

Expand All @@ -194,6 +203,13 @@ def if_new(flow):
flow.input.vasp_config = asdict(vasp)
flow.input.degrees_of_freedom = degrees_of_freedom
flow.input.server_config = asdict(server)
# tricky: I kind of do not want to filter here
# if a dict it's a dict of atomic radii, MinimizeVaspInput can only
# understand scalars for now, so take twice the smallest radius
# if isinstance(min_dist, dict):
# flow.input.min_dist = 2 * min(min_dist.values())
# else:
# flow.input.min_dist = min_dist
flow.run(delete_existing_job=workflow.delete_existing_job)
raise RunAgain("Just starting!")

Expand Down
17 changes: 14 additions & 3 deletions pyiron_potentialfit/assyst/structures/spg.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
from pyiron_atomistics.atomistics.structure.atoms import Atoms
from pyiron_atomistics.atomistics.structure.structurestorage import StructureStorage

from ase.data import atomic_numbers
from tqdm.auto import tqdm
from pyxtal import pyxtal
from pyxtal.msg import Comp_CompatibilityError, VolumeError
Expand Down Expand Up @@ -152,8 +153,16 @@ def spg(
if isinstance(min_dist, float):
tm = Tol_matrix.from_single_value(min_dist)
elif isinstance(min_dist, dict):
tm = Tol_matrix(*( (atomic_numbers[e1], atomic_numbers[e2], min_dist[e1] + min_dist[e2])
for e1, e2 in product(min_dist, repeat=2) ))
tm = Tol_matrix(
*(
(
atomic_numbers[e1],
atomic_numbers[e2],
min_dist[e1] + min_dist[e2],
)
for e1, e2 in product(min_dist, repeat=2)
)
)
else:
assert False, f"min_dist cannot by of type {type(min_dist)}: {min_dist}!"
else:
Expand Down Expand Up @@ -191,7 +200,9 @@ def check_cell_shape(structure):
elif isinstance(min_dist, dict):
distance_filter = DistanceFilter(min_dist)
else:
assert False, f"min_dist cannot by of type {type(min_dist)}: {min_dist}!"
assert (
False
), f"min_dist cannot by of type {type(min_dist)}: {min_dist}!"
el, ni = zip(*((el, ni) for el, ni in zip(elements, num_ions) if ni > 0))
# missing checker support
# pr.create.structure.pyxtal(
Expand Down
6 changes: 4 additions & 2 deletions pyiron_potentialfit/assyst/structures/workflow.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,11 +82,13 @@ def __post_init__(self):
def get_distance_filter(self):
match self.min_dist:
case float():
return DistanceFilter({el: self.min_dist/2 for el in self.elements})
return DistanceFilter({el: self.min_dist / 2 for el in self.elements})
case dict():
return DistanceFilter(self.min_dist)
case _:
assert False, f"min_dist cannot by of type {type(self.min_dist)}: {self.min_dist}!"
assert (
False
), f"min_dist cannot by of type {type(self.min_dist)}: {self.min_dist}!"


def create_structure_set(
Expand Down
3 changes: 3 additions & 0 deletions pyiron_potentialfit/assyst/vasp.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ class VaspConfig:
encut: Optional[float] = None
# if float interpreted as k mesh spacing
kmesh: Optional[Union[int, float, KMeshSpec]] = None
empty_states: Optional[float] = None
incar: dict = field(default_factory=dict)

# pyiron executable version name
Expand Down Expand Up @@ -74,6 +75,8 @@ def configure_vasp_job(self, job):
and "LORBIT" not in self.incar
):
self.incar["LORBIT"] = 10
if self.empty_states is not None:
job.set_empty_states(self.empty_states)
for k, v in self.incar.items():
try:
job.input.incar[k] = v
Expand Down
40 changes: 40 additions & 0 deletions pyiron_potentialfit/atomistics/job/trainingcontainer.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
"""

from typing import Callable, Dict, Any, Optional
from collections import Counter

import numpy as np
import pandas as pd
Expand Down Expand Up @@ -294,6 +295,22 @@ def iter(self, *arrays, wrap_atoms=True):
"""
yield from self._container.iter(*arrays, wrap_atoms=wrap_atoms)

def subtract_reference(self, reference_energies: dict[str, float]):
    """
    Subtract the energies of isolated atoms from the stored structure energies.

    Only allowed while the job is still in the initialized state, i.e. before it has been run.
    The actual work is delegated to the underlying container, which keeps the original energies
    in a separate array "energy_uncorrected".

    Args:
        reference_energies (dict): a dictionary from element symbols to the energy of their isolated atoms.

    Raises:
        ValueError: if the job is not in the initialized state
    """
    job_state = self.status
    if job_state.initialized:
        # Happy path: hand the correction over to the wrapped container.
        self._container.subtract_reference(reference_energies)
        return
    raise ValueError(
        f"Must be called before job is run, not in state: '{job_state}'!"
    )


class TrainingPlots(StructurePlots):
"""
Expand Down Expand Up @@ -670,3 +687,26 @@ def plot(self):
if self._plots is None:
self._plots = TrainingPlots(self)
return self._plots

def subtract_reference(self, reference_energies: dict[str, float]):
    """
    Subtract atomic reference energies from the structure energies.

    For every structure, the reference energies of all its atoms are summed up and subtracted from
    the stored energy.

    The original energy is kept in the array "energy_uncorrected".  If that array exists already
    (e.g. because a reference was subtracted before), it is left untouched, so that it keeps holding
    the energies from before the first correction instead of partially corrected ones.

    Args:
        reference_energies (dict): a dictionary from element symbols to the energy of their isolated atoms.

    Raises:
        ValueError: if `reference_energies` has no entry for an element present in the container
    """
    elements = self.get_elements()
    if not set(reference_energies).issuperset(elements):
        raise ValueError(
            f"Must specify reference energies for all present elements: {elements}!"
        )
    # One Counter per structure: how many atoms of each element it contains.
    counts = map(Counter, self.get_array_ragged("symbols"))
    bias = np.array(
        [sum(reference_energies[e] * c[e] for e in elements) for c in counts]
    )
    # Back up the energies only once: overwriting the backup on a repeated call would replace the
    # original values with already-corrected ones.
    if "energy_uncorrected" not in self._per_chunk_arrays:
        self._per_chunk_arrays["energy_uncorrected"] = self._per_chunk_arrays[
            "energy"
        ].copy()
    # Only the first `num_chunks` entries are touched; the sliced view makes the update in place.
    self._per_chunk_arrays["energy"][: self.num_chunks] -= bias
Loading