Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ASE <---> pymatgen trajectory conversion #4253

Merged
merged 14 commits into from
Jan 15, 2025
Merged
Changes from 11 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
201 changes: 178 additions & 23 deletions src/pymatgen/core/trajectory.py
Original file line number Diff line number Diff line change
@@ -8,23 +8,30 @@
import warnings
from fnmatch import fnmatch
from pathlib import Path
from tempfile import NamedTemporaryFile
from typing import TYPE_CHECKING, TypeAlias, cast

import numpy as np
from monty.io import zopen
from monty.json import MSONable

from pymatgen.core.structure import Composition, DummySpecies, Element, Lattice, Molecule, Species, Structure
from pymatgen.io.ase import NO_ASE_ERR, AseAtomsAdaptor

if NO_ASE_ERR is None:
from ase.io.trajectory import Trajectory as AseTrajectory
else:
AseTrajectory = None


if TYPE_CHECKING:
from collections.abc import Iterator
from collections.abc import Iterator, Sequence
from typing import Any

from typing_extensions import Self

from pymatgen.util.typing import Matrix3D, PathLike, SitePropsType, Vector3D


__author__ = "Eric Sivonxay, Shyam Dwaraknath, Mingjian Wen, Evan Spotte-Smith"
__version__ = "0.1"
__date__ = "Jun 29, 2022"
@@ -563,8 +570,6 @@ def from_file(cls, filename: str | Path, constant_lattice: bool = True, **kwargs
Trajectory: containing the structures or molecules in the file.
"""
filename = str(Path(filename).expanduser().resolve())
is_mol = False
molecules = []
structures = []

if fnmatch(filename, "*XDATCAR*"):
@@ -578,31 +583,24 @@ def from_file(cls, filename: str | Path, constant_lattice: bool = True, **kwargs
structures = Vasprun(filename).structures

elif fnmatch(filename, "*.traj"):
try:
from ase.io.trajectory import Trajectory as AseTrajectory

from pymatgen.io.ase import AseAtomsAdaptor

ase_traj = AseTrajectory(filename)
# Periodic boundary conditions should be the same for all frames so just check the first
pbc = ase_traj[0].pbc
if NO_ASE_ERR is None:
return cls.ase_to_pmg_trajectory(
filename,
constant_lattice=constant_lattice,
store_frame_properties=True,
additional_fields=None,
)
raise ImportError("ASE is required to read .traj files. pip install ase")

if any(pbc):
structures = [AseAtomsAdaptor.get_structure(atoms) for atoms in ase_traj]
else:
molecules = [AseAtomsAdaptor.get_molecule(atoms) for atoms in ase_traj]
is_mol = True
elif fnmatch(filename, "*.json*"):
from monty.serialization import loadfn

except ImportError as exc:
raise ImportError("ASE is required to read .traj files. pip install ase") from exc
return loadfn(filename, **kwargs)

else:
supported_file_types = ("XDATCAR", "vasprun.xml", "*.traj")
supported_file_types = ("XDATCAR", "vasprun.xml", "*.traj", ".json")
raise ValueError(f"Expect file to be one of {supported_file_types}; got {filename}.")

if is_mol:
return cls.from_molecules(molecules, **kwargs)

return cls.from_structures(structures, constant_lattice=constant_lattice, **kwargs)

@staticmethod
@@ -734,3 +732,160 @@ def _get_site_props(self, frames: ValidIndex) -> SitePropsType | None:
return [self.site_properties[idx] for idx in frames]
raise ValueError("Unexpected frames type.")
raise ValueError("Unexpected site_properties type.")

def to_ase_trajectory(self, **kwargs) -> AseTrajectory:
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I suggest we just call it to_ase. Given this class is already called Trajectory, I don't think it is a stretch to imagine to_ase meaning the ASE format for trajectory.

"""
Convert to an ASE trajectory.

Args:
**kwargs : kwargs to pass to `pmg_to_ase_trajectory`

Returns:
ASE Trajectory
"""
if NO_ASE_ERR is None:
return self.pmg_to_ase_trajectory(self, **kwargs)
raise ImportError("ASE is required to write .traj files. pip install ase")

@classmethod
def ase_to_pmg_trajectory(
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This should be just called from_ase.

cls,
trajectory: str | Path | AseTrajectory,
constant_lattice: bool | None = None,
store_frame_properties: bool = True,
property_map: dict[str, str] | None = None,
lattice_match_tol: float = 1.0e-6,
additional_fields: Sequence[str] | None = ["temperature", "velocities"],
) -> Trajectory:
"""
Convert an ASE trajectory to a pymatgen trajectory.

Args:
trajectory (str, .Path, or ASE .Trajectory) : the ASE trajectory, or a file path to it if a str or .Path
constant_lattice (bool or None) : if a bool, whether the lattice is constant in the .Trajectory.
If `None`, this is determined on the fly.
store_frame_properties (bool) : Whether to store pymatgen .Trajectory `frame_properties` as
ASE calculator properties. Defaults to True
property_map (dict[str,str]) : A mapping between ASE calculator properties and
pymatgen .Trajectory `frame_properties` keys. Ex.:
property_map = {"energy": "e_0_energy"}
would map `e_0_energy` in the pymatgen .Trajectory `frame_properties`
to ASE's `get_potential_energy` function.
See `ase.calculators.calculator.all_properties` for a list of acceptable calculator properties.
lattice_match_tol (float = 1.0e-6) : tolerance to which lattices are matched if
`constant_lattice = None`.
additional_fields (Sequence of str, defaults to ["temperature", "velocities"]) :
Optional other fields to save in the pymatgen .Trajectory.
Valid options are "temperature" and "velocities".

Returns:
pymatgen .Trajectory
"""
if isinstance(trajectory, str | Path):
trajectory = AseTrajectory(trajectory, "r")

property_map = property_map or {
"energy": "energy",
"forces": "forces",
"stress": "stress",
}
additional_fields = additional_fields or []

adaptor = AseAtomsAdaptor()

structures = []
frame_properties = []
converter = adaptor.get_structure if (is_pbc := any(trajectory[0].pbc)) else adaptor.get_molecule

for atoms in trajectory:
site_properties = {}
if "velocities" in additional_fields:
site_properties["velocities"] = atoms.get_velocities()

structures.append(converter(atoms, site_properties=site_properties))

if store_frame_properties and atoms.calc:
props = {v: atoms.calc.get_property(k) for k, v in property_map.items()}
if "temperature" in additional_fields:
props["temperature"] = atoms.get_temperature()

frame_properties.append(props)

if constant_lattice is None:
constant_lattice = all(
np.all(np.abs(ref_struct.lattice.matrix - structures[j].lattice.matrix)) < lattice_match_tol
for i, ref_struct in enumerate(structures)
for j in range(i + 1, len(structures))
)

if is_pbc:
return cls.from_structures(structures, constant_lattice=constant_lattice, frame_properties=frame_properties)
return cls.from_molecules(
structures,
constant_lattice=constant_lattice,
frame_properties=frame_properties,
)

@staticmethod
def pmg_to_ase_trajectory(
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think we need a separate method from to_ase?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks @shyuep - the intent was to have two methods that don't depend on the Trajectory class being instantiated to use as general converters. Similar design as AseAtomsAdaptor(get_atoms and get_structure are both staticmethods)

I'm OK changing this if you'd rather just have the to_ase method

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think we need a method that is independent of Trajectory.

trajectory: Trajectory,
property_map: dict[str, str] | None = None,
ase_traj_file: str | Path | None = None,
) -> AseTrajectory:
"""
Convert a pymatgen .Trajectory to an ASE .Trajectory.

Args:
trajectory (pymatgen .Trajectory) : trajectory to convert
property_map (dict[str,str]) : A mapping between ASE calculator properties and
pymatgen .Trajectory `frame_properties` keys. Ex.:
property_map = {"energy": "e_0_energy"}
would map `e_0_energy` in the pymatgen .Trajectory `frame_properties`
to ASE's `get_potential_energy` function.
See `ase.calculators.calculator.all_properties` for a list of acceptable calculator properties.
ase_traj_file (str, Path, or None (default) ) : If not None, the name of
the file to write the ASE trajectory to.

Returns:
ase .Trajectory
"""
from ase.calculators.calculator import all_properties
from ase.calculators.singlepoint import SinglePointCalculator

property_map = property_map or {
"energy": "energy",
"forces": "forces",
"stress": "stress",
}

if (unrecognized_props := set(property_map).difference(set(all_properties))) != set():
raise ValueError(f"Unrecognized ASE calculator properties:\n{', '.join(unrecognized_props)}")

adaptor = AseAtomsAdaptor()

temp_file = None
if ase_traj_file is None:
temp_file = NamedTemporaryFile(delete=False) # noqa: SIM115
ase_traj_file = temp_file.name

frame_props = trajectory.frame_properties or [{} for _ in range(len(trajectory))]
for idx, structure in enumerate(trajectory):
atoms = adaptor.get_atoms(structure, msonable=False, velocities=structure.site_properties.get("velocities"))

props: dict[str, Any] = {k: frame_props[idx][v] for k, v in property_map.items() if v in frame_props[idx]}

# Ensure that `charges` and `magmoms` are not lost from AseAtomsAdaptor
for k in ("charges", "magmoms"):
if k in atoms.calc.implemented_properties or k in atoms.calc.results:
props[k] = atoms.calc.get_property(k)

atoms.calc = SinglePointCalculator(atoms=atoms, **props)

with AseTrajectory(ase_traj_file, "a" if idx > 0 else "w", atoms=atoms) as _traj_file:
_traj_file.write()

ase_traj = AseTrajectory(ase_traj_file, "r")
if temp_file is not None:
temp_file.close()

return ase_traj
27 changes: 27 additions & 0 deletions tests/core/test_trajectory.py
Original file line number Diff line number Diff line change
@@ -488,6 +488,12 @@ def test_from_file(self):

# Check composition of the first frame of the trajectory
assert traj[0].formula == "Li2 Mn2 O4"

# Check that ASE calculator properties are converted to frame properties
assert all(
all(frame.get(k) is not None for k in ("energy", "forces", "stress")) for frame in traj.frame_properties
)

except ImportError:
with pytest.raises(
ImportError,
@@ -546,3 +552,24 @@ def test_incorrect_dims(self):
Trajectory(species=species, coords=unphysical_coords, lattice=const_lattice)
with pytest.raises(ValueError, match="coords must have 3 dimensions!"):
Trajectory(species=species, coords=wrong_dim_coords, lattice=const_lattice)

def test_to_ase_traj(self):
traj = Trajectory.from_file(f"{TEST_DIR}/LiMnO2_chgnet_relax.json.gz")

try:
ase_traj = traj.to_ase_trajectory()

assert len(ase_traj) == len(traj)

# Ensure all frame properties and the magmoms are populated correctly
assert all(
all(atoms.calc.get_property(k) is not None for k in ("energy", "forces", "stress", "magmoms"))
for atoms in ase_traj
)

except ImportError:
with pytest.raises(
ImportError,
match="ASE is required to write .traj files. pip install ase",
):
ase_traj = traj.to_ase_trajectory()
Binary file not shown.