Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

121 add training #123

Merged
merged 18 commits into from
May 23, 2024
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
170 changes: 170 additions & 0 deletions aiida_mlip/calculations/train.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,170 @@
"""Class for training machine learning models."""

from pathlib import Path

from aiida.common import InputValidationError, datastructures
import aiida.common.folders
from aiida.engine import CalcJob, CalcJobProcessSpec
import aiida.engine.processes
from aiida.orm import Dict, FolderData, SinglefileData

from aiida_mlip.data.config import JanusConfigfile
from aiida_mlip.data.model import ModelData


def validate_inputs(
    inputs: dict, port_namespace: aiida.engine.processes.ports.PortNamespace
):
    """
    Check if the inputs are valid.

    The training config must define ``train_file``, ``valid_file``, ``test_file``
    and ``name``. The three ``*_file`` entries must point to existing paths, while
    ``name`` is only the stem used for the output files.

    Parameters
    ----------
    inputs : dict
        The inputs dictionary.

    port_namespace : `aiida.engine.processes.ports.PortNamespace`
        An instance of aiida's `PortNameSpace`.

    Raises
    ------
    InputValidationError
        If no config file is given, a mandatory key is missing from it, or one of
        the data file paths does not exist.
    """
    # Nothing to validate when the port is not part of this namespace.
    if "mlip_config" not in port_namespace:
        return

    if "mlip_config" not in inputs:
        raise InputValidationError("No config file given")

    # Item access works for plain dicts as well as attribute-style mappings.
    mlip_dict = inputs["mlip_config"].as_dictionary()

    required_keys = ("train_file", "valid_file", "test_file", "name")
    for key in required_keys:
        if key not in mlip_dict:
            raise InputValidationError(f"Mandatory key {key} not in config file")

        # "name" is just the name to use for the output files, not a path.
        if key == "name":
            continue

        # Check the configured *value*, not the key name. Relative paths are
        # resolved against the current working directory. Empty/null values are
        # rejected explicitly because Path("") would resolve to "." and exist.
        file_path = mlip_dict[key]
        if not file_path or not Path(file_path).exists():
            raise InputValidationError(f"Path given for {key} does not exist")


class Train(CalcJob):  # numpydoc ignore=PR01
    """
    Calcjob implementation to train mlips.

    Attributes
    ----------
    DEFAULT_OUTPUT_FILE : str
        Default stdout file name.
    DEFAULT_INPUT_FILE : str
        Default input file name.
    LOG_FILE : str
        Default log file name.

    Methods
    -------
    define(spec: CalcJobProcessSpec) -> None:
        Define the process specification, its inputs, outputs and exit codes.
    prepare_for_submission(folder: Folder) -> CalcInfo:
        Create the input files for the `CalcJob`.
    """

    DEFAULT_OUTPUT_FILE = "aiida-stdout.txt"
    DEFAULT_INPUT_FILE = "aiida.xyz"
    LOG_FILE = "aiida.log"

    @classmethod
    def define(cls, spec: CalcJobProcessSpec) -> None:
        """
        Define the process specification, its inputs, outputs and exit codes.

        Parameters
        ----------
        spec : `aiida.engine.CalcJobProcessSpec`
            The calculation job process spec to define.
        """
        super().define(spec)
        # Input validation is done by the module-level `validate_inputs`, which
        # also enforces that `mlip_config` is actually provided.
        spec.inputs.validator = validate_inputs
        # Define inputs
        spec.input(
            "mlip_config",
            valid_type=JanusConfigfile,
            required=False,
            help="Config file with the parameters for training",
        )
        # Give stdout a default name so `prepare_for_submission` never ends up
        # with `None` as stdout name / retrieve-list entry.
        spec.input(
            "metadata.options.output_filename",
            valid_type=str,
            default=cls.DEFAULT_OUTPUT_FILE,
        )
        # Define outputs
        spec.output("model", valid_type=ModelData)
        spec.output("compiled_model", valid_type=SinglefileData)
        spec.output(
            "results_dict",
            valid_type=Dict,
            help="The `results_dict` output node of the successful calculation.",
        )
        spec.output("logs", valid_type=FolderData)
        spec.output("checkpoints", valid_type=FolderData)
        spec.default_output_node = "results_dict"
        # Exit codes
        spec.exit_code(
            305,
            "ERROR_MISSING_OUTPUT_FILES",
            message="Some output files missing or cannot be read",
        )

    # pylint: disable=too-many-locals
    def prepare_for_submission(
        self, folder: aiida.common.folders.Folder
    ) -> datastructures.CalcInfo:
        """
        Create the input files for the `Calcjob`.

        Parameters
        ----------
        folder : aiida.common.folders.Folder
            An `aiida.common.folders.Folder` to temporarily write files on disk.

        Returns
        -------
        aiida.common.datastructures.CalcInfo
            An instance of `aiida.common.datastructures.CalcInfo`.
        """
        # `mlip_config` is the only config input declared in `define`.
        mlip_config = self.inputs.mlip_config
        mlip_dict = mlip_config.as_dictionary()

        # Copy config file content inside the folder where the calculation is run.
        # The same file name must be written here and passed on the command line.
        config_copy = "mlip_train.yaml"
        with folder.open(config_copy, "w", encoding="utf-8") as configfile:
            configfile.write(mlip_config.get_content())

        codeinfo = datastructures.CodeInfo()

        # Initialize cmdline_params with train command
        codeinfo.cmdline_params = ["train"]

        cmd_line = {"mlip-config": config_copy}
        for flag, value in cmd_line.items():
            codeinfo.cmdline_params += [f"--{flag}", str(value)]

        # Node where the code is saved
        codeinfo.code_uuid = self.inputs.code.uuid
        # Save name of output as you need it for running the code
        codeinfo.stdout_name = self.metadata.options.output_filename

        calcinfo = datastructures.CalcInfo()
        calcinfo.codes_info = [codeinfo]
        # Save the info about the node where the calc is stored
        calcinfo.uuid = str(self.uuid)

        # Retrieve output files. Directory keys are optional in the config, so
        # fall back to the same defaults the parser uses. Entries must be plain
        # strings, so the model paths are converted from `Path`.
        model_dir = Path(mlip_dict.get("model_dir", "."))
        model_name = mlip_dict["name"]
        calcinfo.retrieve_list = [
            self.metadata.options.output_filename,
            # NOTE(review): kept from the original; confirm a file/folder named
            # after the process uuid is really produced by the run.
            self.uuid,
            mlip_dict.get("log_dir", "logs"),
            mlip_dict.get("results_dir", "results"),
            mlip_dict.get("checkpoint_dir", "checkpoints"),
            str(model_dir / f"{model_name}.model"),
            str(model_dir / f"{model_name}_compiled.model"),
        ]

        return calcinfo
117 changes: 117 additions & 0 deletions aiida_mlip/parsers/train_parser.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,117 @@
"""
Parser for mlip train.
"""

import json
from pathlib import Path

from ase.io import read

from aiida.engine import ExitCode
from aiida.orm import Dict, FolderData
from aiida.orm.nodes.process.process import ProcessNode
from aiida.parsers.parser import Parser

from aiida_mlip.data.model import ModelData
from aiida_mlip.helpers.converters import convert_numpy


class TrainParser(Parser):
    """
    Parser class for parsing output of a training calculation.

    Parameters
    ----------
    node : aiida.orm.nodes.process.process.ProcessNode
        ProcessNode of calculation.

    Methods
    -------
    __init__(node: aiida.orm.nodes.process.process.ProcessNode)
        Initialize the TrainParser instance.

    parse(**kwargs: Any) -> ExitCode:
        Parse outputs, store results in the database.

    Returns
    -------
    ExitCode
        An exit code.
    """

    def __init__(self, node: ProcessNode):
        """
        Initialize the parser for the given calculation node.

        Parameters
        ----------
        node : aiida.orm.nodes.process.process.ProcessNode
            ProcessNode of calculation.
        """
        super().__init__(node)

    # pylint: disable=too-many-locals
    def parse(self, **kwargs) -> ExitCode:
        """
        Parse outputs, store results in the database.

        Parameters
        ----------
        **kwargs : Any
            Any keyword arguments.

        Returns
        -------
        ExitCode
            ``ExitCode(0)`` on success, ``ERROR_MISSING_OUTPUT_FILES`` if the
            stdout file was not retrieved or the results file cannot be read.
        """
        remote_dir = Path(self.node.get_remote_workdir())
        # The config is an *input* of the calculation, not a metadata option.
        mlip_dict = self.node.inputs.mlip_config.as_dictionary()
        model_name = mlip_dict["name"]

        log_dir = remote_dir / mlip_dict.get("log_dir", "logs")
        checkpoint_dir = remote_dir / mlip_dict.get("checkpoint_dir", "checkpoints")
        results_dir = remote_dir / mlip_dict.get("results_dir", "results")
        model_dir = remote_dir / mlip_dict.get("model_dir", "")

        output_filename = self.node.get_option("output_filename")
        model_output = model_dir / f"{model_name}.model"
        compiled_model_output = model_dir / f"{model_name}_compiled.model"
        # The results file name embeds the training seed; take it from the config
        # instead of hard-coding it.
        # NOTE(review): 123 is assumed to be the trainer's default seed - confirm.
        seed = mlip_dict.get("seed", 123)
        result_name = results_dir / f"{model_name}_run-{seed}.txt"

        # Check that folder content is as expected
        files_retrieved = self.retrieved.list_object_names()

        files_expected = {output_filename}
        if not files_expected.issubset(files_retrieved):
            self.logger.error(
                f"Found files '{files_retrieved}', expected to find '{files_expected}'"
            )
            return self.exit_codes.ERROR_MISSING_OUTPUT_FILES

        # Need to change the architecture
        architecture = "mace_mp"
        model = ModelData.local_file(model_output, architecture=architecture)
        compiled_model = ModelData.local_file(
            compiled_model_output, architecture=architecture
        )
        self.out("model", model)
        self.out("compiled_model", compiled_model)

        # The results file is a training log, not an atomic structure, so it is
        # parsed as text rather than with `ase.io.read`.
        # NOTE(review): expects one JSON object per line; the last valid one is
        # stored - confirm against the trainer's log format.
        results = None
        try:
            with open(result_name, encoding="utf-8") as results_file:
                for line in results_file:
                    try:
                        record = json.loads(line)
                    except json.JSONDecodeError:
                        continue
                    if isinstance(record, dict):
                        results = record
        except OSError as err:
            self.logger.error(f"Could not read results file '{result_name}': {err}")
            return self.exit_codes.ERROR_MISSING_OUTPUT_FILES

        if results is None:
            self.logger.error(f"No results found in file '{result_name}'")
            return self.exit_codes.ERROR_MISSING_OUTPUT_FILES

        results_node = Dict(results)
        self.out("results_dict", results_node)

        # `FolderData` takes the directory to import through the `tree` keyword.
        log_node = FolderData(tree=log_dir)
        self.out("logs", log_node)

        checkpoint_node = FolderData(tree=checkpoint_dir)
        self.out("checkpoints", checkpoint_node)

        return ExitCode(0)
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ voluptuous = "^0.14"

[tool.poetry.group.dev.dependencies]
coverage = {extras = ["toml"], version = "^7.4.1"}
janus-core = "^0.2.0b4"
janus-core = "^v0.4.0b0"
pgtest = "^1.3.2"
pytest = "^8.0"
pytest-cov = "^4.1.0"
Expand Down
45 changes: 45 additions & 0 deletions tests/calculations/configs/mlip_train.yml
federicazanca marked this conversation as resolved.
Show resolved Hide resolved
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
# Training configuration used by the tests (single epoch, CPU only).
model: ScaleShiftMACE
# Data sets: training, validation and test structures.
train_file: "../structures/mlip_train.xyz"
valid_file: "../structures/mlip_valid.xyz"
test_file: "../structures/mlip_test.xyz"
# Loss definition and the keys holding the reference data.
loss: 'universal'
energy_weight: 1
forces_weight: 10
stress_weight: 100
compute_stress: True
energy_key: 'dft_energy'
forces_key: 'dft_forces'
stress_key: 'dft_stress'
eval_interval: 2
error_table: PerAtomRMSE
# main model params
interaction_first: "RealAgnosticResidualInteractionBlock"
interaction: "RealAgnosticResidualInteractionBlock"
num_interactions: 2
correlation: 3
max_ell: 3
r_max: 4.0
max_L: 0
num_channels: 16
num_radial_basis: 6
MLP_irreps: '16x0e'
# end model params
scaling: 'rms_forces_scaling'
# Optimiser settings.
lr: 0.005
# Written with an explicit decimal point: YAML 1.1 loaders (e.g. PyYAML) read
# a bare `1e-8` as a string rather than a float.
weight_decay: 1.0e-8
ema: True
ema_decay: 0.995
scheduler_patience: 5
batch_size: 4
valid_batch_size: 4
max_num_epochs: 1
patience: 50
amsgrad: True
# Run settings.
default_dtype: float32
device: cpu
distributed: False
seed: 2024
clip_grad: 100
keep_checkpoints: False
keep_isolated_atoms: True
save_cpu: True
Binary file added tests/calculations/configs/test.model
Binary file not shown.
Binary file added tests/calculations/configs/test_compiled.model
Binary file not shown.
Loading