stfc · alinelena · May 23, 2024 · May 13, 2024 · May 15, 2024 · May 15, 2024
diff --git a/aiida_mlip/calculations/train.py b/aiida_mlip/calculations/train.py
@@ -0,0 +1,170 @@
+"""Class for training machine learning models."""
+
+from pathlib import Path
+
+from aiida.common import InputValidationError, datastructures
+import aiida.common.folders
+from aiida.engine import CalcJob, CalcJobProcessSpec
+import aiida.engine.processes
+from aiida.orm import Dict, FolderData, SinglefileData
+
+from aiida_mlip.data.config import JanusConfigfile
+from aiida_mlip.data.model import ModelData
+
+
+def validate_inputs(
+    inputs: dict, port_namespace: aiida.engine.processes.ports.PortNamespace
+):
+    """
+    Check that the training config provides the mandatory keys and existing files.
+
+    Parameters
+    ----------
+    inputs : dict
+        The inputs dictionary.
+
+    port_namespace : `aiida.engine.processes.ports.PortNamespace`
+        An instance of aiida's `PortNameSpace`.
+
+    Raises
+    ------
+    InputValidationError
+        If the config is missing, lacks a mandatory key, or a data file is absent.
+    """
+    # Nothing to validate when the namespace does not expose the port.
+    if "mlip_config" not in port_namespace:
+        return
+    if "mlip_config" not in inputs:
+        raise InputValidationError("No config file given")
+    # Item access works for a plain dict as well as for an attribute dictionary.
+    mlip_dict = inputs["mlip_config"].as_dictionary()
+    for key in ("train_file", "valid_file", "test_file", "name"):
+        if key not in mlip_dict:
+            raise InputValidationError(f"Mandatory key {key} not in config file")
+        # "name" is only the prefix of the output files; the others must be paths.
+        if key != "name" and not Path(mlip_dict[key]).exists():
+            raise InputValidationError(f"Path given for {key} does not exist")
+
+
+class Train(CalcJob):  # numpydoc ignore=PR01
+    """
+    Calcjob implementation to train mlips.
+
+    Attributes
+    ----------
+    DEFAULT_OUTPUT_FILE : str
+        Default stdout file name.
+    DEFAULT_INPUT_FILE : str
+        Default input file name.
+    LOG_FILE : str
+        Default log file name.
+
+    Methods
+    -------
+    define(spec: CalcJobProcessSpec) -> None:
+        Define the process specification, its inputs, outputs and exit codes.
+    prepare_for_submission(folder: Folder) -> CalcInfo:
+        Create the input files for the `CalcJob`.
+    """
+
+    DEFAULT_OUTPUT_FILE = "aiida-stdout.txt"
+    DEFAULT_INPUT_FILE = "aiida.xyz"
+    LOG_FILE = "aiida.log"
+
+    @classmethod
+    def define(cls, spec: CalcJobProcessSpec) -> None:
+        """
+        Define the process specification, its inputs, outputs and exit codes.
+
+        Parameters
+        ----------
+        spec : `aiida.engine.CalcJobProcessSpec`
+            The calculation job process spec to define.
+        """
+        super().define(spec)
+        # The namespace-level validator checks the content of `mlip_config`.
+        spec.inputs.validator = validate_inputs
+        # Define inputs
+        spec.input(
+            "mlip_config",
+            valid_type=JanusConfigfile,
+            required=False,
+            help="Config file with the parameters for training",
+        )
+        # Redirect stdout to a known file unless the user names another one.
+        stdout_port = spec.inputs["metadata"]["options"]["output_filename"]
+        stdout_port.default = cls.DEFAULT_OUTPUT_FILE
+
+        # Define outputs
+        spec.output("model", valid_type=ModelData)
+        spec.output("compiled_model", valid_type=SinglefileData)
+        spec.output(
+            "results_dict",
+            valid_type=Dict,
+            help="The `results_dict` output node of the successful calculation.",
+        )
+        spec.output("logs", valid_type=FolderData)
+        spec.output("checkpoints", valid_type=FolderData)
+        spec.default_output_node = "results_dict"
+        # Exit codes
+        spec.exit_code(
+            305,
+            "ERROR_MISSING_OUTPUT_FILES",
+            message="Some output files missing or cannot be read",
+        )
+
+    # pylint: disable=too-many-locals
+    def prepare_for_submission(
+        self, folder: aiida.common.folders.Folder
+    ) -> datastructures.CalcInfo:
+        """
+        Create the input files for the `Calcjob`.
+
+        Parameters
+        ----------
+        folder : aiida.common.folders.Folder
+            An `aiida.common.folders.Folder` to temporarily write files on disk.
+
+        Returns
+        -------
+        aiida.common.datastructures.CalcInfo
+            An instance of `aiida.common.datastructures.CalcInfo`.
+        """
+        # One name for both the written file and the command line keeps them in sync
+        config_filename = "mlip_train.yaml"
+        mlip_config = self.inputs.mlip_config
+        mlip_dict = mlip_config.as_dictionary()
+
+        # Copy config file content inside the folder where the calculation is run
+        with folder.open(config_filename, "w", encoding="utf-8") as configfile:
+            configfile.write(mlip_config.get_content())
+
+        model_dir = Path(mlip_dict.get("model_dir", "."))
+        model_output = model_dir / f"{mlip_dict['name']}.model"
+        compiled_model_output = model_dir / f"{mlip_dict['name']}_compiled.model"
+
+        codeinfo = datastructures.CodeInfo()
+        # Run `<code> train --mlip-config <config file>`
+        codeinfo.cmdline_params = ["train", "--mlip-config", config_filename]
+        # Node where the code is saved
+        codeinfo.code_uuid = self.inputs.code.uuid
+        # Save name of output as you need it for running the code
+        codeinfo.stdout_name = self.metadata.options.output_filename
+
+        calcinfo = datastructures.CalcInfo()
+        calcinfo.codes_info = [codeinfo]
+        # Save the info about the node where the calc is stored
+        calcinfo.uuid = str(self.uuid)
+        # Retrieve output files; entries must be strings, not `Path` objects, and
+        # missing directory keys fall back to the defaults assumed by the parser
+        calcinfo.retrieve_list = [
+            self.metadata.options.output_filename,
+            self.uuid,
+            mlip_dict.get("log_dir", "logs"),
+            mlip_dict.get("results_dir", "results"),
+            mlip_dict.get("checkpoint_dir", "checkpoints"),
+            str(model_output),
+            str(compiled_model_output),
+        ]
+
+        return calcinfo
diff --git a/aiida_mlip/parsers/train_parser.py b/aiida_mlip/parsers/train_parser.py
@@ -0,0 +1,117 @@
+"""
+Parser for mlip train.
+"""
+
+from pathlib import Path
+
+from ase.io import read
+
+from aiida.engine import ExitCode
+from aiida.orm import Dict, FolderData
+from aiida.orm.nodes.process.process import ProcessNode
+from aiida.parsers.parser import Parser
+
+from aiida_mlip.data.model import ModelData
+from aiida_mlip.helpers.converters import convert_numpy
+
+
+class TrainParser(Parser):
+    """
+    Parser class for parsing output of a training calculation.
+
+    Parameters
+    ----------
+    node : aiida.orm.nodes.process.process.ProcessNode
+        ProcessNode of calculation.
+
+    Methods
+    -------
+    __init__(node: aiida.orm.nodes.process.process.ProcessNode)
+        Initialize the TrainParser instance.
+
+    parse(**kwargs: Any) -> ExitCode:
+        Parse outputs, store results in the database.
+
+    Returns
+    -------
+    ExitCode
+        An exit code.
+    """
+
+    def __init__(self, node: ProcessNode):
+        """
+        Initialize the parser for the given calculation node.
+
+        Parameters
+        ----------
+        node : aiida.orm.nodes.process.process.ProcessNode
+            ProcessNode of calculation.
+        """
+        super().__init__(node)
+
+    # disable for now
+    # pylint: disable=too-many-locals
+    def parse(self, **kwargs) -> ExitCode:
+        """
+        Parse outputs, store results in the database.
+
+        Parameters
+        ----------
+        **kwargs : Any
+            Any keyword arguments.
+
+        Returns
+        -------
+        ExitCode
+            An exit code.
+        """
+        # NOTE(review): outputs are read from the remote working directory, which
+        # presumably has to be reachable from this machine — confirm
+        remote_dir = Path(self.node.get_remote_workdir())
+        # The config file is an input node of the calculation, not a job option
+        mlip_dict = self.node.inputs.mlip_config.as_dictionary()
+        name = mlip_dict["name"]
+        log_dir = remote_dir / mlip_dict.get("log_dir", "logs")
+        checkpoint_dir = remote_dir / mlip_dict.get("checkpoint_dir", "checkpoints")
+        results_dir = remote_dir / mlip_dict.get("results_dir", "results")
+        model_dir = remote_dir / mlip_dict.get("model_dir", "")
+
+        output_filename = self.node.get_option("output_filename")
+        model_output = model_dir / f"{name}.model"
+        compiled_model_output = model_dir / f"{name}_compiled.model"
+        # NOTE(review): the run suffix is hard-coded; presumably it should follow the
+        # seed of the config file — confirm
+        result_name = results_dir / f"{name}_run-2024.txt"
+
+        # Check that folder content is as expected
+        files_retrieved = self.retrieved.list_object_names()
+
+        files_expected = {output_filename}
+        if not files_expected.issubset(files_retrieved):
+            self.logger.error(
+                f"Found files '{files_retrieved}', expected to find '{files_expected}'"
+            )
+            return self.exit_codes.ERROR_MISSING_OUTPUT_FILES
+
+        # Need to change the architecture
+        architecture = "mace_mp"
+        model = ModelData.local_file(model_output, architecture=architecture)
+        compiled_model = ModelData.local_file(
+            compiled_model_output, architecture=architecture
+        )
+        self.out("model", model)
+        self.out("compiled_model", compiled_model)
+
+        content = read(result_name)
+        results = convert_numpy(content.todict())
+        results_node = Dict(results)
+        self.out("results_dict", results_node)
+
+        # `FolderData` only accepts the directory through the `tree` keyword
+        log_node = FolderData(tree=str(log_dir))
+        self.out("logs", log_node)
+
+        checkpoint_node = FolderData(tree=str(checkpoint_dir))
+        self.out("checkpoints", checkpoint_node)
+
+        return ExitCode(0)
diff --git a/pyproject.toml b/pyproject.toml
@@ -33,7 +33,7 @@ voluptuous = "^0.14"
 
 [tool.poetry.group.dev.dependencies]
 coverage = {extras = ["toml"], version = "^7.4.1"}
-janus-core = "^0.2.0b4"
+janus-core = "^v0.4.0b0"
 pgtest = "^1.3.2"
 pytest = "^8.0"
 pytest-cov = "^4.1.0"

diff --git a/tests/calculations/configs/mlip_train.yml b/tests/calculations/configs/mlip_train.yml
@@ -0,0 +1,45 @@
+model: ScaleShiftMACE
+train_file: "../structures/mlip_train.xyz"
+valid_file: "../structures/mlip_valid.xyz"
+test_file: "../structures/mlip_test.xyz"
+loss: 'universal'
+energy_weight: 1
+forces_weight: 10
+stress_weight: 100
+compute_stress: True
+energy_key: 'dft_energy'
+forces_key: 'dft_forces'
+stress_key: 'dft_stress'
+eval_interval: 2
+error_table: PerAtomRMSE
+# main model params
+interaction_first: "RealAgnosticResidualInteractionBlock"
+interaction: "RealAgnosticResidualInteractionBlock"
+num_interactions: 2
+correlation: 3
+max_ell: 3
+r_max: 4.0
+max_L: 0
+num_channels: 16
+num_radial_basis: 6
+MLP_irreps: '16x0e'
+# end model params
+scaling: 'rms_forces_scaling'
+lr: 0.005
+weight_decay: 1e-8
+ema: True
+ema_decay: 0.995
+scheduler_patience: 5
+batch_size: 4
+valid_batch_size: 4
+max_num_epochs: 1
+patience: 50
+amsgrad: True
+default_dtype: float32
+device: cpu
+distributed: False
+seed: 2024
+clip_grad: 100
+keep_checkpoints: False
+keep_isolated_atoms: True
+save_cpu: True
diff --git a/tests/calculations/configs/test.model b/tests/calculations/configs/test.model
diff --git a/tests/calculations/configs/test_compiled.model b/tests/calculations/configs/test_compiled.model