diff --git a/aiida_mlip/calculations/base.py b/aiida_mlip/calculations/base.py index 0284aea3..a7131254 100644 --- a/aiida_mlip/calculations/base.py +++ b/aiida_mlip/calculations/base.py @@ -1,5 +1,7 @@ """Base class for features common to most calculations.""" +import shutil + from ase.io import read, write from aiida.common import InputValidationError, datastructures @@ -63,10 +65,13 @@ def validate_inputs( if ( "arch" in inputs and "model" in inputs - and inputs["arch"].value is not inputs["model"].architecture + and inputs["arch"].value != inputs["model"].architecture ): + inputvalue = inputs["arch"].value + modelvalue = inputs["model"].architecture raise InputValidationError( - "'arch' in ModelData and in 'arch' input must be the same" + "'arch' in ModelData and in inputs must be the same, " + f"but they are {modelvalue} and {inputvalue}" ) @@ -191,7 +196,7 @@ def prepare_for_submission( Parameters ---------- folder : aiida.common.folders.Folder - An `aiida.common.folders.Folder` to temporarily write files on disk. + Folder where the calculation is run. Returns ------- @@ -199,8 +204,6 @@ def prepare_for_submission( An instance of `aiida.common.datastructures.CalcInfo`. """ - # Create needed inputs - if "struct" in self.inputs: structure = self.inputs.struct elif "config" in self.inputs and "struct" in self.inputs.config.as_dictionary: @@ -211,8 +214,8 @@ def prepare_for_submission( # Transform the structure data in xyz file called input_filename input_filename = self.inputs.metadata.options.input_filename atoms = structure.get_ase() - # with folder.open(input_filename, mode="w", encoding='utf8') as file: - write(folder.abspath + "/" + input_filename, images=atoms) + with folder.open(input_filename, mode="w", encoding="utf8") as file: + write(file.name, images=atoms) log_filename = (self.inputs.log_filename).value cmd_line = { @@ -231,7 +234,7 @@ def prepare_for_submission( # Define architecture from model if model is given, # otherwise get architecture from inputs and download default model self._add_arch_to_cmdline(cmd_line) - self._add_model_to_cmdline(cmd_line) + self._add_model_to_cmdline(cmd_line, folder) if "config" in self.inputs: # Add config file to command line @@ -290,8 +293,7 @@ def _add_arch_to_cmdline(self, cmd_line: dict) -> dict: cmd_line["arch"] = architecture def _add_model_to_cmdline( - self, - cmd_line: dict, + self, cmd_line: dict, folder: aiida.common.folders.Folder ) -> dict: """ Find model in inputs or config file and add to command line if needed. @@ -301,6 +303,9 @@ def _add_model_to_cmdline( cmd_line : dict Dictionary containing the cmd line keys. + folder : ~aiida.common.folders.Folder + Folder where the calculation is run. + Returns ------- dict @@ -311,6 +316,12 @@ def _add_model_to_cmdline( # Raise error if model is None (different than model not given as input) if self.inputs.model is None: raise ValueError("Model cannot be None") - model_path = self.inputs.model.filepath - if model_path: + + with ( + self.inputs.model.open(mode="rb") as source, + folder.open("mlff.model", mode="wb") as target, + ): + shutil.copyfileobj(source, target) + + model_path = "mlff.model" cmd_line.setdefault("calc-kwargs", {})["model"] = model_path diff --git a/aiida_mlip/calculations/geomopt.py b/aiida_mlip/calculations/geomopt.py index 462395f6..3fec149c 100644 --- a/aiida_mlip/calculations/geomopt.py +++ b/aiida_mlip/calculations/geomopt.py @@ -100,7 +100,7 @@ def prepare_for_submission( Parameters ---------- folder : aiida.common.folders.Folder - An `aiida.common.folders.Folder` to temporarily write files on disk. + Folder where the calculation is run. Returns ------- diff --git a/aiida_mlip/calculations/md.py b/aiida_mlip/calculations/md.py index 7bef085c..d998707a 100644 --- a/aiida_mlip/calculations/md.py +++ b/aiida_mlip/calculations/md.py @@ -83,7 +83,7 @@ def prepare_for_submission( Parameters ---------- folder : aiida.common.folders.Folder - An `aiida.common.folders.Folder` to temporarily write files on disk. + Folder where the calculation is run. Returns ------- diff --git a/aiida_mlip/calculations/singlepoint.py b/aiida_mlip/calculations/singlepoint.py index b4656ed2..9519c370 100644 --- a/aiida_mlip/calculations/singlepoint.py +++ b/aiida_mlip/calculations/singlepoint.py @@ -82,7 +82,7 @@ def prepare_for_submission( Parameters ---------- folder : aiida.common.folders.Folder - An `aiida.common.folders.Folder` to temporarily write files on disk. + Folder where the calculation is run. Returns ------- diff --git a/aiida_mlip/calculations/train.py b/aiida_mlip/calculations/train.py index a43aafd3..bad60ad5 100644 --- a/aiida_mlip/calculations/train.py +++ b/aiida_mlip/calculations/train.py @@ -1,6 +1,7 @@ """Class for training machine learning models.""" from pathlib import Path +import shutil from aiida.common import InputValidationError, datastructures import aiida.common.folders @@ -154,7 +155,7 @@ def prepare_for_submission( Parameters ---------- folder : aiida.common.folders.Folder - An `aiida.common.folders.Folder` to temporarily write files on disk. + Folder where the calculation is run. Returns ------- @@ -175,9 +176,10 @@ def prepare_for_submission( # Add foundation_model to the config file if fine-tuning is enabled if self.inputs.fine_tune and "foundation_model" in self.inputs: - model_data = self.inputs.foundation_model - foundation_model_path = model_data.filepath - config_parse += f"\nfoundation_model: {foundation_model_path}" + with self.inputs.foundation_model.open(mode="rb") as source: + with folder.open("mlff.model", mode="wb") as target: + shutil.copyfileobj(source, target) + config_parse += "foundation_model: mlff.model" # Copy config file content inside the folder where the calculation is run config_copy = "mlip_train.yml" diff --git a/aiida_mlip/data/model.py b/aiida_mlip/data/model.py index 8fa8fb95..398d408d 100644 --- a/aiida_mlip/data/model.py +++ b/aiida_mlip/data/model.py @@ -4,9 +4,8 @@ from pathlib import Path from typing import Any, Optional, Union from urllib import request -from urllib.parse import urlparse -from aiida.orm import SinglefileData +from aiida.orm import QueryBuilder, SinglefileData, load_node class ModelData(SinglefileData): @@ -26,17 +25,17 @@ class ModelData(SinglefileData): ---------- architecture : str Architecture of the mlip model. - filepath : str - Path of the mlip model. + model_hash : str + Hash of the model. Methods ------- set_file(file, filename=None, architecture=None, **kwargs) Set the file for the node. - local_file(file, architecture, filename=None): + from_local(file, architecture, filename=None): Create a ModelData instance from a local file. - download(url, architecture, filename=None, cache_dir=None, force_download=False) - Download a file from a URL and save it as ModelData. + from_uri(uri, architecture, filename=None, cache_dir=None, keep_file=False) + Download a file from a URI and save it as ModelData. Other Parameters ---------------- @@ -69,47 +68,6 @@ def _calculate_hash(file: Union[str, Path]) -> str: file_hash = sha256.hexdigest() return file_hash - @classmethod - def _check_existing_file(cls, file: Union[str, Path]) -> Path: - """ - Check if a file already exists and return the path of the existing file. - - Parameters - ---------- - file : Union[str, Path] - Path to the downloaded model file. - - Returns - ------- - Path - The path of the model file of interest (same as input path if no duplicates - were found). - """ - file_hash = cls._calculate_hash(file) - - def is_diff_file(curr_path: Path) -> bool: - """ - Filter to check if two files are different. - - Parameters - ---------- - curr_path : Path - Path to the file to compare with. - - Returns - ------- - bool - True if the files are different, False otherwise. - """ - return curr_path.is_file() and not curr_path.samefile(file) - - file_folder = Path(file).parent - for existing_file in filter(is_diff_file, file_folder.rglob("*")): - if cls._calculate_hash(existing_file) == file_hash: - file.unlink() - return existing_file - return Path(file) - def __init__( self, file: Union[str, Path], @@ -136,7 +94,6 @@ def __init__( """ super().__init__(file, filename, **kwargs) self.base.attributes.set("architecture", architecture) - self.base.attributes.set("filepath", str(file)) def set_file( self, @@ -164,10 +121,12 @@ def set_file( """ super().set_file(file, filename, **kwargs) self.base.attributes.set("architecture", architecture) - self.base.attributes.set("filepath", str(file)) + # here compute hash and set attribute + model_hash = self._calculate_hash(file) + self.base.attributes.set("model_hash", model_hash) @classmethod - def local_file( + def from_local( cls, file: Union[str, Path], architecture: str, @@ -195,31 +154,31 @@ def local_file( @classmethod # pylint: disable=too-many-arguments - def download( + def from_uri( cls, - url: str, + uri: str, architecture: str, - filename: Optional[str] = None, + filename: Optional[str] = "tmp_file.model", cache_dir: Optional[Union[str, Path]] = None, - force_download: Optional[bool] = False, + keep_file: Optional[bool] = False, ): """ - Download a file from a URL and save it as ModelData. + Download a file from a URI and save it as ModelData. Parameters ---------- - url : str - URL of the file to download. + uri : str + URI of the file to download. architecture : [str] Architecture of the mlip model. filename : Optional[str], optional - Name to be used for the file (defaults to the name of provided file). + Name to be used for the file defaults to tmp_file.model. cache_dir : Optional[Union[str, Path]], optional Path to the folder where the file has to be saved (defaults to "~/.cache/mlips/"). - force_download : Optional[bool], optional - True to keep the downloaded model even if there are duplicates - (default: False). + keep_file : Optional[bool], optional + True to keep the downloaded model, even if there are duplicates. + (default: False, the file is deleted and only saved in the database). Returns ------- @@ -231,32 +190,39 @@ def download( ) arch_dir = (cache_dir / architecture) if architecture else cache_dir - # cache_path = cache_dir.resolve() arch_path = arch_dir.resolve() arch_path.mkdir(parents=True, exist_ok=True) - model_name = urlparse(url).path.split("/")[-1] + file = arch_path / filename - file = arch_path / filename if filename else arch_path / model_name + # Download file + request.urlretrieve(uri, file) - # If file already exists, use next indexed name - stem = file.stem - i = 1 - while file.exists(): - i += 1 - file = file.with_stem(f"{stem}_{i}") + model = cls.from_local(file=file, architecture=architecture) - # Download file - request.urlretrieve(url, file) + if keep_file: + return model - if force_download: - print(f"filename changed to {file}") - return cls.local_file(file=file, architecture=architecture) + file.unlink(missing_ok=True) + + # Check if the same model was used previously + qb = QueryBuilder() + qb.append( + ModelData, + filters={ + "attributes.model_hash": model.model_hash, + "attributes.architecture": model.architecture, + "ctime": {"!in": [model.ctime]}, + }, + project=["attributes", "pk", "ctime"], + ) - # Check if the hash of the just downloaded file matches any other file - filepath = cls._check_existing_file(file) + if qb.count() != 0: + model = load_node( + qb.first()[1] + ) # This gets the pk of the first model in the query - return cls.local_file(file=filepath, architecture=architecture) + return model @property def architecture(self) -> str: @@ -271,13 +237,13 @@ def architecture(self) -> str: return self.base.attributes.get("architecture") @property - def filepath(self) -> str: + def model_hash(self) -> str: """ - Return the filepath. + Return hash of the architecture. Returns ------- str - Path of the mlip model. + Hash of the MLIP model. """ - return self.base.attributes.get("filepath") + return self.base.attributes.get("model_hash") diff --git a/aiida_mlip/helpers/help_load.py b/aiida_mlip/helpers/help_load.py index 937c906f..f998425b 100644 --- a/aiida_mlip/helpers/help_load.py +++ b/aiida_mlip/helpers/help_load.py @@ -20,17 +20,17 @@ def load_model( cache_dir: Optional[Union[str, Path]] = None, ) -> ModelData: """ - Load a model from a file path or URL. + Load a model from a file path or URI. If the string represents a file path, the model will be loaded from that path. - If it's a URL, the model will be downloaded from the specified location. + If it's a URI, the model will be downloaded from the specified location. If the input model is None it returns a default model corresponding to the default used in the Calcjobs. Parameters ---------- model : Optional[Union[str, Path]] - Model file path or a URL for downloading the model or None to use the default. + Model file path or a URI for downloading the model or None to use the default. architecture : str The architecture of the model. cache_dir : Optional[Union[str, Path]] @@ -42,15 +42,15 @@ def load_model( The loaded model. """ if model is None: - loaded_model = ModelData.download( + loaded_model = ModelData.from_uri( "https://github.com/stfc/janus-core/raw/main/tests/models/mace_mp_small.model", # pylint: disable=line-too-long architecture, cache_dir=cache_dir, ) elif (file_path := Path(model)).is_file(): - loaded_model = ModelData.local_file(file_path, architecture=architecture) + loaded_model = ModelData.from_local(file_path, architecture=architecture) else: - loaded_model = ModelData.download( + loaded_model = ModelData.from_uri( model, architecture=architecture, cache_dir=cache_dir ) return loaded_model diff --git a/aiida_mlip/parsers/train_parser.py b/aiida_mlip/parsers/train_parser.py index d12d2662..bc22baa2 100644 --- a/aiida_mlip/parsers/train_parser.py +++ b/aiida_mlip/parsers/train_parser.py @@ -164,8 +164,8 @@ def _save_models(self, model_output: Path, compiled_model_output: Path) -> None: Path to the compiled model output file. """ architecture = "mace_mp" - model = ModelData.local_file(model_output, architecture=architecture) - compiled_model = ModelData.local_file( + model = ModelData.from_local(model_output, architecture=architecture) + compiled_model = ModelData.from_local( compiled_model_output, architecture=architecture ) diff --git a/docs/source/user_guide/data.rst b/docs/source/user_guide/data.rst index 4f9437ac..3b7c63d2 100644 --- a/docs/source/user_guide/data.rst +++ b/docs/source/user_guide/data.rst @@ -4,7 +4,7 @@ Data types ModelData --------- -Defines a custom data type called `ModelData` in AiiDA, which is a subclass of the `SinglefileData` type. `ModelData` is used to handle model files and provides functionalities for handling local files and downloading files from URLs. +Defines a custom data type called `ModelData` in AiiDA, which is a subclass of the `SinglefileData` type. `ModelData` is used to handle model files and provides functionalities for handling local files and downloading files from URIs. Additional features compared to `SinglefileData`: - It can take a relative path as an argument @@ -12,7 +12,7 @@ Additional features compared to `SinglefileData`: - It takes the argument "architecture" which is specifically related to the mlip model and it is added to the node attributes. - Download functionality: - - When provided with a URL, `ModelData` automatically downloads the file. + - When provided with a URI, `ModelData` automatically downloads the file. - Saves the downloaded file in a specified folder (default: `./cache/mlips`), creating a subfolder if the architecture, and stores it as an AiiDA data type. - Handles duplicate files: if the file is downloaded twice, duplicates within the same folder are canceled, unless `force_download=True` is stated. @@ -23,13 +23,13 @@ Usage .. code-block:: python - model = ModelData.local_file('/path/to/file', filename='model', architecture='mace') + model = ModelData.from_local('/path/to/file', filename='model', architecture='mace') - To download a file and save it as a `ModelData` object: .. code-block:: python - model = ModelData.download('http://yoururl.test/model', architecture='mace', filename='model', cache_dir='/home/mlip/', force_download=False) + model = ModelData.from_uri('http://yoururl.test/model', architecture='mace', filename='model', cache_dir='/home/mlip/', force_download=False) - The architecture of the model file can be accessed using the `architecture` property: @@ -37,20 +37,7 @@ Usage model_arch = model.architecture - - -- The filepath of the model file can be accessed using the `filepath` property: - -.. code-block:: python - - file_path = model.filepath - -.. warning:: - - When using shared data, the ``filepath`` could point to a inaccessible location on another computer. - So if you are using data from someone else, for both the model data and the config file, consider using the ``get_content()`` method to create a new file with identical content. - Then, use the filepath of the newly created file for running calculation. - A more robust solution to this problem is going to be implemented. +As for a `SinglefileData`, the content of the model file can be accessed using the function `get_content()` JanusConfigfile diff --git a/docs/source/user_guide/tutorial.rst b/docs/source/user_guide/tutorial.rst index dcc2df6a..f07d6189 100644 --- a/docs/source/user_guide/tutorial.rst +++ b/docs/source/user_guide/tutorial.rst @@ -29,19 +29,19 @@ The input structure in aiida-mlip needs to be saved as a StructureData type: structure = StructureData(ase=read("/path/to/structure.cif")) Then we need to choose a model and architecture to be used for the calculation and save it as ModelData type, a specific data type of this plugin. -In this example we use MACE with a model that we download from this URL: "https://github.com/stfc/janus-core/raw/main/tests/models/mace_mp_small.model", and we save the file in the cache folder (default="~/.cache/mlips/"): +In this example we use MACE with a model that we download from this URI: "https://github.com/stfc/janus-core/raw/main/tests/models/mace_mp_small.model", and we save the file in the cache folder (default="~/.cache/mlips/"): .. code-block:: python from aiida_mlip.data.model import ModelData - url = "https://github.com/stfc/janus-core/raw/main/tests/models/mace_mp_small.model" - model = ModelData.download(url, architecture="mace", cache_dir="/.cache/") + uri = "https://github.com/stfc/janus-core/raw/main/tests/models/mace_mp_small.model" + model = ModelData.from_uri(uri, architecture="mace", cache_dir="/.cache/") If we already have the model saved in some folder we can save it as: .. code-block:: python - model = ModelData.local_file("/path/to/model", architecture="mace") + model = ModelData.from_local("/path/to/model", architecture="mace") Another parameter that we need to define as AiiDA type is the code. Assuming the code is saved as `janus` in the `localhost` computer, the code info that are needed can be loaded as follow: diff --git a/examples/calculations/submit_geomopt.py b/examples/calculations/submit_geomopt.py index d7396068..cd826cc9 100644 --- a/examples/calculations/submit_geomopt.py +++ b/examples/calculations/submit_geomopt.py @@ -4,7 +4,7 @@ from aiida.common import NotExistent from aiida.engine import run_get_node -from aiida.orm import Bool, Dict, Float, Int, Str, load_code +from aiida.orm import Bool, Float, Int, Str, load_code from aiida.plugins import CalculationFactory from aiida_mlip.helpers.help_load import load_model, load_structure @@ -44,7 +44,7 @@ def geomopt(params: dict) -> None: "fmax": Float(params["fmax"]), "vectors_only": Bool(params["vectors_only"]), "fully_opt": Bool(params["fully_opt"]), - "opt_kwargs": Dict({"restart": "rest.pkl"}), + # "opt_kwargs": Dict({"restart": "rest.pkl"}), "steps": Int(params["steps"]), } @@ -67,7 +67,7 @@ def geomopt(params: dict) -> None: "--model", default=None, type=str, - help="Specify path or url of the model to use", + help="Specify path or URI of the model to use", ) @click.option( "--arch", diff --git a/examples/calculations/submit_md.py b/examples/calculations/submit_md.py index 98950eea..1858be46 100644 --- a/examples/calculations/submit_md.py +++ b/examples/calculations/submit_md.py @@ -66,7 +66,7 @@ def MD(params: dict) -> None: "--model", default=None, type=str, - help="Specify path or url of the model to use", + help="Specify path or URI of the model to use", ) @click.option( "--arch", diff --git a/examples/calculations/submit_singlepoint.py b/examples/calculations/submit_singlepoint.py index 9b30b95b..a5997b6d 100644 --- a/examples/calculations/submit_singlepoint.py +++ b/examples/calculations/submit_singlepoint.py @@ -62,7 +62,7 @@ def singlepoint(params: dict) -> None: "--model", default=None, type=str, - help="Specify path or url of the model to use", + help="Specify path or URI of the model to use", ) @click.option( "--arch", diff --git a/examples/tutorials/geometry-optimisation.ipynb b/examples/tutorials/geometry-optimisation.ipynb index 4032433f..48842970 100644 --- a/examples/tutorials/geometry-optimisation.ipynb +++ b/examples/tutorials/geometry-optimisation.ipynb @@ -75,8 +75,8 @@ "outputs": [], "source": [ "from aiida_mlip.data.model import ModelData\n", - "url = \"https://github.com/stfc/janus-core/raw/main/tests/models/mace_mp_small.model\"\n", - "model = ModelData.download(url, architecture=\"mace_mp\", cache_dir=\"mlips\")" + "uri = \"https://github.com/stfc/janus-core/raw/main/tests/models/mace_mp_small.model\"\n", + "model = ModelData.from_uri(uri, architecture=\"mace_mp\", cache_dir=\"mlips\")" ] }, { @@ -98,7 +98,7 @@ "outputs": [], "source": [ "# from aiida_mlip.data.model import ModelData\n", - "# model = ModelData.local_file(\"mlips/mace_mp/mace_mp_small.model\", architecture=\"mace_mp\")" + "# model = ModelData.from_local(\"mlips/mace_mp/mace_mp_small.model\", architecture=\"mace_mp\")" ] }, { @@ -367,8 +367,8 @@ "def prepare_struct_inputs(traj, index):\n", " return traj.get_step_structure(index.value)\n", "\n", - "url = \"https://github.com/stfc/janus-core/raw/main/tests/models/mace_mp_small.model\"\n", - "model = ModelData.download(url, architecture=\"mace_mp\", cache_dir=\"mlips\")\n", + "uri = \"https://github.com/stfc/janus-core/raw/main/tests/models/mace_mp_small.model\"\n", + "model = ModelData.from_uri(uri, architecture=\"mace_mp\", cache_dir=\"mlips\")\n", "list_of_nodes = []\n", "\n", "\n", diff --git a/examples/tutorials/high-throughput-screening.ipynb b/examples/tutorials/high-throughput-screening.ipynb index d8a7e786..35b2cb8b 100644 --- a/examples/tutorials/high-throughput-screening.ipynb +++ b/examples/tutorials/high-throughput-screening.ipynb @@ -80,7 +80,7 @@ "outputs": [], "source": [ "Calculation = CalculationFactory(\"mlip.sp\")\n", - "model = ModelData.local_file(\"mlips/mace_mp/mace_mp_small.model\", architecture=\"mace_mp\")\n", + "model = ModelData.from_local(\"mlips/mace_mp/mace_mp_small.model\", architecture=\"mace_mp\")\n", "metadata = {\"options\": {\"resources\": {\"num_machines\": 1}}}\n", "code = load_code(\"janus@localhost\")" ] diff --git a/examples/tutorials/singlepoint.ipynb b/examples/tutorials/singlepoint.ipynb index aee8b8fb..32f91df1 100644 --- a/examples/tutorials/singlepoint.ipynb +++ b/examples/tutorials/singlepoint.ipynb @@ -65,8 +65,8 @@ "outputs": [], "source": [ "from aiida_mlip.data.model import ModelData\n", - "url = \"https://github.com/stfc/janus-core/raw/main/tests/models/mace_mp_small.model\"\n", - "model = ModelData.download(url, architecture=\"mace_mp\", cache_dir=\"mlips\")" + "uri = \"https://github.com/stfc/janus-core/raw/main/tests/models/mace_mp_small.model\"\n", + "model = ModelData.from_uri(uri, architecture=\"mace_mp\", cache_dir=\"mlips\")" ] }, { @@ -86,7 +86,7 @@ }, "outputs": [], "source": [ - "model = ModelData.local_file(\"/path/to/model\", architecture=\"mace\")" + "model = ModelData.from_local(\"/path/to/model\", architecture=\"mace\")" ] }, { diff --git a/tests/calculations/test_geomopt.py b/tests/calculations/test_geomopt.py index 457516b4..0cb76c2a 100644 --- a/tests/calculations/test_geomopt.py +++ b/tests/calculations/test_geomopt.py @@ -24,7 +24,7 @@ def test_geomopt(fixture_sandbox, generate_calc_job, janus_code, model_folder): "arch": Str("mace"), "precision": Str("float64"), "struct": StructureData(ase=bulk("NaCl", "rocksalt", 5.63)), - "model": ModelData.local_file(model_file, architecture="mace"), + "model": ModelData.from_local(model_file, architecture="mace"), "device": Str("cpu"), } @@ -43,7 +43,7 @@ def test_geomopt(fixture_sandbox, generate_calc_job, janus_code, model_folder): "--out", "aiida-results.xyz", "--calc-kwargs", - f"{{'default_dtype': 'float64', 'model': '{model_file}'}}", + "{'default_dtype': 'float64', 'model': 'mlff.model'}", "--traj", "aiida-traj.xyz", ] @@ -57,7 +57,9 @@ def test_geomopt(fixture_sandbox, generate_calc_job, janus_code, model_folder): ] # Check the attributes of the returned `CalcInfo` - assert fixture_sandbox.get_content_list() == ["aiida.xyz"] + assert sorted(fixture_sandbox.get_content_list()) == sorted( + ["aiida.xyz", "mlff.model"] + ) assert isinstance(calc_info, datastructures.CalcInfo) assert isinstance(calc_info.codes_info[0], datastructures.CodeInfo) assert len(calc_info.codes_info[0].cmdline_params) == len(cmdline_params) @@ -77,7 +79,7 @@ def test_run_opt(model_folder, janus_code): "arch": Str("mace"), "precision": Str("float64"), "struct": StructureData(ase=bulk("NaCl", "rocksalt", 5.63)), - "model": ModelData.local_file(model_file, architecture="mace"), + "model": ModelData.from_local(model_file, architecture="mace"), "device": Str("cpu"), "fully_opt": Bool(True), "fmax": Float(0.1), diff --git a/tests/calculations/test_md.py b/tests/calculations/test_md.py index 3b4a2863..2b907c3d 100644 --- a/tests/calculations/test_md.py +++ b/tests/calculations/test_md.py @@ -27,7 +27,7 @@ def test_MD(fixture_sandbox, generate_calc_job, janus_code, model_folder): "arch": Str("mace"), "precision": Str("float64"), "struct": StructureData(ase=bulk("NaCl", "rocksalt", 5.63)), - "model": ModelData.local_file(model_file, architecture="mace"), + "model": ModelData.from_local(model_file, architecture="mace"), "device": Str("cpu"), "ensemble": Str("nve"), "md_kwargs": Dict( @@ -56,7 +56,7 @@ def test_MD(fixture_sandbox, generate_calc_job, janus_code, model_folder): "--summary", "md_summary.yml", "--calc-kwargs", - f"{{'default_dtype': 'float64', 'model': '{model_file}'}}", + "{'default_dtype': 'float64', 'model': 'mlff.model'}", "--ensemble", "nve", "--temp", @@ -85,7 +85,9 @@ def test_MD(fixture_sandbox, generate_calc_job, janus_code, model_folder): ] # Check the attributes of the returned `CalcInfo` - assert fixture_sandbox.get_content_list() == ["aiida.xyz"] + assert sorted(fixture_sandbox.get_content_list()) == sorted( + ["aiida.xyz", "mlff.model"] + ) assert isinstance(calc_info, datastructures.CalcInfo) assert isinstance(calc_info.codes_info[0], datastructures.CodeInfo) assert len(calc_info.codes_info[0].cmdline_params) == len(cmdline_params) @@ -107,7 +109,7 @@ def test_MD_with_config( model_file = model_folder / "mace_mp_small.model" inputs = { "code": janus_code, - "model": ModelData.local_file(file=model_file, architecture="mace"), + "model": ModelData.from_local(file=model_file, architecture="mace"), "metadata": {"options": {"resources": {"num_machines": 1}}}, "config": JanusConfigfile(config_folder / "config_janus_md.yaml"), } @@ -123,7 +125,7 @@ def test_MD_with_config( "--arch", "mace", "--calc-kwargs", - f"{{'model': '{model_file}'}}", + "{'model': 'mlff.model'}", "--config", "config.yaml", "--ensemble", @@ -146,7 +148,13 @@ def test_MD_with_config( ] # Check the attributes of the returned `CalcInfo` - assert sorted(fixture_sandbox.get_content_list()) == ["aiida.xyz", "config.yaml"] + assert sorted(fixture_sandbox.get_content_list()) == sorted( + [ + "aiida.xyz", + "config.yaml", + "mlff.model", + ] + ) assert isinstance(calc_info, datastructures.CalcInfo) assert isinstance(calc_info.codes_info[0], datastructures.CodeInfo) assert len(calc_info.codes_info[0].cmdline_params) == len(cmdline_params) @@ -168,7 +176,7 @@ def test_run_md(model_folder, structure_folder, janus_code): "arch": Str("mace"), "precision": Str("float64"), "struct": StructureData(ase=read(structure_file)), - "model": ModelData.local_file(model_file, architecture="mace"), + "model": ModelData.from_local(model_file, architecture="mace"), "device": Str("cpu"), "ensemble": Str("nve"), "md_kwargs": Dict( diff --git a/tests/calculations/test_singlepoint.py b/tests/calculations/test_singlepoint.py index 6cd1e890..31959603 100644 --- a/tests/calculations/test_singlepoint.py +++ b/tests/calculations/test_singlepoint.py @@ -25,7 +25,7 @@ def test_singlepoint(fixture_sandbox, generate_calc_job, janus_code, model_folde "arch": Str("mace"), "precision": Str("float64"), "struct": StructureData(ase=bulk("NaCl", "rocksalt", 5.63)), - "model": ModelData.local_file(model_file, architecture="mace"), + "model": ModelData.from_local(model_file, architecture="mace"), "device": Str("cpu"), } @@ -44,7 +44,7 @@ def test_singlepoint(fixture_sandbox, generate_calc_job, janus_code, model_folde "--out", "aiida-results.xyz", "--calc-kwargs", - f"{{'default_dtype': 'float64', 'model': '{model_file}'}}", + "{'default_dtype': 'float64', 'model': 'mlff.model'}", ] retrieve_list = [ @@ -55,7 +55,9 @@ def test_singlepoint(fixture_sandbox, generate_calc_job, janus_code, model_folde ] # Check the attributes of the returned `CalcInfo` - assert fixture_sandbox.get_content_list() == ["aiida.xyz"] + assert sorted(fixture_sandbox.get_content_list()) == sorted( + ["aiida.xyz", "mlff.model"] + ) assert isinstance(calc_info, datastructures.CalcInfo) assert isinstance(calc_info.codes_info[0], datastructures.CodeInfo) assert sorted(calc_info.codes_info[0].cmdline_params) == sorted(cmdline_params) @@ -72,7 +74,7 @@ def test_sp_nostruct(fixture_sandbox, generate_calc_job, model_folder, janus_cod "code": janus_code, "arch": Str("mace"), "precision": Str("float64"), - "model": ModelData.local_file(model_file, architecture="mace"), + "model": ModelData.from_local(model_file, architecture="mace"), "device": Str("cpu"), } with pytest.raises(InputValidationError): @@ -117,7 +119,7 @@ def test_two_arch(fixture_sandbox, generate_calc_job, model_folder, janus_code): inputs = { "code": janus_code, "metadata": {"options": {"resources": {"num_machines": 1}}}, - "model": ModelData.local_file(model_file, architecture="mace_mp"), + "model": ModelData.from_local(model_file, architecture="mace_mp"), "arch": Str("chgnet"), "struct": StructureData(ase=bulk("NaCl", "rocksalt", 5.63)), } @@ -135,7 +137,7 @@ def test_run_sp(model_folder, janus_code): "arch": Str("mace"), "precision": Str("float64"), "struct": StructureData(ase=bulk("NaCl", "rocksalt", 5.63)), - "model": ModelData.local_file(model_file, architecture="mace"), + "model": ModelData.from_local(model_file, architecture="mace"), "device": Str("cpu"), } diff --git a/tests/calculations/test_train.py b/tests/calculations/test_train.py index f303b8b1..cdb27d7d 100644 --- a/tests/calculations/test_train.py +++ b/tests/calculations/test_train.py @@ -121,7 +121,7 @@ def test_prepare_tune(fixture_sandbox, generate_calc_job, janus_code, config_fol "code": janus_code, "mlip_config": config, "fine_tune": Bool(True), - "foundation_model": ModelData.local_file( + "foundation_model": ModelData.from_local( file=model_file, architecture="mace_mp" ), } @@ -141,7 +141,9 @@ def test_prepare_tune(fixture_sandbox, generate_calc_job, janus_code, config_fol ] # Check the attributes of the returned `CalcInfo` - assert fixture_sandbox.get_content_list() == ["mlip_train.yml"] + assert sorted(fixture_sandbox.get_content_list()) == sorted( + ["mlip_train.yml", "mlff.model"] + ) assert isinstance(calc_info, datastructures.CalcInfo) assert isinstance(calc_info.codes_info[0], datastructures.CodeInfo) assert sorted(calc_info.retrieve_list) == sorted(retrieve_list) @@ -177,7 +179,7 @@ def test_run_train(janus_code, config_folder): "fine_tune": Bool(True), "code": janus_code, "mlip_config": config, - "foundation_model": ModelData.local_file( + "foundation_model": ModelData.from_local( file=model_file, architecture="mace_mp" ), } diff --git a/tests/data/test_model.py b/tests/data/test_model.py index e85e2542..145340e0 100644 --- a/tests/data/test_model.py +++ b/tests/data/test_model.py @@ -1,27 +1,27 @@ -"""Test for ModelData class""" +"""Test for ModelData class.""" from pathlib import Path from aiida_mlip.data.model import ModelData +model_path = Path(__file__).parent / "input_files" / "model_local_file.txt" + def test_local_file(): - """Testing that the local file function works""" + """Testing that the from_local function works""" # Construct a ModelData instance with the local file - model_path = Path(__file__).parent / "input_files" / "model_local_file.txt" absolute_path = model_path.resolve() - model = ModelData.local_file(file=absolute_path, architecture="mace") + model = ModelData.from_local(file=absolute_path, architecture="mace") # Assert the ModelData contains the content we expect content = model.get_content() assert content == model_path.read_text(encoding="utf-8") def test_relativepath(): - """Testing that the local file function works""" + """Testing that the from_local function works with a relative path""" # Construct a ModelData instance with the local file - model_path = Path(__file__).parent / "input_files" / "model_local_file.txt" relative_path = model_path.relative_to(Path.cwd()) - model = ModelData.local_file(file=relative_path, architecture="mace") + model = ModelData.from_local(file=relative_path, architecture="mace") # Assert the ModelData contains the content we expect content = model.get_content() assert content == relative_path.read_text(encoding="utf-8") @@ -29,53 +29,76 @@ def test_relativepath(): def test_architecture(): """Testing that the architecture is read and added to attributes""" - file = Path(__file__).parent / "input_files/model_local_file.txt" - model = ModelData.local_file( - file=file, + model = ModelData.from_local( + file=model_path, filename="model", architecture="mace", ) assert model.architecture == "mace" +def test_download_fresh_file_keep(tmp_path): + """Test if download works and the downloaded file is kept in the chosen folder.""" + # Ensure we do not have the file cached already + path_test = tmp_path / "mace" / "mace.model" + path_test.unlink(missing_ok=True) + + # Construct a ModelData instance downloading a non-cached file + # pylint:disable=line-too-long + model = ModelData.from_uri( + uri="https://github.com/stfc/janus-core/raw/main/tests/models/mace_mp_small.model", + filename="mace.model", + cache_dir=tmp_path, + architecture="mace", + keep_file=True, + ) + + # Assert the ModelData is downloaded + assert model.architecture == "mace" + assert path_test.exists(), f"File {path_test} does not exist." + + def test_download_fresh_file(tmp_path): - """Test if download works""" + """Test if download works and the file is only saved in the database not locally""" # Ensure we do not have the file cached already path_test = tmp_path / "mace" / "mace.model" path_test.unlink(missing_ok=True) # Construct a ModelData instance downloading a non-cached file # pylint:disable=line-too-long - model = ModelData.download( - url="https://github.com/stfc/janus-core/raw/main/tests/models/mace_mp_small.model", + model = ModelData.from_uri( + uri="https://github.com/stfc/janus-core/raw/main/tests/models/mace_mp_small.model", filename="mace.model", cache_dir=tmp_path, architecture="mace", ) # Assert the ModelData is downloaded - file_path = tmp_path / "mace" / "mace.model" assert model.architecture == "mace" - assert file_path.exists(), f"File {file_path} does not exist." + assert path_test.exists() is False, f"File {path_test} exists and shouldn't." -def test_no_download_cached_file(): - """Test if the caching work for avoiding double download""" +def test_no_download_cached_file(tmp_path): + """Test if the caching prevents saving duplicate model in the database.""" + # pylint:disable=line-too-long + existing_model = ModelData.from_uri( + uri="https://github.com/stfc/janus-core/raw/main/tests/models/mace_mp_small.model", + filename="mace_existing.model", + cache_dir=tmp_path, + architecture="mace_mp", + ) # Construct a ModelData instance that should use the cached file # pylint:disable=line-too-long - model = ModelData.download( - url="https://github.com/stfc/janus-core/raw/main/tests/models/mace_mp_small.model", - cache_dir=Path(__file__).parent / "input_files", + model = ModelData.from_uri( + uri="https://github.com/stfc/janus-core/raw/main/tests/models/mace_mp_small.model", + cache_dir=tmp_path, filename="test_model.model", - architecture="mace", + architecture="mace_mp", ) + file_path = tmp_path / "test_model.model" # Assert the new ModelData was not downloaded and the old one is still there - file_path = Path(__file__).parent / "input_files" / "mace" / "test_model.model" - assert not file_path.exists(), f"File {file_path} exists but it shouldn't." - file_path2 = Path(__file__).parent / "input_files" / "test_model.model" - assert not file_path2.exists(), f"File {file_path2} exists but it shouldn't." - file_path3 = Path(__file__).parent / "input_files" / "mace" / "mace_mp_small.model" - assert file_path3.exists(), f"File {file_path3} should exist" - assert model.architecture == "mace" + assert model.pk == existing_model.pk + assert model.model_hash == existing_model.model_hash + assert file_path.exists() is False, f"File {file_path} exists and shouldn't." diff --git a/tests/helpers/test_load.py b/tests/helpers/test_load.py index cc03df6c..912500d4 100644 --- a/tests/helpers/test_load.py +++ b/tests/helpers/test_load.py @@ -20,12 +20,12 @@ def test_load_local_model(model_folder): def test_download_model(tmp_path): - """Test for the load_model function for loading from URL.""" - url_model = ( + """Test for the load_model function for loading from URI.""" + uri_model = ( "https://github.com/stfc/janus-core/raw/main/tests/models/mace_mp_small.model" ) loaded_model = load_model( - url_model, architecture="example_architecture", cache_dir=tmp_path + uri_model, architecture="example_architecture", cache_dir=tmp_path ) assert isinstance(loaded_model, ModelData)