From 7b31e129de4f9a7088b801abe5cf879fdf598318 Mon Sep 17 00:00:00 2001 From: federica Date: Tue, 9 Jul 2024 16:30:56 +0100 Subject: [PATCH 01/20] fixed filepath issue, all works for now --- aiida_mlip/calculations/base.py | 23 ++++++++++++++-------- aiida_mlip/calculations/train.py | 8 +++++--- aiida_mlip/data/model.py | 27 +++++++++++++++++--------- tests/calculations/test_geomopt.py | 4 ++-- tests/calculations/test_md.py | 12 ++++++++---- tests/calculations/test_singlepoint.py | 4 ++-- tests/calculations/test_train.py | 2 +- 7 files changed, 51 insertions(+), 29 deletions(-) diff --git a/aiida_mlip/calculations/base.py b/aiida_mlip/calculations/base.py index 0284aea3..3b52f379 100644 --- a/aiida_mlip/calculations/base.py +++ b/aiida_mlip/calculations/base.py @@ -1,5 +1,7 @@ """Base class for features common to most calculations.""" +import shutil + from ase.io import read, write from aiida.common import InputValidationError, datastructures @@ -199,8 +201,6 @@ def prepare_for_submission( An instance of `aiida.common.datastructures.CalcInfo`. """ - # Create needed inputs - if "struct" in self.inputs: structure = self.inputs.struct elif "config" in self.inputs and "struct" in self.inputs.config.as_dictionary: @@ -211,8 +211,8 @@ def prepare_for_submission( # Transform the structure data in xyz file called input_filename input_filename = self.inputs.metadata.options.input_filename atoms = structure.get_ase() - # with folder.open(input_filename, mode="w", encoding='utf8') as file: - write(folder.abspath + "/" + input_filename, images=atoms) + with folder.open(input_filename, mode="w", encoding=None) as file: + write(file.name, images=atoms) log_filename = (self.inputs.log_filename).value cmd_line = { @@ -231,7 +231,7 @@ def prepare_for_submission( # Define architecture from model if model is given, # otherwise get architecture from inputs and download default model self._add_arch_to_cmdline(cmd_line) - self._add_model_to_cmdline(cmd_line) + self._add_model_to_cmdline(cmd_line, folder) if "config" in self.inputs: # Add config file to command line @@ -290,8 +290,7 @@ def _add_arch_to_cmdline(self, cmd_line: dict) -> dict: cmd_line["arch"] = architecture def _add_model_to_cmdline( - self, - cmd_line: dict, + self, cmd_line: dict, folder: aiida.common.folders.Folder ) -> dict: """ Find model in inputs or config file and add to command line if needed. @@ -301,6 +300,9 @@ def _add_model_to_cmdline( cmd_line : dict Dictionary containing the cmd line keys. + folder : aiida.common.folders.Folder + An `aiida.common.folders.Folder` to temporarily write files on disk. + Returns ------- dict @@ -311,6 +313,11 @@ def _add_model_to_cmdline( # Raise error if model is None (different than model not given as input) if self.inputs.model is None: raise ValueError("Model cannot be None") - model_path = self.inputs.model.filepath + + with self.inputs.model.open(mode="rb") as source: + with folder.open("modelcopy.model", mode="wb") as target: + shutil.copyfileobj(source, target) + + model_path = "modelcopy.model" if model_path: cmd_line.setdefault("calc-kwargs", {})["model"] = model_path diff --git a/aiida_mlip/calculations/train.py b/aiida_mlip/calculations/train.py index a43aafd3..1dd5e79b 100644 --- a/aiida_mlip/calculations/train.py +++ b/aiida_mlip/calculations/train.py @@ -1,6 +1,7 @@ """Class for training machine learning models.""" from pathlib import Path +import shutil from aiida.common import InputValidationError, datastructures import aiida.common.folders @@ -175,9 +176,10 @@ def prepare_for_submission( # Add foundation_model to the config file if fine-tuning is enabled if self.inputs.fine_tune and "foundation_model" in self.inputs: - model_data = self.inputs.foundation_model - foundation_model_path = model_data.filepath - config_parse += f"\nfoundation_model: {foundation_model_path}" + with self.inputs.foundation_model.open(mode="rb") as source: + with folder.open("modelcopy.model", mode="wb") as target: + shutil.copyfileobj(source, target) + config_parse += "foundation_model: modelcopy.model" # Copy config file content inside the folder where the calculation is run config_copy = "mlip_train.yml" diff --git a/aiida_mlip/data/model.py b/aiida_mlip/data/model.py index 8fa8fb95..953970f4 100644 --- a/aiida_mlip/data/model.py +++ b/aiida_mlip/data/model.py @@ -70,7 +70,9 @@ def _calculate_hash(file: Union[str, Path]) -> str: return file_hash @classmethod - def _check_existing_file(cls, file: Union[str, Path]) -> Path: + def _check_existing_file( + cls, file: Union[str, Path] + ) -> Path: # just don't do this, do the hash and then the querybuilder """ Check if a file already exists and return the path of the existing file. @@ -136,7 +138,7 @@ def __init__( """ super().__init__(file, filename, **kwargs) self.base.attributes.set("architecture", architecture) - self.base.attributes.set("filepath", str(file)) + self.base.attributes.set("filepath", str(file)) # no need def set_file( self, @@ -164,9 +166,11 @@ def set_file( """ super().set_file(file, filename, **kwargs) self.base.attributes.set("architecture", architecture) - self.base.attributes.set("filepath", str(file)) + # here compute hash and set attribute + model_hash = self._calculate_hash(file) + self.base.attributes.set("model_hash", model_hash) - @classmethod + @classmethod # if I change I won't need this def local_file( cls, file: Union[str, Path], @@ -195,7 +199,7 @@ def local_file( @classmethod # pylint: disable=too-many-arguments - def download( + def download( # change names with from (from_local and from_url) cls, url: str, architecture: str, @@ -231,6 +235,11 @@ def download( ) arch_dir = (cache_dir / architecture) if architecture else cache_dir + # do it with the temp file I download the file + # sha witrh querybuilder + # don't need to have the file + # all this check I do here would be different, it's not robust like this + # cache_path = cache_dir.resolve() arch_path = arch_dir.resolve() arch_path.mkdir(parents=True, exist_ok=True) @@ -271,13 +280,13 @@ def architecture(self) -> str: return self.base.attributes.get("architecture") @property - def filepath(self) -> str: + def model_hash(self) -> str: """ - Return the filepath. + Return the architecture. Returns ------- str - Path of the mlip model. + Architecture of the mlip model. """ - return self.base.attributes.get("filepath") + return self.base.attributes.get("model_hash") diff --git a/tests/calculations/test_geomopt.py b/tests/calculations/test_geomopt.py index 457516b4..4d6450d3 100644 --- a/tests/calculations/test_geomopt.py +++ b/tests/calculations/test_geomopt.py @@ -43,7 +43,7 @@ def test_geomopt(fixture_sandbox, generate_calc_job, janus_code, model_folder): "--out", "aiida-results.xyz", "--calc-kwargs", - f"{{'default_dtype': 'float64', 'model': '{model_file}'}}", + "{'default_dtype': 'float64', 'model': 'modelcopy.model'}", "--traj", "aiida-traj.xyz", ] @@ -57,7 +57,7 @@ def test_geomopt(fixture_sandbox, generate_calc_job, janus_code, model_folder): ] # Check the attributes of the returned `CalcInfo` - assert fixture_sandbox.get_content_list() == ["aiida.xyz"] + assert fixture_sandbox.get_content_list() == ["aiida.xyz", "modelcopy.model"] assert isinstance(calc_info, datastructures.CalcInfo) assert isinstance(calc_info.codes_info[0], datastructures.CodeInfo) assert len(calc_info.codes_info[0].cmdline_params) == len(cmdline_params) diff --git a/tests/calculations/test_md.py b/tests/calculations/test_md.py index 3b4a2863..4a617592 100644 --- a/tests/calculations/test_md.py +++ b/tests/calculations/test_md.py @@ -56,7 +56,7 @@ def test_MD(fixture_sandbox, generate_calc_job, janus_code, model_folder): "--summary", "md_summary.yml", "--calc-kwargs", - f"{{'default_dtype': 'float64', 'model': '{model_file}'}}", + "{'default_dtype': 'float64', 'model': 'modelcopy.model'}", "--ensemble", "nve", "--temp", @@ -85,7 +85,7 @@ def test_MD(fixture_sandbox, generate_calc_job, janus_code, model_folder): ] # Check the attributes of the returned `CalcInfo` - assert fixture_sandbox.get_content_list() == ["aiida.xyz"] + assert fixture_sandbox.get_content_list() == ["aiida.xyz", "modelcopy.model"] assert isinstance(calc_info, datastructures.CalcInfo) assert isinstance(calc_info.codes_info[0], datastructures.CodeInfo) assert len(calc_info.codes_info[0].cmdline_params) == len(cmdline_params) @@ -123,7 +123,7 @@ def test_MD_with_config( "--arch", "mace", "--calc-kwargs", - f"{{'model': '{model_file}'}}", + "{'model': 'modelcopy.model'}", "--config", "config.yaml", "--ensemble", @@ -146,7 +146,11 @@ def test_MD_with_config( ] # Check the attributes of the returned `CalcInfo` - assert sorted(fixture_sandbox.get_content_list()) == ["aiida.xyz", "config.yaml"] + assert sorted(fixture_sandbox.get_content_list()) == [ + "aiida.xyz", + "config.yaml", + "modelcopy.model", + ] assert isinstance(calc_info, datastructures.CalcInfo) assert isinstance(calc_info.codes_info[0], datastructures.CodeInfo) assert len(calc_info.codes_info[0].cmdline_params) == len(cmdline_params) diff --git a/tests/calculations/test_singlepoint.py b/tests/calculations/test_singlepoint.py index 6cd1e890..53114e04 100644 --- a/tests/calculations/test_singlepoint.py +++ b/tests/calculations/test_singlepoint.py @@ -44,7 +44,7 @@ def test_singlepoint(fixture_sandbox, generate_calc_job, janus_code, model_folde "--out", "aiida-results.xyz", "--calc-kwargs", - f"{{'default_dtype': 'float64', 'model': '{model_file}'}}", + "{'default_dtype': 'float64', 'model': 'modelcopy.model'}", ] retrieve_list = [ @@ -55,7 +55,7 @@ def test_singlepoint(fixture_sandbox, generate_calc_job, janus_code, model_folde ] # Check the attributes of the returned `CalcInfo` - assert fixture_sandbox.get_content_list() == ["aiida.xyz"] + assert fixture_sandbox.get_content_list() == ["aiida.xyz", "modelcopy.model"] assert isinstance(calc_info, datastructures.CalcInfo) assert isinstance(calc_info.codes_info[0], datastructures.CodeInfo) assert sorted(calc_info.codes_info[0].cmdline_params) == sorted(cmdline_params) diff --git a/tests/calculations/test_train.py b/tests/calculations/test_train.py index f303b8b1..2a247da9 100644 --- a/tests/calculations/test_train.py +++ b/tests/calculations/test_train.py @@ -141,7 +141,7 @@ def test_prepare_tune(fixture_sandbox, generate_calc_job, janus_code, config_fol ] # Check the attributes of the returned `CalcInfo` - assert fixture_sandbox.get_content_list() == ["mlip_train.yml"] + assert fixture_sandbox.get_content_list() == ["mlip_train.yml", "modelcopy.model"] assert isinstance(calc_info, datastructures.CalcInfo) assert isinstance(calc_info.codes_info[0], datastructures.CodeInfo) assert sorted(calc_info.retrieve_list) == sorted(retrieve_list) From 6ba4100a503d5493fb4d8711b7a3c65d0d565e6d Mon Sep 17 00:00:00 2001 From: federica Date: Wed, 10 Jul 2024 09:06:11 +0100 Subject: [PATCH 02/20] fix test? --- tests/calculations/test_train.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tests/calculations/test_train.py b/tests/calculations/test_train.py index 2a247da9..7c543858 100644 --- a/tests/calculations/test_train.py +++ b/tests/calculations/test_train.py @@ -141,7 +141,9 @@ def test_prepare_tune(fixture_sandbox, generate_calc_job, janus_code, config_fol ] # Check the attributes of the returned `CalcInfo` - assert fixture_sandbox.get_content_list() == ["mlip_train.yml", "modelcopy.model"] + assert sorted(fixture_sandbox.get_content_list()) == sorted( + ["mlip_train.yml", "modelcopy.model"] + ) assert isinstance(calc_info, datastructures.CalcInfo) assert isinstance(calc_info.codes_info[0], datastructures.CodeInfo) assert sorted(calc_info.retrieve_list) == sorted(retrieve_list) From ecdf461ed125ceb69e926977ac86bf0ded1456d0 Mon Sep 17 00:00:00 2001 From: federica Date: Wed, 10 Jul 2024 09:06:11 +0100 Subject: [PATCH 03/20] fix test? --- tests/calculations/test_geomopt.py | 4 +++- tests/calculations/test_md.py | 16 ++++++++++------ tests/calculations/test_singlepoint.py | 4 +++- 3 files changed, 16 insertions(+), 8 deletions(-) diff --git a/tests/calculations/test_geomopt.py b/tests/calculations/test_geomopt.py index 4d6450d3..9cc0a952 100644 --- a/tests/calculations/test_geomopt.py +++ b/tests/calculations/test_geomopt.py @@ -57,7 +57,9 @@ def test_geomopt(fixture_sandbox, generate_calc_job, janus_code, model_folder): ] # Check the attributes of the returned `CalcInfo` - assert fixture_sandbox.get_content_list() == ["aiida.xyz", "modelcopy.model"] + assert sorted(fixture_sandbox.get_content_list()) == sorted( + ["aiida.xyz", "modelcopy.model"] + ) assert isinstance(calc_info, datastructures.CalcInfo) assert isinstance(calc_info.codes_info[0], datastructures.CodeInfo) assert len(calc_info.codes_info[0].cmdline_params) == len(cmdline_params) diff --git a/tests/calculations/test_md.py b/tests/calculations/test_md.py index 4a617592..795a5204 100644 --- a/tests/calculations/test_md.py +++ b/tests/calculations/test_md.py @@ -85,7 +85,9 @@ def test_MD(fixture_sandbox, generate_calc_job, janus_code, model_folder): ] # Check the attributes of the returned `CalcInfo` - assert fixture_sandbox.get_content_list() == ["aiida.xyz", "modelcopy.model"] + assert sorted(fixture_sandbox.get_content_list()) == sorted( + ["aiida.xyz", "modelcopy.model"] + ) assert isinstance(calc_info, datastructures.CalcInfo) assert isinstance(calc_info.codes_info[0], datastructures.CodeInfo) assert len(calc_info.codes_info[0].cmdline_params) == len(cmdline_params) @@ -146,11 +148,13 @@ def test_MD_with_config( ] # Check the attributes of the returned `CalcInfo` - assert sorted(fixture_sandbox.get_content_list()) == [ - "aiida.xyz", - "config.yaml", - "modelcopy.model", - ] + assert sorted(fixture_sandbox.get_content_list()) == sorted( + [ + "aiida.xyz", + "config.yaml", + "modelcopy.model", + ] + ) assert isinstance(calc_info, datastructures.CalcInfo) assert isinstance(calc_info.codes_info[0], datastructures.CodeInfo) assert len(calc_info.codes_info[0].cmdline_params) == len(cmdline_params) diff --git a/tests/calculations/test_singlepoint.py b/tests/calculations/test_singlepoint.py index 53114e04..5627a3a7 100644 --- a/tests/calculations/test_singlepoint.py +++ b/tests/calculations/test_singlepoint.py @@ -55,7 +55,9 @@ def test_singlepoint(fixture_sandbox, generate_calc_job, janus_code, model_folde ] # Check the attributes of the returned `CalcInfo` - assert fixture_sandbox.get_content_list() == ["aiida.xyz", "modelcopy.model"] + assert sorted(fixture_sandbox.get_content_list()) == sorted( + ["aiida.xyz", "modelcopy.model"] + ) assert isinstance(calc_info, datastructures.CalcInfo) assert isinstance(calc_info.codes_info[0], datastructures.CodeInfo) assert sorted(calc_info.codes_info[0].cmdline_params) == sorted(cmdline_params) From 329bfc51a8c5aa3e0f05c8575eb723bd2c5d0845 Mon Sep 17 00:00:00 2001 From: federica Date: Wed, 10 Jul 2024 15:03:56 +0100 Subject: [PATCH 04/20] change hashing --- aiida_mlip/data/model.py | 64 ++++++++++++++++++++-------------------- tests/data/test_model.py | 45 +++++++++++++++++++++------- 2 files changed, 66 insertions(+), 43 deletions(-) diff --git a/aiida_mlip/data/model.py b/aiida_mlip/data/model.py index 953970f4..37e69a64 100644 --- a/aiida_mlip/data/model.py +++ b/aiida_mlip/data/model.py @@ -4,9 +4,9 @@ from pathlib import Path from typing import Any, Optional, Union from urllib import request -from urllib.parse import urlparse -from aiida.orm import SinglefileData +from aiida.orm import QueryBuilder, SinglefileData, load_node +from aiida.tools import delete_nodes class ModelData(SinglefileData): @@ -203,9 +203,9 @@ def download( # change names with from (from_local and from_url) cls, url: str, architecture: str, - filename: Optional[str] = None, + filename: Optional[str] = "tmp_file.model", cache_dir: Optional[Union[str, Path]] = None, - force_download: Optional[bool] = False, + keep_file: Optional[bool] = False, ): """ Download a file from a URL and save it as ModelData. @@ -217,13 +217,13 @@ def download( # change names with from (from_local and from_url) architecture : [str] Architecture of the mlip model. filename : Optional[str], optional - Name to be used for the file (defaults to the name of provided file). + Name to be used for the file defaults to tmp_file.model. cache_dir : Optional[Union[str, Path]], optional Path to the folder where the file has to be saved (defaults to "~/.cache/mlips/"). - force_download : Optional[bool], optional - True to keep the downloaded model even if there are duplicates - (default: False). + keep_file : Optional[bool], optional + True to keep the downloaded model, even if there are duplicates) + (default: False, the file is cancelled and only saved in database). Returns ------- @@ -235,37 +235,37 @@ def download( # change names with from (from_local and from_url) ) arch_dir = (cache_dir / architecture) if architecture else cache_dir - # do it with the temp file I download the file - # sha witrh querybuilder - # don't need to have the file - # all this check I do here would be different, it's not robust like this - - # cache_path = cache_dir.resolve() arch_path = arch_dir.resolve() arch_path.mkdir(parents=True, exist_ok=True) - model_name = urlparse(url).path.split("/")[-1] - - file = arch_path / filename if filename else arch_path / model_name - - # If file already exists, use next indexed name - stem = file.stem - i = 1 - while file.exists(): - i += 1 - file = file.with_stem(f"{stem}_{i}") + file = arch_path / filename # Download file request.urlretrieve(url, file) - if force_download: - print(f"filename changed to {file}") - return cls.local_file(file=file, architecture=architecture) - - # Check if the hash of the just downloaded file matches any other file - filepath = cls._check_existing_file(file) - - return cls.local_file(file=filepath, architecture=architecture) + model = cls.local_file(file=file, architecture=architecture) + + if keep_file: + return model + + file.unlink(missing_ok=True) + + qb = QueryBuilder() + qb.append(ModelData, project=["attributes", "pk", "ctime"]) + + for i in qb.iterdict(): + if i["ModelData_1"]["attributes"]["model_hash"] == model.model_hash: + if i["ModelData_1"]["ctime"] != model.ctime: + delete_nodes( + [model.uuid], + dry_run=False, + create_forward=True, + call_calc_forward=True, + call_work_forward=True, + ) + model = load_node(i["ModelData_1"]["pk"]) + break + return model @property def architecture(self) -> str: diff --git a/tests/data/test_model.py b/tests/data/test_model.py index e85e2542..f5409a0b 100644 --- a/tests/data/test_model.py +++ b/tests/data/test_model.py @@ -38,7 +38,7 @@ def test_architecture(): assert model.architecture == "mace" -def test_download_fresh_file(tmp_path): +def test_download_fresh_file_keep(tmp_path): """Test if download works""" # Ensure we do not have the file cached already path_test = tmp_path / "mace" / "mace.model" @@ -51,6 +51,7 @@ def test_download_fresh_file(tmp_path): filename="mace.model", cache_dir=tmp_path, architecture="mace", + keep_file=True, ) # Assert the ModelData is downloaded @@ -59,23 +60,45 @@ def test_download_fresh_file(tmp_path): assert file_path.exists(), f"File {file_path} does not exist." -def test_no_download_cached_file(): +def test_download_fresh_file(tmp_path): + """Test if download works""" + # Ensure we do not have the file cached already + path_test = tmp_path / "mace" / "mace.model" + path_test.unlink(missing_ok=True) + + # Construct a ModelData instance downloading a non-cached file + # pylint:disable=line-too-long + model = ModelData.download( + url="https://github.com/stfc/janus-core/raw/main/tests/models/mace_mp_small.model", + filename="mace.model", + cache_dir=tmp_path, + architecture="mace", + ) + + # Assert the ModelData is downloaded + file_path = tmp_path / "mace" / "mace.model" + assert model.architecture == "mace" + assert file_path.exists() is False, f"File {file_path} exists and shouldn't." + + +def test_no_download_cached_file(tmp_path): """Test if the caching work for avoiding double download""" + # pylint:disable=line-too-long + existing_model = ModelData.download( + url="https://github.com/stfc/janus-core/raw/main/tests/models/mace_mp_small.model", + filename="mace_existing.model", + cache_dir=tmp_path, + architecture="mace_mp", + ) # Construct a ModelData instance that should use the cached file # pylint:disable=line-too-long model = ModelData.download( url="https://github.com/stfc/janus-core/raw/main/tests/models/mace_mp_small.model", - cache_dir=Path(__file__).parent / "input_files", + cache_dir=tmp_path, filename="test_model.model", - architecture="mace", + architecture="mace_mp", ) # Assert the new ModelData was not downloaded and the old one is still there - file_path = Path(__file__).parent / "input_files" / "mace" / "test_model.model" - assert not file_path.exists(), f"File {file_path} exists but it shouldn't." - file_path2 = Path(__file__).parent / "input_files" / "test_model.model" - assert not file_path2.exists(), f"File {file_path2} exists but it shouldn't." - file_path3 = Path(__file__).parent / "input_files" / "mace" / "mace_mp_small.model" - assert file_path3.exists(), f"File {file_path3} should exist" - assert model.architecture == "mace" + assert model.pk == existing_model.pk From efb9a434aad05fc5bf8df2abb9ac2a09c3099325 Mon Sep 17 00:00:00 2001 From: federica Date: Wed, 10 Jul 2024 15:16:44 +0100 Subject: [PATCH 05/20] change names of functins --- aiida_mlip/data/model.py | 58 +++---------------- aiida_mlip/helpers/help_load.py | 6 +- aiida_mlip/parsers/train_parser.py | 4 +- docs/source/user_guide/data.rst | 4 +- docs/source/user_guide/tutorial.rst | 4 +- .../tutorials/geometry-optimisation.ipynb | 6 +- .../tutorials/high-throughput-screening.ipynb | 2 +- examples/tutorials/singlepoint.ipynb | 4 +- tests/calculations/test_geomopt.py | 4 +- tests/calculations/test_md.py | 6 +- tests/calculations/test_singlepoint.py | 8 +-- tests/calculations/test_train.py | 4 +- tests/data/test_model.py | 16 ++--- 13 files changed, 41 insertions(+), 85 deletions(-) diff --git a/aiida_mlip/data/model.py b/aiida_mlip/data/model.py index 37e69a64..2c1f3946 100644 --- a/aiida_mlip/data/model.py +++ b/aiida_mlip/data/model.py @@ -26,14 +26,14 @@ class ModelData(SinglefileData): ---------- architecture : str Architecture of the mlip model. - filepath : str - Path of the mlip model. + model_hash : str + Hash of the model. Methods ------- set_file(file, filename=None, architecture=None, **kwargs) Set the file for the node. - local_file(file, architecture, filename=None): + from_local(file, architecture, filename=None): Create a ModelData instance from a local file. download(url, architecture, filename=None, cache_dir=None, force_download=False) Download a file from a URL and save it as ModelData. @@ -69,49 +69,6 @@ def _calculate_hash(file: Union[str, Path]) -> str: file_hash = sha256.hexdigest() return file_hash - @classmethod - def _check_existing_file( - cls, file: Union[str, Path] - ) -> Path: # just don't do this, do the hash and then the querybuilder - """ - Check if a file already exists and return the path of the existing file. - - Parameters - ---------- - file : Union[str, Path] - Path to the downloaded model file. - - Returns - ------- - Path - The path of the model file of interest (same as input path if no duplicates - were found). - """ - file_hash = cls._calculate_hash(file) - - def is_diff_file(curr_path: Path) -> bool: - """ - Filter to check if two files are different. - - Parameters - ---------- - curr_path : Path - Path to the file to compare with. - - Returns - ------- - bool - True if the files are different, False otherwise. - """ - return curr_path.is_file() and not curr_path.samefile(file) - - file_folder = Path(file).parent - for existing_file in filter(is_diff_file, file_folder.rglob("*")): - if cls._calculate_hash(existing_file) == file_hash: - file.unlink() - return existing_file - return Path(file) - def __init__( self, file: Union[str, Path], @@ -138,7 +95,6 @@ def __init__( """ super().__init__(file, filename, **kwargs) self.base.attributes.set("architecture", architecture) - self.base.attributes.set("filepath", str(file)) # no need def set_file( self, @@ -170,8 +126,8 @@ def set_file( model_hash = self._calculate_hash(file) self.base.attributes.set("model_hash", model_hash) - @classmethod # if I change I won't need this - def local_file( + @classmethod + def from_local( cls, file: Union[str, Path], architecture: str, @@ -199,7 +155,7 @@ def local_file( @classmethod # pylint: disable=too-many-arguments - def download( # change names with from (from_local and from_url) + def from_url( cls, url: str, architecture: str, @@ -243,7 +199,7 @@ def download( # change names with from (from_local and from_url) # Download file request.urlretrieve(url, file) - model = cls.local_file(file=file, architecture=architecture) + model = cls.from_local(file=file, architecture=architecture) if keep_file: return model diff --git a/aiida_mlip/helpers/help_load.py b/aiida_mlip/helpers/help_load.py index 937c906f..41979e13 100644 --- a/aiida_mlip/helpers/help_load.py +++ b/aiida_mlip/helpers/help_load.py @@ -42,15 +42,15 @@ def load_model( The loaded model. """ if model is None: - loaded_model = ModelData.download( + loaded_model = ModelData.from_url( "https://github.com/stfc/janus-core/raw/main/tests/models/mace_mp_small.model", # pylint: disable=line-too-long architecture, cache_dir=cache_dir, ) elif (file_path := Path(model)).is_file(): - loaded_model = ModelData.local_file(file_path, architecture=architecture) + loaded_model = ModelData.from_local(file_path, architecture=architecture) else: - loaded_model = ModelData.download( + loaded_model = ModelData.from_url( model, architecture=architecture, cache_dir=cache_dir ) return loaded_model diff --git a/aiida_mlip/parsers/train_parser.py b/aiida_mlip/parsers/train_parser.py index d12d2662..bc22baa2 100644 --- a/aiida_mlip/parsers/train_parser.py +++ b/aiida_mlip/parsers/train_parser.py @@ -164,8 +164,8 @@ def _save_models(self, model_output: Path, compiled_model_output: Path) -> None: Path to the compiled model output file. """ architecture = "mace_mp" - model = ModelData.local_file(model_output, architecture=architecture) - compiled_model = ModelData.local_file( + model = ModelData.from_local(model_output, architecture=architecture) + compiled_model = ModelData.from_local( compiled_model_output, architecture=architecture ) diff --git a/docs/source/user_guide/data.rst b/docs/source/user_guide/data.rst index 4f9437ac..d733efc9 100644 --- a/docs/source/user_guide/data.rst +++ b/docs/source/user_guide/data.rst @@ -23,13 +23,13 @@ Usage .. code-block:: python - model = ModelData.local_file('/path/to/file', filename='model', architecture='mace') + model = ModelData.from_local('/path/to/file', filename='model', architecture='mace') - To download a file and save it as a `ModelData` object: .. code-block:: python - model = ModelData.download('http://yoururl.test/model', architecture='mace', filename='model', cache_dir='/home/mlip/', force_download=False) + model = ModelData.from_url('http://yoururl.test/model', architecture='mace', filename='model', cache_dir='/home/mlip/', force_download=False) - The architecture of the model file can be accessed using the `architecture` property: diff --git a/docs/source/user_guide/tutorial.rst b/docs/source/user_guide/tutorial.rst index dcc2df6a..08ab8f63 100644 --- a/docs/source/user_guide/tutorial.rst +++ b/docs/source/user_guide/tutorial.rst @@ -35,13 +35,13 @@ In this example we use MACE with a model that we download from this URL: "https: from aiida_mlip.data.model import ModelData url = "https://github.com/stfc/janus-core/raw/main/tests/models/mace_mp_small.model" - model = ModelData.download(url, architecture="mace", cache_dir="/.cache/") + model = ModelData.from_url(url, architecture="mace", cache_dir="/.cache/") If we already have the model saved in some folder we can save it as: .. code-block:: python - model = ModelData.local_file("/path/to/model", architecture="mace") + model = ModelData.from_local("/path/to/model", architecture="mace") Another parameter that we need to define as AiiDA type is the code. Assuming the code is saved as `janus` in the `localhost` computer, the code info that are needed can be loaded as follow: diff --git a/examples/tutorials/geometry-optimisation.ipynb b/examples/tutorials/geometry-optimisation.ipynb index 4032433f..ad713ee5 100644 --- a/examples/tutorials/geometry-optimisation.ipynb +++ b/examples/tutorials/geometry-optimisation.ipynb @@ -76,7 +76,7 @@ "source": [ "from aiida_mlip.data.model import ModelData\n", "url = \"https://github.com/stfc/janus-core/raw/main/tests/models/mace_mp_small.model\"\n", - "model = ModelData.download(url, architecture=\"mace_mp\", cache_dir=\"mlips\")" + "model = ModelData.from_url(url, architecture=\"mace_mp\", cache_dir=\"mlips\")" ] }, { @@ -98,7 +98,7 @@ "outputs": [], "source": [ "# from aiida_mlip.data.model import ModelData\n", - "# model = ModelData.local_file(\"mlips/mace_mp/mace_mp_small.model\", architecture=\"mace_mp\")" + "# model = ModelData.from_local(\"mlips/mace_mp/mace_mp_small.model\", architecture=\"mace_mp\")" ] }, { @@ -368,7 +368,7 @@ " return traj.get_step_structure(index.value)\n", "\n", "url = \"https://github.com/stfc/janus-core/raw/main/tests/models/mace_mp_small.model\"\n", - "model = ModelData.download(url, architecture=\"mace_mp\", cache_dir=\"mlips\")\n", + "model = ModelData.from_url(url, architecture=\"mace_mp\", cache_dir=\"mlips\")\n", "list_of_nodes = []\n", "\n", "\n", diff --git a/examples/tutorials/high-throughput-screening.ipynb b/examples/tutorials/high-throughput-screening.ipynb index d8a7e786..35b2cb8b 100644 --- a/examples/tutorials/high-throughput-screening.ipynb +++ b/examples/tutorials/high-throughput-screening.ipynb @@ -80,7 +80,7 @@ "outputs": [], "source": [ "Calculation = CalculationFactory(\"mlip.sp\")\n", - "model = ModelData.local_file(\"mlips/mace_mp/mace_mp_small.model\", architecture=\"mace_mp\")\n", + "model = ModelData.from_local(\"mlips/mace_mp/mace_mp_small.model\", architecture=\"mace_mp\")\n", "metadata = {\"options\": {\"resources\": {\"num_machines\": 1}}}\n", "code = load_code(\"janus@localhost\")" ] diff --git a/examples/tutorials/singlepoint.ipynb b/examples/tutorials/singlepoint.ipynb index aee8b8fb..4fa86f0b 100644 --- a/examples/tutorials/singlepoint.ipynb +++ b/examples/tutorials/singlepoint.ipynb @@ -66,7 +66,7 @@ "source": [ "from aiida_mlip.data.model import ModelData\n", "url = \"https://github.com/stfc/janus-core/raw/main/tests/models/mace_mp_small.model\"\n", - "model = ModelData.download(url, architecture=\"mace_mp\", cache_dir=\"mlips\")" + "model = ModelData.from_url(url, architecture=\"mace_mp\", cache_dir=\"mlips\")" ] }, { @@ -86,7 +86,7 @@ }, "outputs": [], "source": [ - "model = ModelData.local_file(\"/path/to/model\", architecture=\"mace\")" + "model = ModelData.from_local(\"/path/to/model\", architecture=\"mace\")" ] }, { diff --git a/tests/calculations/test_geomopt.py b/tests/calculations/test_geomopt.py index 9cc0a952..e94092c4 100644 --- a/tests/calculations/test_geomopt.py +++ b/tests/calculations/test_geomopt.py @@ -24,7 +24,7 @@ def test_geomopt(fixture_sandbox, generate_calc_job, janus_code, model_folder): "arch": Str("mace"), "precision": Str("float64"), "struct": StructureData(ase=bulk("NaCl", "rocksalt", 5.63)), - "model": ModelData.local_file(model_file, architecture="mace"), + "model": ModelData.from_local(model_file, architecture="mace"), "device": Str("cpu"), } @@ -79,7 +79,7 @@ def test_run_opt(model_folder, janus_code): "arch": Str("mace"), "precision": Str("float64"), "struct": StructureData(ase=bulk("NaCl", "rocksalt", 5.63)), - "model": ModelData.local_file(model_file, architecture="mace"), + "model": ModelData.from_local(model_file, architecture="mace"), "device": Str("cpu"), "fully_opt": Bool(True), "fmax": Float(0.1), diff --git a/tests/calculations/test_md.py b/tests/calculations/test_md.py index 795a5204..79ca12b5 100644 --- a/tests/calculations/test_md.py +++ b/tests/calculations/test_md.py @@ -27,7 +27,7 @@ def test_MD(fixture_sandbox, generate_calc_job, janus_code, model_folder): "arch": Str("mace"), "precision": Str("float64"), "struct": StructureData(ase=bulk("NaCl", "rocksalt", 5.63)), - "model": ModelData.local_file(model_file, architecture="mace"), + "model": ModelData.from_local(model_file, architecture="mace"), "device": Str("cpu"), "ensemble": Str("nve"), "md_kwargs": Dict( @@ -109,7 +109,7 @@ def test_MD_with_config( model_file = model_folder / "mace_mp_small.model" inputs = { "code": janus_code, - "model": ModelData.local_file(file=model_file, architecture="mace"), + "model": ModelData.from_local(file=model_file, architecture="mace"), "metadata": {"options": {"resources": {"num_machines": 1}}}, "config": JanusConfigfile(config_folder / "config_janus_md.yaml"), } @@ -176,7 +176,7 @@ def test_run_md(model_folder, structure_folder, janus_code): "arch": Str("mace"), "precision": Str("float64"), "struct": StructureData(ase=read(structure_file)), - "model": ModelData.local_file(model_file, architecture="mace"), + "model": ModelData.from_local(model_file, architecture="mace"), "device": Str("cpu"), "ensemble": Str("nve"), "md_kwargs": Dict( diff --git a/tests/calculations/test_singlepoint.py b/tests/calculations/test_singlepoint.py index 5627a3a7..de465ccd 100644 --- a/tests/calculations/test_singlepoint.py +++ b/tests/calculations/test_singlepoint.py @@ -25,7 +25,7 @@ def test_singlepoint(fixture_sandbox, generate_calc_job, janus_code, model_folde "arch": Str("mace"), "precision": Str("float64"), "struct": StructureData(ase=bulk("NaCl", "rocksalt", 5.63)), - "model": ModelData.local_file(model_file, architecture="mace"), + "model": ModelData.from_local(model_file, architecture="mace"), "device": Str("cpu"), } @@ -74,7 +74,7 @@ def test_sp_nostruct(fixture_sandbox, generate_calc_job, model_folder, janus_cod "code": janus_code, "arch": Str("mace"), "precision": Str("float64"), - "model": ModelData.local_file(model_file, architecture="mace"), + "model": ModelData.from_local(model_file, architecture="mace"), "device": Str("cpu"), } with pytest.raises(InputValidationError): @@ -119,7 +119,7 @@ def test_two_arch(fixture_sandbox, generate_calc_job, model_folder, janus_code): inputs = { "code": janus_code, "metadata": {"options": {"resources": {"num_machines": 1}}}, - "model": ModelData.local_file(model_file, architecture="mace_mp"), + "model": ModelData.from_local(model_file, architecture="mace_mp"), "arch": Str("chgnet"), "struct": StructureData(ase=bulk("NaCl", "rocksalt", 5.63)), } @@ -137,7 +137,7 @@ def test_run_sp(model_folder, janus_code): "arch": Str("mace"), "precision": Str("float64"), "struct": StructureData(ase=bulk("NaCl", "rocksalt", 5.63)), - "model": ModelData.local_file(model_file, architecture="mace"), + "model": ModelData.from_local(model_file, architecture="mace"), "device": Str("cpu"), } diff --git a/tests/calculations/test_train.py b/tests/calculations/test_train.py index 7c543858..ade8c681 100644 --- a/tests/calculations/test_train.py +++ b/tests/calculations/test_train.py @@ -121,7 +121,7 @@ def test_prepare_tune(fixture_sandbox, generate_calc_job, janus_code, config_fol "code": janus_code, "mlip_config": config, "fine_tune": Bool(True), - "foundation_model": ModelData.local_file( + "foundation_model": ModelData.from_local( file=model_file, architecture="mace_mp" ), } @@ -179,7 +179,7 @@ def test_run_train(janus_code, config_folder): "fine_tune": Bool(True), "code": janus_code, "mlip_config": config, - "foundation_model": ModelData.local_file( + "foundation_model": ModelData.from_local( file=model_file, architecture="mace_mp" ), } diff --git a/tests/data/test_model.py b/tests/data/test_model.py index f5409a0b..79b35a33 100644 --- a/tests/data/test_model.py +++ b/tests/data/test_model.py @@ -1,4 +1,4 @@ -"""Test for ModelData class""" +"""Test for ModelData class.""" from pathlib import Path @@ -10,7 +10,7 @@ def test_local_file(): # Construct a ModelData instance with the local file model_path = Path(__file__).parent / "input_files" / "model_local_file.txt" absolute_path = model_path.resolve() - model = ModelData.local_file(file=absolute_path, architecture="mace") + model = ModelData.from_local(file=absolute_path, architecture="mace") # Assert the ModelData contains the content we expect content = model.get_content() assert content == model_path.read_text(encoding="utf-8") @@ -21,7 +21,7 @@ def test_relativepath(): # Construct a ModelData instance with the local file model_path = Path(__file__).parent / "input_files" / "model_local_file.txt" relative_path = model_path.relative_to(Path.cwd()) - model = ModelData.local_file(file=relative_path, architecture="mace") + model = ModelData.from_local(file=relative_path, architecture="mace") # Assert the ModelData contains the content we expect content = model.get_content() assert content == relative_path.read_text(encoding="utf-8") @@ -30,7 +30,7 @@ def test_relativepath(): def test_architecture(): """Testing that the architecture is read and added to attributes""" file = Path(__file__).parent / "input_files/model_local_file.txt" - model = ModelData.local_file( + model = ModelData.from_local( file=file, filename="model", architecture="mace", @@ -46,7 +46,7 @@ def test_download_fresh_file_keep(tmp_path): # Construct a ModelData instance downloading a non-cached file # pylint:disable=line-too-long - model = ModelData.download( + model = ModelData.from_url( url="https://github.com/stfc/janus-core/raw/main/tests/models/mace_mp_small.model", filename="mace.model", cache_dir=tmp_path, @@ -68,7 +68,7 @@ def test_download_fresh_file(tmp_path): # Construct a ModelData instance downloading a non-cached file # pylint:disable=line-too-long - model = ModelData.download( + model = ModelData.from_url( url="https://github.com/stfc/janus-core/raw/main/tests/models/mace_mp_small.model", filename="mace.model", cache_dir=tmp_path, @@ -85,7 +85,7 @@ def test_no_download_cached_file(tmp_path): """Test if the caching work for avoiding double download""" # pylint:disable=line-too-long - existing_model = ModelData.download( + existing_model = ModelData.from_url( url="https://github.com/stfc/janus-core/raw/main/tests/models/mace_mp_small.model", filename="mace_existing.model", cache_dir=tmp_path, @@ -93,7 +93,7 @@ def test_no_download_cached_file(tmp_path): ) # Construct a ModelData instance that should use the cached file # pylint:disable=line-too-long - model = ModelData.download( + model = ModelData.from_url( url="https://github.com/stfc/janus-core/raw/main/tests/models/mace_mp_small.model", cache_dir=tmp_path, filename="test_model.model", From cc251b650852468a247ba5534845f85b87207686 Mon Sep 17 00:00:00 2001 From: federica Date: Wed, 10 Jul 2024 15:38:04 +0100 Subject: [PATCH 06/20] docs --- docs/source/user_guide/data.rst | 14 -------------- 1 file changed, 14 deletions(-) diff --git a/docs/source/user_guide/data.rst b/docs/source/user_guide/data.rst index d733efc9..1cc63567 100644 --- a/docs/source/user_guide/data.rst +++ b/docs/source/user_guide/data.rst @@ -39,20 +39,6 @@ Usage -- The filepath of the model file can be accessed using the `filepath` property: - -.. code-block:: python - - file_path = model.filepath - -.. warning:: - - When using shared data, the ``filepath`` could point to a inaccessible location on another computer. - So if you are using data from someone else, for both the model data and the config file, consider using the ``get_content()`` method to create a new file with identical content. - Then, use the filepath of the newly created file for running calculation. - A more robust solution to this problem is going to be implemented. - - JanusConfigfile --------------- From 3ae4b9e4ee3e477bf7d111e42d5de5bae1f533b1 Mon Sep 17 00:00:00 2001 From: Federica Zanca <93498393+federicazanca@users.noreply.github.com> Date: Thu, 11 Jul 2024 08:55:55 +0100 Subject: [PATCH 07/20] Apply suggestions from code review Co-authored-by: ElliottKasoar <45317199+ElliottKasoar@users.noreply.github.com> --- aiida_mlip/data/model.py | 10 +++++----- tests/data/test_model.py | 2 +- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/aiida_mlip/data/model.py b/aiida_mlip/data/model.py index 2c1f3946..d2497248 100644 --- a/aiida_mlip/data/model.py +++ b/aiida_mlip/data/model.py @@ -35,7 +35,7 @@ class ModelData(SinglefileData): Set the file for the node. from_local(file, architecture, filename=None): Create a ModelData instance from a local file. - download(url, architecture, filename=None, cache_dir=None, force_download=False) + from_url(url, architecture, filename=None, cache_dir=None, keep_file=False) Download a file from a URL and save it as ModelData. Other Parameters @@ -178,8 +178,8 @@ def from_url( Path to the folder where the file has to be saved (defaults to "~/.cache/mlips/"). keep_file : Optional[bool], optional - True to keep the downloaded model, even if there are duplicates) - (default: False, the file is cancelled and only saved in database). + True to keep the downloaded model, even if there are duplicates. + (default: False, the file is deleted and only saved in the database). Returns ------- @@ -238,11 +238,11 @@ def architecture(self) -> str: @property def model_hash(self) -> str: """ - Return the architecture. + Return hash of the architecture. Returns ------- str - Architecture of the mlip model. + Hash of the MLIP model. """ return self.base.attributes.get("model_hash") diff --git a/tests/data/test_model.py b/tests/data/test_model.py index 79b35a33..7d59d6f9 100644 --- a/tests/data/test_model.py +++ b/tests/data/test_model.py @@ -82,7 +82,7 @@ def test_download_fresh_file(tmp_path): def test_no_download_cached_file(tmp_path): - """Test if the caching work for avoiding double download""" + """Test if the caching prevents a duplicate download.""" # pylint:disable=line-too-long existing_model = ModelData.from_url( From 40c9252b86e937647eb989bc376b50fbe599db43 Mon Sep 17 00:00:00 2001 From: federica Date: Thu, 11 Jul 2024 10:48:35 +0100 Subject: [PATCH 08/20] review fix --- aiida_mlip/calculations/base.py | 3 +-- docs/source/user_guide/data.rst | 1 + tests/data/test_model.py | 10 +++++----- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/aiida_mlip/calculations/base.py b/aiida_mlip/calculations/base.py index 3b52f379..090fc75b 100644 --- a/aiida_mlip/calculations/base.py +++ b/aiida_mlip/calculations/base.py @@ -211,7 +211,7 @@ def prepare_for_submission( # Transform the structure data in xyz file called input_filename input_filename = self.inputs.metadata.options.input_filename atoms = structure.get_ase() - with folder.open(input_filename, mode="w", encoding=None) as file: + with folder.open(input_filename, mode="w", encoding="utf8") as file: write(file.name, images=atoms) log_filename = (self.inputs.log_filename).value @@ -319,5 +319,4 @@ def _add_model_to_cmdline( shutil.copyfileobj(source, target) model_path = "modelcopy.model" - if model_path: cmd_line.setdefault("calc-kwargs", {})["model"] = model_path diff --git a/docs/source/user_guide/data.rst b/docs/source/user_guide/data.rst index 1cc63567..396afdf4 100644 --- a/docs/source/user_guide/data.rst +++ b/docs/source/user_guide/data.rst @@ -37,6 +37,7 @@ Usage model_arch = model.architecture +As for a `SinglefileData`, the content of the model file can be accessed using the function `get_content()` JanusConfigfile diff --git a/tests/data/test_model.py b/tests/data/test_model.py index 7d59d6f9..5468b989 100644 --- a/tests/data/test_model.py +++ b/tests/data/test_model.py @@ -6,7 +6,7 @@ def test_local_file(): - """Testing that the local file function works""" + """Testing that the from_local function works""" # Construct a ModelData instance with the local file model_path = Path(__file__).parent / "input_files" / "model_local_file.txt" absolute_path = model_path.resolve() @@ -17,7 +17,7 @@ def test_local_file(): def test_relativepath(): - """Testing that the local file function works""" + """Testing that the from_local function works with a relative path""" # Construct a ModelData instance with the local file model_path = Path(__file__).parent / "input_files" / "model_local_file.txt" relative_path = model_path.relative_to(Path.cwd()) @@ -39,7 +39,7 @@ def test_architecture(): def test_download_fresh_file_keep(tmp_path): - """Test if download works""" + """Test if download works and the downloaded file is kept in them chosen folder""" # Ensure we do not have the file cached already path_test = tmp_path / "mace" / "mace.model" path_test.unlink(missing_ok=True) @@ -61,7 +61,7 @@ def test_download_fresh_file_keep(tmp_path): def test_download_fresh_file(tmp_path): - """Test if download works""" + """Test if download works and the file is only saved in the database not locally""" # Ensure we do not have the file cached already path_test = tmp_path / "mace" / "mace.model" path_test.unlink(missing_ok=True) @@ -82,7 +82,7 @@ def test_download_fresh_file(tmp_path): def test_no_download_cached_file(tmp_path): - """Test if the caching prevents a duplicate download.""" + """Test if the caching prevents saving duplicate model in the database.""" # pylint:disable=line-too-long existing_model = ModelData.from_url( From 45c50ae3ce855fcd7cfd43387bfdb4c4c900e905 Mon Sep 17 00:00:00 2001 From: federica Date: Thu, 11 Jul 2024 10:52:44 +0100 Subject: [PATCH 09/20] add comment --- aiida_mlip/data/model.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/aiida_mlip/data/model.py b/aiida_mlip/data/model.py index d2497248..87f6ee47 100644 --- a/aiida_mlip/data/model.py +++ b/aiida_mlip/data/model.py @@ -209,7 +209,10 @@ def from_url( qb = QueryBuilder() qb.append(ModelData, project=["attributes", "pk", "ctime"]) + # Looking for ModelData in the whole database for i in qb.iterdict(): + # If the hash is the same as the new model, but not the creation time + # it means that there is already a model that is the same, use that if i["ModelData_1"]["attributes"]["model_hash"] == model.model_hash: if i["ModelData_1"]["ctime"] != model.ctime: delete_nodes( From 147d2f99657c014d623430b6a766af66ab27245c Mon Sep 17 00:00:00 2001 From: Federica Zanca <93498393+federicazanca@users.noreply.github.com> Date: Fri, 12 Jul 2024 08:54:44 +0100 Subject: [PATCH 10/20] Apply suggestions from code review Co-authored-by: Alin Marin Elena Co-authored-by: ElliottKasoar <45317199+ElliottKasoar@users.noreply.github.com> --- aiida_mlip/calculations/base.py | 4 ++-- aiida_mlip/calculations/train.py | 4 ++-- tests/data/test_model.py | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/aiida_mlip/calculations/base.py b/aiida_mlip/calculations/base.py index 090fc75b..7f033344 100644 --- a/aiida_mlip/calculations/base.py +++ b/aiida_mlip/calculations/base.py @@ -315,8 +315,8 @@ def _add_model_to_cmdline( raise ValueError("Model cannot be None") with self.inputs.model.open(mode="rb") as source: - with folder.open("modelcopy.model", mode="wb") as target: + with folder.open("mlff.model", mode="wb") as target: shutil.copyfileobj(source, target) - model_path = "modelcopy.model" + model_path = "mlff.model" cmd_line.setdefault("calc-kwargs", {})["model"] = model_path diff --git a/aiida_mlip/calculations/train.py b/aiida_mlip/calculations/train.py index 1dd5e79b..5ea5cd64 100644 --- a/aiida_mlip/calculations/train.py +++ b/aiida_mlip/calculations/train.py @@ -177,9 +177,9 @@ def prepare_for_submission( # Add foundation_model to the config file if fine-tuning is enabled if self.inputs.fine_tune and "foundation_model" in self.inputs: with self.inputs.foundation_model.open(mode="rb") as source: - with folder.open("modelcopy.model", mode="wb") as target: + with folder.open("mlff.model", mode="wb") as target: shutil.copyfileobj(source, target) - config_parse += "foundation_model: modelcopy.model" + config_parse += "foundation_model: mlff.model" # Copy config file content inside the folder where the calculation is run config_copy = "mlip_train.yml" diff --git a/tests/data/test_model.py b/tests/data/test_model.py index 5468b989..3315f4f4 100644 --- a/tests/data/test_model.py +++ b/tests/data/test_model.py @@ -39,7 +39,7 @@ def test_architecture(): def test_download_fresh_file_keep(tmp_path): - """Test if download works and the downloaded file is kept in them chosen folder""" + """Test if download works and the downloaded file is kept in the chosen folder.""" # Ensure we do not have the file cached already path_test = tmp_path / "mace" / "mace.model" path_test.unlink(missing_ok=True) From 90a6afec781cc134d3ad5fac4aae2925bd009154 Mon Sep 17 00:00:00 2001 From: federica Date: Fri, 12 Jul 2024 09:50:06 +0100 Subject: [PATCH 11/20] name changes --- aiida_mlip/data/model.py | 4 ++-- aiida_mlip/helpers/help_load.py | 4 ++-- docs/source/user_guide/data.rst | 2 +- docs/source/user_guide/tutorial.rst | 2 +- examples/tutorials/geometry-optimisation.ipynb | 4 ++-- examples/tutorials/singlepoint.ipynb | 2 +- tests/calculations/test_geomopt.py | 4 ++-- tests/calculations/test_md.py | 8 ++++---- tests/calculations/test_singlepoint.py | 4 ++-- tests/calculations/test_train.py | 2 +- tests/data/test_model.py | 8 ++++---- 11 files changed, 22 insertions(+), 22 deletions(-) diff --git a/aiida_mlip/data/model.py b/aiida_mlip/data/model.py index 87f6ee47..1021984c 100644 --- a/aiida_mlip/data/model.py +++ b/aiida_mlip/data/model.py @@ -35,7 +35,7 @@ class ModelData(SinglefileData): Set the file for the node. from_local(file, architecture, filename=None): Create a ModelData instance from a local file. - from_url(url, architecture, filename=None, cache_dir=None, keep_file=False) + from_uri(url, architecture, filename=None, cache_dir=None, keep_file=False) Download a file from a URL and save it as ModelData. Other Parameters @@ -155,7 +155,7 @@ def from_local( @classmethod # pylint: disable=too-many-arguments - def from_url( + def from_uri( cls, url: str, architecture: str, diff --git a/aiida_mlip/helpers/help_load.py b/aiida_mlip/helpers/help_load.py index 41979e13..f4504e2c 100644 --- a/aiida_mlip/helpers/help_load.py +++ b/aiida_mlip/helpers/help_load.py @@ -42,7 +42,7 @@ def load_model( The loaded model. """ if model is None: - loaded_model = ModelData.from_url( + loaded_model = ModelData.from_uri( "https://github.com/stfc/janus-core/raw/main/tests/models/mace_mp_small.model", # pylint: disable=line-too-long architecture, cache_dir=cache_dir, @@ -50,7 +50,7 @@ def load_model( elif (file_path := Path(model)).is_file(): loaded_model = ModelData.from_local(file_path, architecture=architecture) else: - loaded_model = ModelData.from_url( + loaded_model = ModelData.from_uri( model, architecture=architecture, cache_dir=cache_dir ) return loaded_model diff --git a/docs/source/user_guide/data.rst b/docs/source/user_guide/data.rst index 396afdf4..28e7948a 100644 --- a/docs/source/user_guide/data.rst +++ b/docs/source/user_guide/data.rst @@ -29,7 +29,7 @@ Usage .. code-block:: python - model = ModelData.from_url('http://yoururl.test/model', architecture='mace', filename='model', cache_dir='/home/mlip/', force_download=False) + model = ModelData.from_uri('http://yoururl.test/model', architecture='mace', filename='model', cache_dir='/home/mlip/', force_download=False) - The architecture of the model file can be accessed using the `architecture` property: diff --git a/docs/source/user_guide/tutorial.rst b/docs/source/user_guide/tutorial.rst index 08ab8f63..cd8f1869 100644 --- a/docs/source/user_guide/tutorial.rst +++ b/docs/source/user_guide/tutorial.rst @@ -35,7 +35,7 @@ In this example we use MACE with a model that we download from this URL: "https: from aiida_mlip.data.model import ModelData url = "https://github.com/stfc/janus-core/raw/main/tests/models/mace_mp_small.model" - model = ModelData.from_url(url, architecture="mace", cache_dir="/.cache/") + model = ModelData.from_uri(url, architecture="mace", cache_dir="/.cache/") If we already have the model saved in some folder we can save it as: diff --git a/examples/tutorials/geometry-optimisation.ipynb b/examples/tutorials/geometry-optimisation.ipynb index ad713ee5..f0b4d9ca 100644 --- a/examples/tutorials/geometry-optimisation.ipynb +++ b/examples/tutorials/geometry-optimisation.ipynb @@ -76,7 +76,7 @@ "source": [ "from aiida_mlip.data.model import ModelData\n", "url = \"https://github.com/stfc/janus-core/raw/main/tests/models/mace_mp_small.model\"\n", - "model = ModelData.from_url(url, architecture=\"mace_mp\", cache_dir=\"mlips\")" + "model = ModelData.from_uri(url, architecture=\"mace_mp\", cache_dir=\"mlips\")" ] }, { @@ -368,7 +368,7 @@ " return traj.get_step_structure(index.value)\n", "\n", "url = \"https://github.com/stfc/janus-core/raw/main/tests/models/mace_mp_small.model\"\n", - "model = ModelData.from_url(url, architecture=\"mace_mp\", cache_dir=\"mlips\")\n", + "model = ModelData.from_uri(url, architecture=\"mace_mp\", cache_dir=\"mlips\")\n", "list_of_nodes = []\n", "\n", "\n", diff --git a/examples/tutorials/singlepoint.ipynb b/examples/tutorials/singlepoint.ipynb index 4fa86f0b..581d4288 100644 --- a/examples/tutorials/singlepoint.ipynb +++ b/examples/tutorials/singlepoint.ipynb @@ -66,7 +66,7 @@ "source": [ "from aiida_mlip.data.model import ModelData\n", "url = \"https://github.com/stfc/janus-core/raw/main/tests/models/mace_mp_small.model\"\n", - "model = ModelData.from_url(url, architecture=\"mace_mp\", cache_dir=\"mlips\")" + "model = ModelData.from_uri(url, architecture=\"mace_mp\", cache_dir=\"mlips\")" ] }, { diff --git a/tests/calculations/test_geomopt.py b/tests/calculations/test_geomopt.py index e94092c4..0cb76c2a 100644 --- a/tests/calculations/test_geomopt.py +++ b/tests/calculations/test_geomopt.py @@ -43,7 +43,7 @@ def test_geomopt(fixture_sandbox, generate_calc_job, janus_code, model_folder): "--out", "aiida-results.xyz", "--calc-kwargs", - "{'default_dtype': 'float64', 'model': 'modelcopy.model'}", + "{'default_dtype': 'float64', 'model': 'mlff.model'}", "--traj", "aiida-traj.xyz", ] @@ -58,7 +58,7 @@ def test_geomopt(fixture_sandbox, generate_calc_job, janus_code, model_folder): # Check the attributes of the returned `CalcInfo` assert sorted(fixture_sandbox.get_content_list()) == sorted( - ["aiida.xyz", "modelcopy.model"] + ["aiida.xyz", "mlff.model"] ) assert isinstance(calc_info, datastructures.CalcInfo) assert isinstance(calc_info.codes_info[0], datastructures.CodeInfo) diff --git a/tests/calculations/test_md.py b/tests/calculations/test_md.py index 79ca12b5..2b907c3d 100644 --- a/tests/calculations/test_md.py +++ b/tests/calculations/test_md.py @@ -56,7 +56,7 @@ def test_MD(fixture_sandbox, generate_calc_job, janus_code, model_folder): "--summary", "md_summary.yml", "--calc-kwargs", - "{'default_dtype': 'float64', 'model': 'modelcopy.model'}", + "{'default_dtype': 'float64', 'model': 'mlff.model'}", "--ensemble", "nve", "--temp", @@ -86,7 +86,7 @@ def test_MD(fixture_sandbox, generate_calc_job, janus_code, model_folder): # Check the attributes of the returned `CalcInfo` assert sorted(fixture_sandbox.get_content_list()) == sorted( - ["aiida.xyz", "modelcopy.model"] + ["aiida.xyz", "mlff.model"] ) assert isinstance(calc_info, datastructures.CalcInfo) assert isinstance(calc_info.codes_info[0], datastructures.CodeInfo) @@ -125,7 +125,7 @@ def test_MD_with_config( "--arch", "mace", "--calc-kwargs", - "{'model': 'modelcopy.model'}", + "{'model': 'mlff.model'}", "--config", "config.yaml", "--ensemble", @@ -152,7 +152,7 @@ def test_MD_with_config( [ "aiida.xyz", "config.yaml", - "modelcopy.model", + "mlff.model", ] ) assert isinstance(calc_info, datastructures.CalcInfo) diff --git a/tests/calculations/test_singlepoint.py b/tests/calculations/test_singlepoint.py index de465ccd..31959603 100644 --- a/tests/calculations/test_singlepoint.py +++ b/tests/calculations/test_singlepoint.py @@ -44,7 +44,7 @@ def test_singlepoint(fixture_sandbox, generate_calc_job, janus_code, model_folde "--out", "aiida-results.xyz", "--calc-kwargs", - "{'default_dtype': 'float64', 'model': 'modelcopy.model'}", + "{'default_dtype': 'float64', 'model': 'mlff.model'}", ] retrieve_list = [ @@ -56,7 +56,7 @@ def test_singlepoint(fixture_sandbox, generate_calc_job, janus_code, model_folde # Check the attributes of the returned `CalcInfo` assert sorted(fixture_sandbox.get_content_list()) == sorted( - ["aiida.xyz", "modelcopy.model"] + ["aiida.xyz", "mlff.model"] ) assert isinstance(calc_info, datastructures.CalcInfo) assert isinstance(calc_info.codes_info[0], datastructures.CodeInfo) diff --git a/tests/calculations/test_train.py b/tests/calculations/test_train.py index ade8c681..cdb27d7d 100644 --- a/tests/calculations/test_train.py +++ b/tests/calculations/test_train.py @@ -142,7 +142,7 @@ def test_prepare_tune(fixture_sandbox, generate_calc_job, janus_code, config_fol # Check the attributes of the returned `CalcInfo` assert sorted(fixture_sandbox.get_content_list()) == sorted( - ["mlip_train.yml", "modelcopy.model"] + ["mlip_train.yml", "mlff.model"] ) assert isinstance(calc_info, datastructures.CalcInfo) assert isinstance(calc_info.codes_info[0], datastructures.CodeInfo) diff --git a/tests/data/test_model.py b/tests/data/test_model.py index 3315f4f4..2727a62b 100644 --- a/tests/data/test_model.py +++ b/tests/data/test_model.py @@ -46,7 +46,7 @@ def test_download_fresh_file_keep(tmp_path): # Construct a ModelData instance downloading a non-cached file # pylint:disable=line-too-long - model = ModelData.from_url( + model = ModelData.from_uri( url="https://github.com/stfc/janus-core/raw/main/tests/models/mace_mp_small.model", filename="mace.model", cache_dir=tmp_path, @@ -68,7 +68,7 @@ def test_download_fresh_file(tmp_path): # Construct a ModelData instance downloading a non-cached file # pylint:disable=line-too-long - model = ModelData.from_url( + model = ModelData.from_uri( url="https://github.com/stfc/janus-core/raw/main/tests/models/mace_mp_small.model", filename="mace.model", cache_dir=tmp_path, @@ -85,7 +85,7 @@ def test_no_download_cached_file(tmp_path): """Test if the caching prevents saving duplicate model in the database.""" # pylint:disable=line-too-long - existing_model = ModelData.from_url( + existing_model = ModelData.from_uri( url="https://github.com/stfc/janus-core/raw/main/tests/models/mace_mp_small.model", filename="mace_existing.model", cache_dir=tmp_path, @@ -93,7 +93,7 @@ def test_no_download_cached_file(tmp_path): ) # Construct a ModelData instance that should use the cached file # pylint:disable=line-too-long - model = ModelData.from_url( + model = ModelData.from_uri( url="https://github.com/stfc/janus-core/raw/main/tests/models/mace_mp_small.model", cache_dir=tmp_path, filename="test_model.model", From 4eed4410b4a71fe0f9a62ec5712747e697dde047 Mon Sep 17 00:00:00 2001 From: Federica Zanca <93498393+federicazanca@users.noreply.github.com> Date: Wed, 17 Jul 2024 10:00:09 +0100 Subject: [PATCH 12/20] Update aiida_mlip/calculations/base.py Co-authored-by: Jacob Wilkins <46597752+oerc0122@users.noreply.github.com> --- aiida_mlip/calculations/base.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/aiida_mlip/calculations/base.py b/aiida_mlip/calculations/base.py index 7f033344..310e45c1 100644 --- a/aiida_mlip/calculations/base.py +++ b/aiida_mlip/calculations/base.py @@ -300,7 +300,7 @@ def _add_model_to_cmdline( cmd_line : dict Dictionary containing the cmd line keys. - folder : aiida.common.folders.Folder + folder : ~aiida.common.folders.Folder An `aiida.common.folders.Folder` to temporarily write files on disk. Returns From e282c18f7db344f53b618c96b1baf7ad23d852dd Mon Sep 17 00:00:00 2001 From: federica Date: Thu, 18 Jul 2024 11:41:29 +0100 Subject: [PATCH 13/20] fixes and all works --- aiida_mlip/calculations/base.py | 7 +++++-- aiida_mlip/data/model.py | 21 ++++++++++++--------- examples/calculations/submit_geomopt.py | 4 ++-- 3 files changed, 19 insertions(+), 13 deletions(-) diff --git a/aiida_mlip/calculations/base.py b/aiida_mlip/calculations/base.py index 310e45c1..c6261b01 100644 --- a/aiida_mlip/calculations/base.py +++ b/aiida_mlip/calculations/base.py @@ -65,10 +65,13 @@ def validate_inputs( if ( "arch" in inputs and "model" in inputs - and inputs["arch"].value is not inputs["model"].architecture + and inputs["arch"].value != inputs["model"].architecture ): + inputvalue = inputs["arch"].value + modelvalue = inputs["model"].architecture raise InputValidationError( - "'arch' in ModelData and in 'arch' input must be the same" + "'arch' in ModelData and in inputs must be the same, " + f"but they are {modelvalue} and {inputvalue}" ) diff --git a/aiida_mlip/data/model.py b/aiida_mlip/data/model.py index 1021984c..f503c2d6 100644 --- a/aiida_mlip/data/model.py +++ b/aiida_mlip/data/model.py @@ -6,7 +6,6 @@ from urllib import request from aiida.orm import QueryBuilder, SinglefileData, load_node -from aiida.tools import delete_nodes class ModelData(SinglefileData): @@ -213,15 +212,19 @@ def from_uri( for i in qb.iterdict(): # If the hash is the same as the new model, but not the creation time # it means that there is already a model that is the same, use that - if i["ModelData_1"]["attributes"]["model_hash"] == model.model_hash: + if ( + "model_hash" in i["ModelData_1"]["attributes"] + and i["ModelData_1"]["attributes"]["model_hash"] == model.model_hash + and i["ModelData_1"]["attributes"]["architecture"] == model.architecture + ): if i["ModelData_1"]["ctime"] != model.ctime: - delete_nodes( - [model.uuid], - dry_run=False, - create_forward=True, - call_calc_forward=True, - call_work_forward=True, - ) + # delete_nodes( + # [model.pk], + # dry_run=False, + # create_forward=True, + # call_calc_forward=True, + # call_work_forward=True, + # ) model = load_node(i["ModelData_1"]["pk"]) break return model diff --git a/examples/calculations/submit_geomopt.py b/examples/calculations/submit_geomopt.py index d7396068..a06bc8b1 100644 --- a/examples/calculations/submit_geomopt.py +++ b/examples/calculations/submit_geomopt.py @@ -4,7 +4,7 @@ from aiida.common import NotExistent from aiida.engine import run_get_node -from aiida.orm import Bool, Dict, Float, Int, Str, load_code +from aiida.orm import Bool, Float, Int, Str, load_code from aiida.plugins import CalculationFactory from aiida_mlip.helpers.help_load import load_model, load_structure @@ -44,7 +44,7 @@ def geomopt(params: dict) -> None: "fmax": Float(params["fmax"]), "vectors_only": Bool(params["vectors_only"]), "fully_opt": Bool(params["fully_opt"]), - "opt_kwargs": Dict({"restart": "rest.pkl"}), + # "opt_kwargs": Dict({"restart": "rest.pkl"}), "steps": Int(params["steps"]), } From de9800d5caa63f8d3e06a42f13955713ec128cd4 Mon Sep 17 00:00:00 2001 From: federica Date: Thu, 18 Jul 2024 11:56:41 +0100 Subject: [PATCH 14/20] change url to uri --- aiida_mlip/data/model.py | 14 +++++++------- aiida_mlip/helpers/help_load.py | 6 +++--- docs/source/user_guide/data.rst | 4 ++-- docs/source/user_guide/tutorial.rst | 6 +++--- examples/calculations/submit_geomopt.py | 2 +- examples/calculations/submit_md.py | 2 +- examples/calculations/submit_singlepoint.py | 2 +- examples/tutorials/geometry-optimisation.ipynb | 8 ++++---- examples/tutorials/singlepoint.ipynb | 4 ++-- tests/data/test_model.py | 8 ++++---- tests/helpers/test_load.py | 6 +++--- 11 files changed, 31 insertions(+), 31 deletions(-) diff --git a/aiida_mlip/data/model.py b/aiida_mlip/data/model.py index f503c2d6..4c08bb49 100644 --- a/aiida_mlip/data/model.py +++ b/aiida_mlip/data/model.py @@ -34,8 +34,8 @@ class ModelData(SinglefileData): Set the file for the node. from_local(file, architecture, filename=None): Create a ModelData instance from a local file. - from_uri(url, architecture, filename=None, cache_dir=None, keep_file=False) - Download a file from a URL and save it as ModelData. + from_uri(uri, architecture, filename=None, cache_dir=None, keep_file=False) + Download a file from a uri and save it as ModelData. Other Parameters ---------------- @@ -156,19 +156,19 @@ def from_local( # pylint: disable=too-many-arguments def from_uri( cls, - url: str, + uri: str, architecture: str, filename: Optional[str] = "tmp_file.model", cache_dir: Optional[Union[str, Path]] = None, keep_file: Optional[bool] = False, ): """ - Download a file from a URL and save it as ModelData. + Download a file from a uri and save it as ModelData. Parameters ---------- - url : str - URL of the file to download. + uri : str + uri of the file to download. architecture : [str] Architecture of the mlip model. filename : Optional[str], optional @@ -196,7 +196,7 @@ def from_uri( file = arch_path / filename # Download file - request.urlretrieve(url, file) + request.urlretrieve(uri, file) model = cls.from_local(file=file, architecture=architecture) diff --git a/aiida_mlip/helpers/help_load.py b/aiida_mlip/helpers/help_load.py index f4504e2c..46f71435 100644 --- a/aiida_mlip/helpers/help_load.py +++ b/aiida_mlip/helpers/help_load.py @@ -20,17 +20,17 @@ def load_model( cache_dir: Optional[Union[str, Path]] = None, ) -> ModelData: """ - Load a model from a file path or URL. + Load a model from a file path or uri. If the string represents a file path, the model will be loaded from that path. - If it's a URL, the model will be downloaded from the specified location. + If it's a uri, the model will be downloaded from the specified location. If the input model is None it returns a default model corresponding to the default used in the Calcjobs. Parameters ---------- model : Optional[Union[str, Path]] - Model file path or a URL for downloading the model or None to use the default. + Model file path or a uri for downloading the model or None to use the default. architecture : str The architecture of the model. cache_dir : Optional[Union[str, Path]] diff --git a/docs/source/user_guide/data.rst b/docs/source/user_guide/data.rst index 28e7948a..fd71a3ea 100644 --- a/docs/source/user_guide/data.rst +++ b/docs/source/user_guide/data.rst @@ -4,7 +4,7 @@ Data types ModelData --------- -Defines a custom data type called `ModelData` in AiiDA, which is a subclass of the `SinglefileData` type. `ModelData` is used to handle model files and provides functionalities for handling local files and downloading files from URLs. +Defines a custom data type called `ModelData` in AiiDA, which is a subclass of the `SinglefileData` type. `ModelData` is used to handle model files and provides functionalities for handling local files and downloading files from uris. Additional features compared to `SinglefileData`: - It can take a relative path as an argument @@ -12,7 +12,7 @@ Additional features compared to `SinglefileData`: - It takes the argument "architecture" which is specifically related to the mlip model and it is added to the node attributes. - Download functionality: - - When provided with a URL, `ModelData` automatically downloads the file. + - When provided with a uri, `ModelData` automatically downloads the file. - Saves the downloaded file in a specified folder (default: `./cache/mlips`), creating a subfolder if the architecture, and stores it as an AiiDA data type. - Handles duplicate files: if the file is downloaded twice, duplicates within the same folder are canceled, unless `force_download=True` is stated. diff --git a/docs/source/user_guide/tutorial.rst b/docs/source/user_guide/tutorial.rst index cd8f1869..0d171833 100644 --- a/docs/source/user_guide/tutorial.rst +++ b/docs/source/user_guide/tutorial.rst @@ -29,13 +29,13 @@ The input structure in aiida-mlip needs to be saved as a StructureData type: structure = StructureData(ase=read("/path/to/structure.cif")) Then we need to choose a model and architecture to be used for the calculation and save it as ModelData type, a specific data type of this plugin. -In this example we use MACE with a model that we download from this URL: "https://github.com/stfc/janus-core/raw/main/tests/models/mace_mp_small.model", and we save the file in the cache folder (default="~/.cache/mlips/"): +In this example we use MACE with a model that we download from this uri: "https://github.com/stfc/janus-core/raw/main/tests/models/mace_mp_small.model", and we save the file in the cache folder (default="~/.cache/mlips/"): .. code-block:: python from aiida_mlip.data.model import ModelData - url = "https://github.com/stfc/janus-core/raw/main/tests/models/mace_mp_small.model" - model = ModelData.from_uri(url, architecture="mace", cache_dir="/.cache/") + uri = "https://github.com/stfc/janus-core/raw/main/tests/models/mace_mp_small.model" + model = ModelData.from_uri(uri, architecture="mace", cache_dir="/.cache/") If we already have the model saved in some folder we can save it as: diff --git a/examples/calculations/submit_geomopt.py b/examples/calculations/submit_geomopt.py index a06bc8b1..cf14f279 100644 --- a/examples/calculations/submit_geomopt.py +++ b/examples/calculations/submit_geomopt.py @@ -67,7 +67,7 @@ def geomopt(params: dict) -> None: "--model", default=None, type=str, - help="Specify path or url of the model to use", + help="Specify path or uri of the model to use", ) @click.option( "--arch", diff --git a/examples/calculations/submit_md.py b/examples/calculations/submit_md.py index 98950eea..3967232f 100644 --- a/examples/calculations/submit_md.py +++ b/examples/calculations/submit_md.py @@ -66,7 +66,7 @@ def MD(params: dict) -> None: "--model", default=None, type=str, - help="Specify path or url of the model to use", + help="Specify path or uri of the model to use", ) @click.option( "--arch", diff --git a/examples/calculations/submit_singlepoint.py b/examples/calculations/submit_singlepoint.py index 9b30b95b..9393ef33 100644 --- a/examples/calculations/submit_singlepoint.py +++ b/examples/calculations/submit_singlepoint.py @@ -62,7 +62,7 @@ def singlepoint(params: dict) -> None: "--model", default=None, type=str, - help="Specify path or url of the model to use", + help="Specify path or uri of the model to use", ) @click.option( "--arch", diff --git a/examples/tutorials/geometry-optimisation.ipynb b/examples/tutorials/geometry-optimisation.ipynb index f0b4d9ca..48842970 100644 --- a/examples/tutorials/geometry-optimisation.ipynb +++ b/examples/tutorials/geometry-optimisation.ipynb @@ -75,8 +75,8 @@ "outputs": [], "source": [ "from aiida_mlip.data.model import ModelData\n", - "url = \"https://github.com/stfc/janus-core/raw/main/tests/models/mace_mp_small.model\"\n", - "model = ModelData.from_uri(url, architecture=\"mace_mp\", cache_dir=\"mlips\")" + "uri = \"https://github.com/stfc/janus-core/raw/main/tests/models/mace_mp_small.model\"\n", + "model = ModelData.from_uri(uri, architecture=\"mace_mp\", cache_dir=\"mlips\")" ] }, { @@ -367,8 +367,8 @@ "def prepare_struct_inputs(traj, index):\n", " return traj.get_step_structure(index.value)\n", "\n", - "url = \"https://github.com/stfc/janus-core/raw/main/tests/models/mace_mp_small.model\"\n", - "model = ModelData.from_uri(url, architecture=\"mace_mp\", cache_dir=\"mlips\")\n", + "uri = \"https://github.com/stfc/janus-core/raw/main/tests/models/mace_mp_small.model\"\n", + "model = ModelData.from_uri(uri, architecture=\"mace_mp\", cache_dir=\"mlips\")\n", "list_of_nodes = []\n", "\n", "\n", diff --git a/examples/tutorials/singlepoint.ipynb b/examples/tutorials/singlepoint.ipynb index 581d4288..32f91df1 100644 --- a/examples/tutorials/singlepoint.ipynb +++ b/examples/tutorials/singlepoint.ipynb @@ -65,8 +65,8 @@ "outputs": [], "source": [ "from aiida_mlip.data.model import ModelData\n", - "url = \"https://github.com/stfc/janus-core/raw/main/tests/models/mace_mp_small.model\"\n", - "model = ModelData.from_uri(url, architecture=\"mace_mp\", cache_dir=\"mlips\")" + "uri = \"https://github.com/stfc/janus-core/raw/main/tests/models/mace_mp_small.model\"\n", + "model = ModelData.from_uri(uri, architecture=\"mace_mp\", cache_dir=\"mlips\")" ] }, { diff --git a/tests/data/test_model.py b/tests/data/test_model.py index 2727a62b..4b650c6a 100644 --- a/tests/data/test_model.py +++ b/tests/data/test_model.py @@ -47,7 +47,7 @@ def test_download_fresh_file_keep(tmp_path): # Construct a ModelData instance downloading a non-cached file # pylint:disable=line-too-long model = ModelData.from_uri( - url="https://github.com/stfc/janus-core/raw/main/tests/models/mace_mp_small.model", + uri="https://github.com/stfc/janus-core/raw/main/tests/models/mace_mp_small.model", filename="mace.model", cache_dir=tmp_path, architecture="mace", @@ -69,7 +69,7 @@ def test_download_fresh_file(tmp_path): # Construct a ModelData instance downloading a non-cached file # pylint:disable=line-too-long model = ModelData.from_uri( - url="https://github.com/stfc/janus-core/raw/main/tests/models/mace_mp_small.model", + uri="https://github.com/stfc/janus-core/raw/main/tests/models/mace_mp_small.model", filename="mace.model", cache_dir=tmp_path, architecture="mace", @@ -86,7 +86,7 @@ def test_no_download_cached_file(tmp_path): # pylint:disable=line-too-long existing_model = ModelData.from_uri( - url="https://github.com/stfc/janus-core/raw/main/tests/models/mace_mp_small.model", + uri="https://github.com/stfc/janus-core/raw/main/tests/models/mace_mp_small.model", filename="mace_existing.model", cache_dir=tmp_path, architecture="mace_mp", @@ -94,7 +94,7 @@ def test_no_download_cached_file(tmp_path): # Construct a ModelData instance that should use the cached file # pylint:disable=line-too-long model = ModelData.from_uri( - url="https://github.com/stfc/janus-core/raw/main/tests/models/mace_mp_small.model", + uri="https://github.com/stfc/janus-core/raw/main/tests/models/mace_mp_small.model", cache_dir=tmp_path, filename="test_model.model", architecture="mace_mp", diff --git a/tests/helpers/test_load.py b/tests/helpers/test_load.py index cc03df6c..2ebc04d1 100644 --- a/tests/helpers/test_load.py +++ b/tests/helpers/test_load.py @@ -20,12 +20,12 @@ def test_load_local_model(model_folder): def test_download_model(tmp_path): - """Test for the load_model function for loading from URL.""" - url_model = ( + """Test for the load_model function for loading from uri.""" + uri_model = ( "https://github.com/stfc/janus-core/raw/main/tests/models/mace_mp_small.model" ) loaded_model = load_model( - url_model, architecture="example_architecture", cache_dir=tmp_path + uri_model, architecture="example_architecture", cache_dir=tmp_path ) assert isinstance(loaded_model, ModelData) From 8a6a004289644bc4d31f093607ae1179ead4f88f Mon Sep 17 00:00:00 2001 From: Federica Zanca <93498393+federicazanca@users.noreply.github.com> Date: Thu, 18 Jul 2024 15:40:27 +0100 Subject: [PATCH 15/20] Update aiida_mlip/calculations/base.py Co-authored-by: Jacob Wilkins <46597752+oerc0122@users.noreply.github.com> --- aiida_mlip/calculations/base.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/aiida_mlip/calculations/base.py b/aiida_mlip/calculations/base.py index c6261b01..0cbb8d4d 100644 --- a/aiida_mlip/calculations/base.py +++ b/aiida_mlip/calculations/base.py @@ -317,8 +317,8 @@ def _add_model_to_cmdline( if self.inputs.model is None: raise ValueError("Model cannot be None") - with self.inputs.model.open(mode="rb") as source: - with folder.open("mlff.model", mode="wb") as target: + with (self.inputs.model.open(mode="rb") as source, + folder.open("mlff.model", mode="wb") as target): shutil.copyfileobj(source, target) model_path = "mlff.model" From 680474da78cca1e54647efdf35548c2f9a0551d7 Mon Sep 17 00:00:00 2001 From: federica Date: Thu, 18 Jul 2024 16:40:05 +0100 Subject: [PATCH 16/20] change to URI and fix to qb) --- aiida_mlip/data/model.py | 42 +++++++++------------ aiida_mlip/helpers/help_load.py | 6 +-- docs/source/user_guide/data.rst | 4 +- docs/source/user_guide/tutorial.rst | 2 +- examples/calculations/submit_geomopt.py | 2 +- examples/calculations/submit_md.py | 2 +- examples/calculations/submit_singlepoint.py | 2 +- tests/data/test_model.py | 3 ++ tests/helpers/test_load.py | 2 +- 9 files changed, 31 insertions(+), 34 deletions(-) diff --git a/aiida_mlip/data/model.py b/aiida_mlip/data/model.py index 4c08bb49..7d37a6df 100644 --- a/aiida_mlip/data/model.py +++ b/aiida_mlip/data/model.py @@ -35,7 +35,7 @@ class ModelData(SinglefileData): from_local(file, architecture, filename=None): Create a ModelData instance from a local file. from_uri(uri, architecture, filename=None, cache_dir=None, keep_file=False) - Download a file from a uri and save it as ModelData. + Download a file from a URI and save it as ModelData. Other Parameters ---------------- @@ -163,12 +163,12 @@ def from_uri( keep_file: Optional[bool] = False, ): """ - Download a file from a uri and save it as ModelData. + Download a file from a URI and save it as ModelData. Parameters ---------- uri : str - uri of the file to download. + URI of the file to download. architecture : [str] Architecture of the mlip model. filename : Optional[str], optional @@ -206,27 +206,21 @@ def from_uri( file.unlink(missing_ok=True) qb = QueryBuilder() - qb.append(ModelData, project=["attributes", "pk", "ctime"]) - - # Looking for ModelData in the whole database - for i in qb.iterdict(): - # If the hash is the same as the new model, but not the creation time - # it means that there is already a model that is the same, use that - if ( - "model_hash" in i["ModelData_1"]["attributes"] - and i["ModelData_1"]["attributes"]["model_hash"] == model.model_hash - and i["ModelData_1"]["attributes"]["architecture"] == model.architecture - ): - if i["ModelData_1"]["ctime"] != model.ctime: - # delete_nodes( - # [model.pk], - # dry_run=False, - # create_forward=True, - # call_calc_forward=True, - # call_work_forward=True, - # ) - model = load_node(i["ModelData_1"]["pk"]) - break + qb.append( + ModelData, + filters={ + "attributes.model_hash": model.model_hash, + "attributes.architecture": model.architecture, + "ctime": {"!in": [model.ctime]}, + }, + project=["attributes", "pk", "ctime"], + ) + + if qb.count() != 0: + model = load_node( + qb.first()[1] + ) # This gets the pk of the first model in the query + return model @property diff --git a/aiida_mlip/helpers/help_load.py b/aiida_mlip/helpers/help_load.py index 46f71435..f998425b 100644 --- a/aiida_mlip/helpers/help_load.py +++ b/aiida_mlip/helpers/help_load.py @@ -20,17 +20,17 @@ def load_model( cache_dir: Optional[Union[str, Path]] = None, ) -> ModelData: """ - Load a model from a file path or uri. + Load a model from a file path or URI. If the string represents a file path, the model will be loaded from that path. - If it's a uri, the model will be downloaded from the specified location. + If it's a URI, the model will be downloaded from the specified location. If the input model is None it returns a default model corresponding to the default used in the Calcjobs. Parameters ---------- model : Optional[Union[str, Path]] - Model file path or a uri for downloading the model or None to use the default. + Model file path or a URI for downloading the model or None to use the default. architecture : str The architecture of the model. cache_dir : Optional[Union[str, Path]] diff --git a/docs/source/user_guide/data.rst b/docs/source/user_guide/data.rst index fd71a3ea..3b7c63d2 100644 --- a/docs/source/user_guide/data.rst +++ b/docs/source/user_guide/data.rst @@ -4,7 +4,7 @@ Data types ModelData --------- -Defines a custom data type called `ModelData` in AiiDA, which is a subclass of the `SinglefileData` type. `ModelData` is used to handle model files and provides functionalities for handling local files and downloading files from uris. +Defines a custom data type called `ModelData` in AiiDA, which is a subclass of the `SinglefileData` type. `ModelData` is used to handle model files and provides functionalities for handling local files and downloading files from URIs. Additional features compared to `SinglefileData`: - It can take a relative path as an argument @@ -12,7 +12,7 @@ Additional features compared to `SinglefileData`: - It takes the argument "architecture" which is specifically related to the mlip model and it is added to the node attributes. - Download functionality: - - When provided with a uri, `ModelData` automatically downloads the file. + - When provided with a URI, `ModelData` automatically downloads the file. - Saves the downloaded file in a specified folder (default: `./cache/mlips`), creating a subfolder if the architecture, and stores it as an AiiDA data type. - Handles duplicate files: if the file is downloaded twice, duplicates within the same folder are canceled, unless `force_download=True` is stated. diff --git a/docs/source/user_guide/tutorial.rst b/docs/source/user_guide/tutorial.rst index 0d171833..f07d6189 100644 --- a/docs/source/user_guide/tutorial.rst +++ b/docs/source/user_guide/tutorial.rst @@ -29,7 +29,7 @@ The input structure in aiida-mlip needs to be saved as a StructureData type: structure = StructureData(ase=read("/path/to/structure.cif")) Then we need to choose a model and architecture to be used for the calculation and save it as ModelData type, a specific data type of this plugin. -In this example we use MACE with a model that we download from this uri: "https://github.com/stfc/janus-core/raw/main/tests/models/mace_mp_small.model", and we save the file in the cache folder (default="~/.cache/mlips/"): +In this example we use MACE with a model that we download from this URI: "https://github.com/stfc/janus-core/raw/main/tests/models/mace_mp_small.model", and we save the file in the cache folder (default="~/.cache/mlips/"): .. code-block:: python diff --git a/examples/calculations/submit_geomopt.py b/examples/calculations/submit_geomopt.py index cf14f279..cd826cc9 100644 --- a/examples/calculations/submit_geomopt.py +++ b/examples/calculations/submit_geomopt.py @@ -67,7 +67,7 @@ def geomopt(params: dict) -> None: "--model", default=None, type=str, - help="Specify path or uri of the model to use", + help="Specify path or URI of the model to use", ) @click.option( "--arch", diff --git a/examples/calculations/submit_md.py b/examples/calculations/submit_md.py index 3967232f..1858be46 100644 --- a/examples/calculations/submit_md.py +++ b/examples/calculations/submit_md.py @@ -66,7 +66,7 @@ def MD(params: dict) -> None: "--model", default=None, type=str, - help="Specify path or uri of the model to use", + help="Specify path or URI of the model to use", ) @click.option( "--arch", diff --git a/examples/calculations/submit_singlepoint.py b/examples/calculations/submit_singlepoint.py index 9393ef33..a5997b6d 100644 --- a/examples/calculations/submit_singlepoint.py +++ b/examples/calculations/submit_singlepoint.py @@ -62,7 +62,7 @@ def singlepoint(params: dict) -> None: "--model", default=None, type=str, - help="Specify path or uri of the model to use", + help="Specify path or URI of the model to use", ) @click.option( "--arch", diff --git a/tests/data/test_model.py b/tests/data/test_model.py index 4b650c6a..b97f19c9 100644 --- a/tests/data/test_model.py +++ b/tests/data/test_model.py @@ -99,6 +99,9 @@ def test_no_download_cached_file(tmp_path): filename="test_model.model", architecture="mace_mp", ) + file_path = tmp_path / "test_model.model" # Assert the new ModelData was not downloaded and the old one is still there assert model.pk == existing_model.pk + assert model.model_hash == existing_model.model_hash + assert file_path.exists() is False, f"File {file_path} exists and shouldn't." diff --git a/tests/helpers/test_load.py b/tests/helpers/test_load.py index 2ebc04d1..912500d4 100644 --- a/tests/helpers/test_load.py +++ b/tests/helpers/test_load.py @@ -20,7 +20,7 @@ def test_load_local_model(model_folder): def test_download_model(tmp_path): - """Test for the load_model function for loading from uri.""" + """Test for the load_model function for loading from URI.""" uri_model = ( "https://github.com/stfc/janus-core/raw/main/tests/models/mace_mp_small.model" ) From 8e19e78a7fd326bb8fbcc6e82d96f84657a59eed Mon Sep 17 00:00:00 2001 From: federica Date: Thu, 18 Jul 2024 16:45:54 +0100 Subject: [PATCH 17/20] fix pre-commit --- aiida_mlip/calculations/base.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/aiida_mlip/calculations/base.py b/aiida_mlip/calculations/base.py index 0cbb8d4d..32454cac 100644 --- a/aiida_mlip/calculations/base.py +++ b/aiida_mlip/calculations/base.py @@ -317,9 +317,11 @@ def _add_model_to_cmdline( if self.inputs.model is None: raise ValueError("Model cannot be None") - with (self.inputs.model.open(mode="rb") as source, - folder.open("mlff.model", mode="wb") as target): - shutil.copyfileobj(source, target) + with ( + self.inputs.model.open(mode="rb") as source, + folder.open("mlff.model", mode="wb") as target, + ): + shutil.copyfileobj(source, target) model_path = "mlff.model" cmd_line.setdefault("calc-kwargs", {})["model"] = model_path From ae650f0c27045b0eae01302693a0855ce01705d7 Mon Sep 17 00:00:00 2001 From: Federica Zanca <93498393+federicazanca@users.noreply.github.com> Date: Thu, 18 Jul 2024 17:12:06 +0100 Subject: [PATCH 18/20] Update aiida_mlip/data/model.py Co-authored-by: ElliottKasoar <45317199+ElliottKasoar@users.noreply.github.com> --- aiida_mlip/data/model.py | 1 + 1 file changed, 1 insertion(+) diff --git a/aiida_mlip/data/model.py b/aiida_mlip/data/model.py index 7d37a6df..398d408d 100644 --- a/aiida_mlip/data/model.py +++ b/aiida_mlip/data/model.py @@ -205,6 +205,7 @@ def from_uri( file.unlink(missing_ok=True) + # Check if the same model was used previously qb = QueryBuilder() qb.append( ModelData, From 74e113369e1080b37cb880537d14efe564a2b923 Mon Sep 17 00:00:00 2001 From: federica Date: Thu, 18 Jul 2024 17:15:19 +0100 Subject: [PATCH 19/20] change folder description --- aiida_mlip/calculations/base.py | 4 ++-- aiida_mlip/calculations/geomopt.py | 2 +- aiida_mlip/calculations/md.py | 2 +- aiida_mlip/calculations/singlepoint.py | 2 +- aiida_mlip/calculations/train.py | 2 +- 5 files changed, 6 insertions(+), 6 deletions(-) diff --git a/aiida_mlip/calculations/base.py b/aiida_mlip/calculations/base.py index 32454cac..a7131254 100644 --- a/aiida_mlip/calculations/base.py +++ b/aiida_mlip/calculations/base.py @@ -196,7 +196,7 @@ def prepare_for_submission( Parameters ---------- folder : aiida.common.folders.Folder - An `aiida.common.folders.Folder` to temporarily write files on disk. + Folder where the calculation is run. Returns ------- @@ -304,7 +304,7 @@ def _add_model_to_cmdline( Dictionary containing the cmd line keys. folder : ~aiida.common.folders.Folder - An `aiida.common.folders.Folder` to temporarily write files on disk. + Folder where the calculation is run. Returns ------- diff --git a/aiida_mlip/calculations/geomopt.py b/aiida_mlip/calculations/geomopt.py index 462395f6..3fec149c 100644 --- a/aiida_mlip/calculations/geomopt.py +++ b/aiida_mlip/calculations/geomopt.py @@ -100,7 +100,7 @@ def prepare_for_submission( Parameters ---------- folder : aiida.common.folders.Folder - An `aiida.common.folders.Folder` to temporarily write files on disk. + Folder where the calculation is run. Returns ------- diff --git a/aiida_mlip/calculations/md.py b/aiida_mlip/calculations/md.py index 7bef085c..d998707a 100644 --- a/aiida_mlip/calculations/md.py +++ b/aiida_mlip/calculations/md.py @@ -83,7 +83,7 @@ def prepare_for_submission( Parameters ---------- folder : aiida.common.folders.Folder - An `aiida.common.folders.Folder` to temporarily write files on disk. + Folder where the calculation is run. Returns ------- diff --git a/aiida_mlip/calculations/singlepoint.py b/aiida_mlip/calculations/singlepoint.py index b4656ed2..9519c370 100644 --- a/aiida_mlip/calculations/singlepoint.py +++ b/aiida_mlip/calculations/singlepoint.py @@ -82,7 +82,7 @@ def prepare_for_submission( Parameters ---------- folder : aiida.common.folders.Folder - An `aiida.common.folders.Folder` to temporarily write files on disk. + Folder where the calculation is run. Returns ------- diff --git a/aiida_mlip/calculations/train.py b/aiida_mlip/calculations/train.py index 5ea5cd64..bad60ad5 100644 --- a/aiida_mlip/calculations/train.py +++ b/aiida_mlip/calculations/train.py @@ -155,7 +155,7 @@ def prepare_for_submission( Parameters ---------- folder : aiida.common.folders.Folder - An `aiida.common.folders.Folder` to temporarily write files on disk. + Folder where the calculation is run. Returns ------- From 41257c3fab0f36a0104e7114f4e53f2b42489636 Mon Sep 17 00:00:00 2001 From: federica Date: Fri, 19 Jul 2024 09:52:28 +0100 Subject: [PATCH 20/20] tests path --- tests/data/test_model.py | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/tests/data/test_model.py b/tests/data/test_model.py index b97f19c9..145340e0 100644 --- a/tests/data/test_model.py +++ b/tests/data/test_model.py @@ -4,11 +4,12 @@ from aiida_mlip.data.model import ModelData +model_path = Path(__file__).parent / "input_files" / "model_local_file.txt" + def test_local_file(): """Testing that the from_local function works""" # Construct a ModelData instance with the local file - model_path = Path(__file__).parent / "input_files" / "model_local_file.txt" absolute_path = model_path.resolve() model = ModelData.from_local(file=absolute_path, architecture="mace") # Assert the ModelData contains the content we expect @@ -19,7 +20,6 @@ def test_local_file(): def test_relativepath(): """Testing that the from_local function works with a relative path""" # Construct a ModelData instance with the local file - model_path = Path(__file__).parent / "input_files" / "model_local_file.txt" relative_path = model_path.relative_to(Path.cwd()) model = ModelData.from_local(file=relative_path, architecture="mace") # Assert the ModelData contains the content we expect @@ -29,9 +29,8 @@ def test_relativepath(): def test_architecture(): """Testing that the architecture is read and added to attributes""" - file = Path(__file__).parent / "input_files/model_local_file.txt" model = ModelData.from_local( - file=file, + file=model_path, filename="model", architecture="mace", ) @@ -55,9 +54,8 @@ def test_download_fresh_file_keep(tmp_path): ) # Assert the ModelData is downloaded - file_path = tmp_path / "mace" / "mace.model" assert model.architecture == "mace" - assert file_path.exists(), f"File {file_path} does not exist." + assert path_test.exists(), f"File {path_test} does not exist." def test_download_fresh_file(tmp_path): @@ -76,9 +74,8 @@ def test_download_fresh_file(tmp_path): ) # Assert the ModelData is downloaded - file_path = tmp_path / "mace" / "mace.model" assert model.architecture == "mace" - assert file_path.exists() is False, f"File {file_path} exists and shouldn't." + assert path_test.exists() is False, f"File {path_test} exists and shouldn't." def test_no_download_cached_file(tmp_path):