From 4fe22bd7826bee611868a01ed5499a88b7a148f8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E2=80=9CDafydd?= <“dafydd_stephenson@hotmail.com”> Date: Thu, 15 Aug 2024 18:04:30 -0600 Subject: [PATCH 1/5] Add ability for `source` attribute in `InputDataset` to be a local path: - add _get_source_type method in utils (returns 'url' or 'path') - add call to _get_source_type in InputDataset.__init__() which updates local_path and exists_locally if 'path' - add logic to InputDataset.get() which creates a local symbolic link to the dataset path if it is elsewhere on the system - update InputDataset.check_exists_locally() to reflect above changes --- cstar_ocean/component.py | 12 ++-- cstar_ocean/cstar_case.py | 8 +-- cstar_ocean/input_dataset.py | 123 +++++++++++++++++++++-------------- cstar_ocean/utils.py | 16 +++++ 4 files changed, 100 insertions(+), 59 deletions(-) diff --git a/cstar_ocean/component.py b/cstar_ocean/component.py index 9d4cf22..9bf8348 100644 --- a/cstar_ocean/component.py +++ b/cstar_ocean/component.py @@ -112,12 +112,12 @@ def __str__(self): if isinstance(self.input_datasets, InputDataset) else 0 ) - base_str += f"\n{NAC} AdditionalCode repositories (query using ROMSComponent.additional_code)" + base_str += f"\n{NAC} AdditionalCode repositories (query using Component.additional_code)" base_str += ( - f"\n{NID} InputDataset objects (query using ROMSComponent.input_datasets" + f"\n{NID} InputDataset objects (query using Component.input_datasets" ) - disc_str='' + disc_str = "" if hasattr(self, "time_step") and self.time_step is not None: disc_str += "\ntime_step: " + str(self.time_step) if hasattr(self, "n_procs_x") and self.n_procs_x is not None: @@ -141,9 +141,9 @@ def __str__(self): if hasattr(self, "exe_path") and self.exe_path is not None: disc_str += "\n\nIs compiled: True" disc_str += "\n exe_path: " + self.exe_path - if len(disc_str)>0: - disc_str = "\n\nDiscretization info:"+disc_str - base_str+=disc_str + if len(disc_str) > 0: + disc_str = "\n\nDiscretization info:" + disc_str + base_str += disc_str return base_str def __repr__(self): diff --git a/cstar_ocean/cstar_case.py b/cstar_ocean/cstar_case.py index d4e2122..e02c7bc 100644 --- a/cstar_ocean/cstar_case.py +++ b/cstar_ocean/cstar_case.py @@ -112,7 +112,7 @@ def __init__( RuntimeWarning, ) self.valid_start_date = None - + if valid_end_date is not None: self.valid_end_date: Optional[dt.datetime] = ( valid_end_date @@ -127,8 +127,7 @@ def __init__( + "and valid_end_date attributes.", RuntimeWarning, ) - self.valid_end_date=None - + self.valid_end_date = None # Make sure Case start_date is set and is a datetime object: if start_date is not None: @@ -611,13 +610,12 @@ def check_is_setup(self) -> bool: ] elif isinstance(self.components, list): component_list = self.components - for component in component_list: if component.base_model.local_config_status != 0: return False # Check AdditionalCode - if (component.additional_code is not None) and ( + if (component.additional_code is None) or not ( component.additional_code.check_exists_locally(self.caseroot) ): return False diff --git a/cstar_ocean/input_dataset.py b/cstar_ocean/input_dataset.py index 6977c38..cc44974 100644 --- a/cstar_ocean/input_dataset.py +++ b/cstar_ocean/input_dataset.py @@ -4,6 +4,7 @@ import datetime as dt import dateutil.parser from typing import Optional +from cstar_ocean.utils import _get_source_type from cstar_ocean.base_model import BaseModel @@ -16,13 +17,13 @@ class InputDataset: base_model: BaseModel The base model with which this input dataset is associated source: str - URL pointing to the netCDF file containing this input dataset + local path or URL pointing to the netCDF file containing this input dataset file_hash: str The 256 bit SHA sum associated with the file for verifying downloads exists_locally: bool, default None - True if the input dataset has been fetched to the local machine, set when `check_exists_locally()` method is called + True if the input dataset exists on the local machine, set by `check_exists_locally()` method if source is a URL local_path: str, default None - The path to where the input dataset has been fetched locally, set when `get()` method is called + The path where the input dataset exists locally, set when `get()` is called if source is a URL Methods: -------- @@ -48,17 +49,23 @@ def __init__( base_model: BaseModel The base model with which this input dataset is associated source: str - URL pointing to the netCDF file containing this input dataset + URL or path pointing to the netCDF file containing this input dataset file_hash: str - The 256 bit SHA sum associated with the file for verifying downloads + The 256 bit SHA sum associated with the file for verification """ self.base_model: BaseModel = base_model + self.source: str = source self.file_hash: str = file_hash + self.exists_locally: Optional[bool] = None self.local_path: Optional[str] = None + if _get_source_type(source) == "path": + self.exists_locally = True + self.local_path = source + self.start_date = start_date self.end_date = end_date if isinstance(start_date, str): @@ -76,7 +83,7 @@ def __str__(self): base_str += "\n" + "-" * (len(name) + 7) base_str += f"\nBase model: {self.base_model.name}" - base_str += f"\nRemote path URL: {self.source}" + base_str += f"\nsource: {self.source}" if self.start_date is not None: base_str += f"\nstart_date: {self.start_date}" if self.end_date is not None: @@ -91,35 +98,55 @@ def __str__(self): def __repr__(self): return self.__str__() - def get(self, local_path: str): + def get(self, local_dir: str): """ - Fetch the file containing this input dataset and save it to `local_path` using Pooch. + Fetch the file containing this input dataset and save it within `local_dir/input_datasets` using Pooch. This method updates the `local_path` attribute of the calling InputDataset object Parameters: ----------- - local_path: str - The local path where this input dataset will be saved. + local_dir: str + The local directory in which this input dataset will be saved. """ - - tgt_dir = local_path + "/input_datasets/" + self.base_model.name + "/" + tgt_dir = local_dir + "/input_datasets/" + self.base_model.name + "/" os.makedirs(tgt_dir, exist_ok=True) + tgt_path = tgt_dir + os.path.basename(self.source) - # NOTE: default timeout was leading to a lot of timeouterrors - downloader = pooch.HTTPDownloader(timeout=120) - to_fetch = pooch.create( - path=tgt_dir, - base_url=os.path.dirname(self.source), - registry={os.path.basename(self.source): self.file_hash}, - ) - - to_fetch.fetch(os.path.basename(self.source), downloader=downloader) - self.exists_locally = True - self.local_path = tgt_dir + "/" + os.path.basename(self.source) + if self.exists_locally: + assert ( + self.local_path is not None + ), "local_path should always be set when exists_locally is True" + if os.path.abspath(self.local_path) != os.path.abspath(tgt_path): + if os.path.exists(tgt_path): + raise FileExistsError( + f"A file by the name of {os.path.basename(self.source)}" + + f"already exists at {tgt_dir}." + ) + # TODO maybe this should check the hash and just `return` if it matches? + else: + # If the file is somewhere else on the system, make a symbolic link where we want it + os.symlink(self.local_path, tgt_path) + return + else: + # nothing to do as file is already at tgt_path + return - def check_exists_locally(self, local_path: str) -> bool: + else: + # NOTE: default timeout was leading to a lot of timeouterrors + downloader = pooch.HTTPDownloader(timeout=120) + to_fetch = pooch.create( + path=tgt_dir, + base_url=os.path.dirname(self.source), + registry={os.path.basename(self.source): self.file_hash}, + ) + + to_fetch.fetch(os.path.basename(self.source), downloader=downloader) + self.exists_locally = True + self.local_path = tgt_dir + "/" + os.path.basename(self.source) + + def check_exists_locally(self, local_dir: str) -> bool: """ Checks whether this InputDataset has already been fetched to the local machine @@ -128,38 +155,38 @@ def check_exists_locally(self, local_path: str) -> bool: Parameters: ----------- - local_path (str): - The local path to check for the existence of this input dataset + local_dir (str): + The local directory in which to check for the existence of this input dataset Returns: -------- exists_locally (bool): True if the method has verified the local existence of the dataset """ - tgt_dir = local_path + "/input_datasets/" + self.base_model.name + "/" - fpath = tgt_dir + os.path.basename(self.source) - if os.path.exists(fpath): - sha256_hash = hashlib.sha256() - with open(fpath, "rb") as f: - for chunk in iter(lambda: f.read(4096), b""): - sha256_hash.update(chunk) - - hash_hex = sha256_hash.hexdigest() - if self.file_hash != hash_hex: - raise ValueError( - f"{fpath} exists locally but the local file hash {hash_hex}" - + "does not match that associated with this InputDataset object" - + f"{self.file_hash}" - ) + + if self.exists_locally is None: + tgt_dir = local_dir + "/input_datasets/" + self.base_model.name + "/" + fpath = tgt_dir + os.path.basename(self.source) + if os.path.exists(fpath): + sha256_hash = hashlib.sha256() + with open(fpath, "rb") as f: + for chunk in iter(lambda: f.read(4096), b""): + sha256_hash.update(chunk) + + hash_hex = sha256_hash.hexdigest() + if self.file_hash != hash_hex: + raise ValueError( + f"{fpath} exists locally but the local file hash {hash_hex}" + + "does not match that associated with this InputDataset object" + + f"{self.file_hash}" + ) + else: + self.exists_locally = True + self.local_path = tgt_dir else: - self.exists_locally = True + self.exists_locally = False - if self.exists_locally: - self.local_path = tgt_dir - return True - else: - self.exists_locally = False - return False + return self.exists_locally class ModelGrid(InputDataset): diff --git a/cstar_ocean/utils.py b/cstar_ocean/utils.py index f241eda..436eca7 100644 --- a/cstar_ocean/utils.py +++ b/cstar_ocean/utils.py @@ -1,11 +1,27 @@ import os import re import subprocess +import pathlib from math import ceil +from urllib.parse import urlparse from cstar_ocean.environment import _CSTAR_CONFIG_FILE +def _get_source_type(source): + """Determine whether a string (source) describes a local path or url""" + urlparsed_source = urlparse(source) + if all([urlparsed_source.scheme, urlparsed_source.netloc]): + return "url" + elif pathlib.Path(source).exists(): + return "path" + else: + raise ValueError( + f"{source} is not a recognised URL or local path pointing to an existing file" + ) + + def _write_to_config_file(config_file_str): + """write to C-Star config file to configure environment on import""" if not os.path.exists(_CSTAR_CONFIG_FILE): base_conf_str = ( "# This file was generated by C-Star and is specific to your machine. " From 7e24a5a4a3a932d0dafed9b7871a14c4116be094 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E2=80=9CDafydd?= <“dafydd_stephenson@hotmail.com”> Date: Fri, 16 Aug 2024 16:54:34 -0600 Subject: [PATCH 2/5] fix found bugs in cstar_case.py when running Case.from_blueprint() on yaml created with Case.persist() --- cstar_ocean/cstar_case.py | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/cstar_ocean/cstar_case.py b/cstar_ocean/cstar_case.py index e02c7bc..879864d 100644 --- a/cstar_ocean/cstar_case.py +++ b/cstar_ocean/cstar_case.py @@ -484,14 +484,12 @@ def persist(self, filename: str): # Add metadata to dictionary bp_dict["registry_attrs"] = {"name": self.name} + + # Add start date to valid_date_range if it exists if self.valid_start_date is not None: - bp_dict["registry_attrs"]["valid_date_range"] = { - "start_date": str(self.valid_start_date) - } + bp_dict["registry_attrs"].setdefault("valid_date_range", {})["start_date"] = str(self.valid_start_date) if self.valid_end_date is not None: - bp_dict["registry_attrs"]["valid_date_range"] = { - "end_date": str(self.valid_end_date) - } + bp_dict["registry_attrs"].setdefault("valid_date_range", {})["end_date"] = str(self.valid_end_date) bp_dict["components"] = [] @@ -562,6 +560,7 @@ def persist(self, filename: str): if isinstance(input_datasets, list): input_dataset_info: dict = {} for ind in input_datasets: + # Determine what kind of input dataset we are adding if isinstance(ind, ModelGrid): dct_key = "model_grid" elif isinstance(ind, InitialConditions): @@ -574,11 +573,17 @@ def persist(self, filename: str): dct_key = "surface_forcing" if dct_key not in input_dataset_info.keys(): input_dataset_info[dct_key] = {} + + # Create a dictionary of file_info for each dataset file: if "files" not in input_dataset_info[dct_key].keys(): input_dataset_info[dct_key]["files"] = [] - input_dataset_info[dct_key]["files"].append( - {"source": ind.source, "file_hash": ind.file_hash} - ) + file_info={"source":ind.source,"hash":ind.file_hash} + if hasattr(ind,"start_date"): + file_info["start_date"]=ind.start_date + if hasattr(ind,"end_date"): + file_info["end_date"]=ind.end_date + + input_dataset_info[dct_key]["files"].append(file_info) component_info["input_datasets"] = input_dataset_info From 1a47dbf800a517c7570fe109ae45ec18118fe4c9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E2=80=9CDafydd?= <“dafydd_stephenson@hotmail.com”> Date: Fri, 16 Aug 2024 16:58:49 -0600 Subject: [PATCH 3/5] Modify tests/test_roms_marbl_example.py to add a second section which: - Modifies the blueprint created by the first case to use local paths to input datasets where available - Creates and runs a second case `roms_marbl_local_case` from this blueprint --- tests/test_roms_marbl_example.py | 61 +++++++++++++++++++++++++++----- 1 file changed, 52 insertions(+), 9 deletions(-) diff --git a/tests/test_roms_marbl_example.py b/tests/test_roms_marbl_example.py index 947602b..6987508 100644 --- a/tests/test_roms_marbl_example.py +++ b/tests/test_roms_marbl_example.py @@ -1,7 +1,11 @@ +import os +import shutil +import subprocess from unittest.mock import patch import cstar_ocean as cstar -roms_marbl_case = cstar.Case.from_blueprint( +## First test uses URLs to point to input datasets +roms_marbl_remote_case = cstar.Case.from_blueprint( blueprint=(cstar.environment._CSTAR_ROOT) + "/../examples/cstar_blueprint_roms_marbl_example.yaml", caseroot="roms_marbl_example_case/", @@ -11,11 +15,50 @@ # patch will automatically respond "y" to any call for input with patch("builtins.input", return_value="y"): - roms_marbl_case.setup() - roms_marbl_case.persist("test_blueprint.yaml") - roms_marbl_case.build() - roms_marbl_case.pre_run() - roms_marbl_case.run() - roms_marbl_case.post_run() - -print("TEST COMPLETE!") + roms_marbl_remote_case.setup() + roms_marbl_remote_case.persist("test_blueprint.yaml") + roms_marbl_remote_case.build() + roms_marbl_remote_case.pre_run() + roms_marbl_remote_case.run() + roms_marbl_remote_case.post_run() + +print("Test complete with remote input dataset files") + +## Second test modifies the yaml created above to use available local input datasets + +# Move the input datasets to a new location +shutil.move(roms_marbl_remote_case.caseroot+'/input_datasets/ROMS/','local_input_files') + +# Modify the blueprint file to point to local paths whenever we have the files: +with open('test_blueprint.yaml') as f: + test_blueprint=f.readlines() + +for i,line in enumerate(test_blueprint): + url_prefix="https://github.com/CWorthy-ocean/input_datasets_roms_marbl_example/raw/main/" + if url_prefix in line: + fileurl=line.split()[-1] # Just isolate URL from e.g. source: URL + filepath=f"{os.getcwd()}/local_input_files/{os.path.basename(fileurl)}" + if os.path.exists(filepath): + test_blueprint[i]=line.replace(fileurl,filepath) + +with open('modified_test_blueprint.yaml', 'w') as f: + f.writelines(test_blueprint) +## + +roms_marbl_local_case = cstar.Case.from_blueprint( + blueprint="modified_test_blueprint.yaml", + caseroot="roms_marbl_local_case", + start_date="20120103 12:00:00", + end_date="20120103 12:30:00") + +# patch will automatically respond "y" to any call for input +with patch("builtins.input", return_value="y"): + roms_marbl_local_case.setup() + roms_marbl_local_case.persist("test_blueprint_local.yaml") + roms_marbl_local_case.build() + roms_marbl_local_case.pre_run() + roms_marbl_local_case.run() + roms_marbl_local_case.post_run() + +print("Test complete with local input dataset files") + From d0ba139c1605592fa4257374621cd37b02dc895c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E2=80=9CDafydd?= <“dafydd_stephenson@hotmail.com”> Date: Fri, 16 Aug 2024 17:54:25 -0600 Subject: [PATCH 4/5] Changes to example notebook and environment.yml addressing #18,#21,#22,#23; fix bug caught by pre-commit in cstar_case.py --- ci/environment.yml | 4 +- cstar_ocean/.pre-commit-config.yaml | 45 -------- cstar_ocean/cstar_case.py | 20 ++-- examples/cstar_example_notebook.ipynb | 154 ++++++++++++++------------ 4 files changed, 100 insertions(+), 123 deletions(-) delete mode 100644 cstar_ocean/.pre-commit-config.yaml diff --git a/ci/environment.yml b/ci/environment.yml index 8d50543..99bfc5b 100644 --- a/ci/environment.yml +++ b/ci/environment.yml @@ -11,5 +11,5 @@ dependencies: - compilers - netcdf-fortran - mpich - - nco - - ncview + - xarray + - netCDF4 diff --git a/cstar_ocean/.pre-commit-config.yaml b/cstar_ocean/.pre-commit-config.yaml deleted file mode 100644 index 51f6568..0000000 --- a/cstar_ocean/.pre-commit-config.yaml +++ /dev/null @@ -1,45 +0,0 @@ -exclude: '^(?!cstar_ocean/).*' -ci: - autoupdate_schedule: monthly - -repos: - - repo: https://github.com/pre-commit/pre-commit-hooks - rev: v4.6.0 - files: '^cstar_ocean/' - hooks: - - id: trailing-whitespace - - id: end-of-file-fixer - - id: check-yaml - - - repo: https://github.com/astral-sh/ruff-pre-commit - # Ruff version. - rev: "v0.4.7" - hooks: - # Run the linter. - - id: ruff - args: [ --fix ] - # Run the formatter. - - id: ruff-format - - - repo: https://github.com/pre-commit/mirrors-mypy - rev: v1.10.0 - hooks: - - id: mypy - # Copied from setup.cfg - exclude: "properties|asv_bench|docs" - additional_dependencies: [ - # Type stubs - types-python-dateutil, - types-pkg_resources, - types-PyYAML, - types-pytz, - # Dependencies that are typed - numpy, - typing-extensions>=4.1.0, - ] - - - repo: https://github.com/MarcoGorelli/absolufy-imports - rev: v0.3.1 - hooks: - - id: absolufy-imports - args: [--application-directories=.] diff --git a/cstar_ocean/cstar_case.py b/cstar_ocean/cstar_case.py index 879864d..88298f2 100644 --- a/cstar_ocean/cstar_case.py +++ b/cstar_ocean/cstar_case.py @@ -487,9 +487,13 @@ def persist(self, filename: str): # Add start date to valid_date_range if it exists if self.valid_start_date is not None: - bp_dict["registry_attrs"].setdefault("valid_date_range", {})["start_date"] = str(self.valid_start_date) + bp_dict["registry_attrs"].setdefault("valid_date_range", {})[ + "start_date" + ] = str(self.valid_start_date) if self.valid_end_date is not None: - bp_dict["registry_attrs"].setdefault("valid_date_range", {})["end_date"] = str(self.valid_end_date) + bp_dict["registry_attrs"].setdefault("valid_date_range", {})["end_date"] = ( + str(self.valid_end_date) + ) bp_dict["components"] = [] @@ -577,12 +581,12 @@ def persist(self, filename: str): # Create a dictionary of file_info for each dataset file: if "files" not in input_dataset_info[dct_key].keys(): input_dataset_info[dct_key]["files"] = [] - file_info={"source":ind.source,"hash":ind.file_hash} - if hasattr(ind,"start_date"): - file_info["start_date"]=ind.start_date - if hasattr(ind,"end_date"): - file_info["end_date"]=ind.end_date - + file_info = {"source": ind.source, "hash": ind.file_hash} + if hasattr(ind, "start_date"): + file_info["start_date"] = str(ind.start_date) + if hasattr(ind, "end_date"): + file_info["end_date"] = str(ind.end_date) + input_dataset_info[dct_key]["files"].append(file_info) component_info["input_datasets"] = input_dataset_info diff --git a/examples/cstar_example_notebook.ipynb b/examples/cstar_example_notebook.ipynb index 10d2f3f..e4aaee8 100644 --- a/examples/cstar_example_notebook.ipynb +++ b/examples/cstar_example_notebook.ipynb @@ -298,18 +298,18 @@ " source=\"https://github.com/CWorthy-ocean/input_datasets_roms_marbl_example/raw/main/roms_bry_2012.nc\",\n", " file_hash=\"c3b0e14aae6dd5a0d54703fa04cf95960c1970e732c0a230427bf8b0fbbd8bf1\"\n", ")\n", - "roms_bgc_boundary_forcing=cstar.BoundaryForcing(\n", - " base_model=roms_base_model,\n", - " source=\"https://github.com/CWorthy-ocean/input_datasets_roms_marbl_example/raw/main/roms_bry_bgc_MARBL.nc\",\n", - " file_hash=\"897a8df8ed45841a98b3906f2dd07750decc5c2b50095ba648a855c869c7d3ee\"\n", + "roms_bgc_boundary_forcing = cstar.BoundaryForcing(\n", + " base_model = roms_base_model,\n", + " source = \"https://github.com/CWorthy-ocean/input_datasets_roms_marbl_example/raw/main/roms_bry_bgc_MARBL.nc\",\n", + " file_hash = \"897a8df8ed45841a98b3906f2dd07750decc5c2b50095ba648a855c869c7d3ee\"\n", ")\n", "# Surface\n", - "roms_bgc_surface_forcing=cstar.SurfaceForcing(\n", + "roms_bgc_surface_forcing = cstar.SurfaceForcing(\n", " base_model=roms_base_model,\n", " source=\"https://github.com/CWorthy-ocean/input_datasets_roms_marbl_example/raw/main/roms_frc_bgc.nc\",\n", " file_hash=\"621dd23691d87aa93c5cc582daf6c5f18333ed062ff934777d50b63346c3f84d\"\n", ")\n", - "roms_phys_surface_forcing=cstar.SurfaceForcing(\n", + "roms_phys_surface_forcing = cstar.SurfaceForcing(\n", " base_model=roms_base_model,\n", " source=\"https://github.com/CWorthy-ocean/input_datasets_roms_marbl_example/raw/main/roms_frc.201201.nc\",\n", " file_hash=\"923049a9c2ab9ce77fa4a0211585e6848a12e87bf237e7aa310f693c3ac6abfa\"\n", @@ -347,8 +347,8 @@ "MARBLComponent object \n", "---------------------\n", "Built from: \n", - "1 AdditionalCode repositories (query using ROMSComponent.additional_code)\n", - "0 InputDataset objects (query using ROMSComponent.input_datasets" + "1 AdditionalCode repositories (query using Component.additional_code)\n", + "0 InputDataset objects (query using Component.input_datasets" ] }, "execution_count": 8, @@ -357,9 +357,9 @@ } ], "source": [ - "marbl_component=cstar.MARBLComponent(\n", - " base_model=marbl_base_model,\n", - " additional_code=marbl_additional_code\n", + "marbl_component = cstar.MARBLComponent(\n", + " base_model = marbl_base_model,\n", + " additional_code = marbl_additional_code\n", ")\n", "marbl_component " ] @@ -386,8 +386,8 @@ "ROMSComponent object \n", "--------------------\n", "Built from: \n", - "1 AdditionalCode repositories (query using ROMSComponent.additional_code)\n", - "7 InputDataset objects (query using ROMSComponent.input_datasets\n", + "1 AdditionalCode repositories (query using Component.additional_code)\n", + "7 InputDataset objects (query using Component.input_datasets\n", "\n", "Discretization info:\n", "time_step: 1\n", @@ -401,10 +401,10 @@ } ], "source": [ - "roms_component=cstar.ROMSComponent(\n", - " base_model=roms_base_model,\n", - " additional_code=roms_additional_code,\n", - " input_datasets=roms_input_datasets,\n", + "roms_component = cstar.ROMSComponent(\n", + " base_model = roms_base_model,\n", + " additional_code = roms_additional_code,\n", + " input_datasets = roms_input_datasets,\n", " n_procs_x = 3,\n", " n_procs_y = 3)\n", "roms_component " @@ -416,7 +416,7 @@ "metadata": {}, "source": [ "## And finally, we can build the Case object:\n", - "This is instantiated using a list of components, a name string, and a \"caseroot\" - the local path where the case will be run" + "This is instantiated using a list of components, a name string, and a \"caseroot\" - the local path where the case will be run. Additionally we choose a start date and end date for the run. The values below run the case for one model month, which may take several minutes to run (depending on your machine). **If you'd like to run the case more quickly**, modify `end_date` below:" ] }, { @@ -462,12 +462,12 @@ } ], "source": [ - "roms_marbl_case=cstar.Case(\n", - " components=[roms_component,marbl_component],\n", - " name='roms_marbl_example_cstar_case',\n", - " caseroot=os.getcwd()+'/roms_marbl_example_cstar_case',\n", - " start_date='20120103 12:00:00',\n", - " end_date='20120131 12:00:00')\n", + "roms_marbl_case = cstar.Case(\n", + " components = [roms_component,marbl_component],\n", + " name = 'roms_marbl_example_cstar_case',\n", + " caseroot = os.getcwd()+'/roms_marbl_example_cstar_case',\n", + " start_date = '20120103 12:00:00',\n", + " end_date = '20120131 12:00:00')\n", "roms_marbl_case" ] }, @@ -567,7 +567,11 @@ ], "source": [ "%%bash\n", - "tree $roms_marbl_caseroot" + "if command -v tree >/dev/null 2>&1; then \n", + " tree $roms_marbl_caseroot; \n", + "else \n", + " echo \"tree is not installed on your system so you can't see this cell - sorry!\"\n", + "fi\n" ] }, { @@ -582,7 +586,7 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 14, "id": "868f69f3-9f34-4105-942d-1926c15f96e5", "metadata": {}, "outputs": [], @@ -592,7 +596,7 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 15, "id": "062668a5-4f25-4f20-a969-95c1a856026a", "metadata": {}, "outputs": [ @@ -635,27 +639,41 @@ " model_grid:\n", " files:\n", " - source: https://github.com/CWorthy-ocean/input_datasets_roms_marbl_example/raw/main/roms_grd.nc\n", - " file_hash: fd537ef8159fabb18e38495ec8d44e2fa1b7fb615fcb1417dd4c0e1bb5f4e41d\n", + " hash: fd537ef8159fabb18e38495ec8d44e2fa1b7fb615fcb1417dd4c0e1bb5f4e41d\n", + " start_date: None\n", + " end_date: None\n", " initial_conditions:\n", " files:\n", " - source: https://github.com/CWorthy-ocean/input_datasets_roms_marbl_example/raw/main/MARBL_rst.20120103120000.nc\n", - " file_hash: fc3bbd039256edc89c898efda0eebc5c53773995598d59310bc6d57f454a6ddd\n", + " hash: fc3bbd039256edc89c898efda0eebc5c53773995598d59310bc6d57f454a6ddd\n", + " start_date: None\n", + " end_date: None\n", " tidal_forcing:\n", " files:\n", " - source: https://github.com/CWorthy-ocean/input_datasets_roms_marbl_example/raw/main/roms_tides.nc\n", - " file_hash: 90db174ab174909f9bf27c13fa19995c03f680bcb80e7d012268505b48590338\n", + " hash: 90db174ab174909f9bf27c13fa19995c03f680bcb80e7d012268505b48590338\n", + " start_date: None\n", + " end_date: None\n", " boundary_forcing:\n", " files:\n", " - source: https://github.com/CWorthy-ocean/input_datasets_roms_marbl_example/raw/main/roms_bry_2012.nc\n", - " file_hash: c3b0e14aae6dd5a0d54703fa04cf95960c1970e732c0a230427bf8b0fbbd8bf1\n", + " hash: c3b0e14aae6dd5a0d54703fa04cf95960c1970e732c0a230427bf8b0fbbd8bf1\n", + " start_date: None\n", + " end_date: None\n", " - source: https://github.com/CWorthy-ocean/input_datasets_roms_marbl_example/raw/main/roms_bry_bgc_MARBL.nc\n", - " file_hash: 897a8df8ed45841a98b3906f2dd07750decc5c2b50095ba648a855c869c7d3ee\n", + " hash: 897a8df8ed45841a98b3906f2dd07750decc5c2b50095ba648a855c869c7d3ee\n", + " start_date: None\n", + " end_date: None\n", " surface_forcing:\n", " files:\n", " - source: https://github.com/CWorthy-ocean/input_datasets_roms_marbl_example/raw/main/roms_frc_bgc.nc\n", - " file_hash: 621dd23691d87aa93c5cc582daf6c5f18333ed062ff934777d50b63346c3f84d\n", + " hash: 621dd23691d87aa93c5cc582daf6c5f18333ed062ff934777d50b63346c3f84d\n", + " start_date: None\n", + " end_date: None\n", " - source: https://github.com/CWorthy-ocean/input_datasets_roms_marbl_example/raw/main/roms_frc.201201.nc\n", - " file_hash: 923049a9c2ab9ce77fa4a0211585e6848a12e87bf237e7aa310f693c3ac6abfa\n", + " hash: 923049a9c2ab9ce77fa4a0211585e6848a12e87bf237e7aa310f693c3ac6abfa\n", + " start_date: None\n", + " end_date: None\n", "- component:\n", " base_model:\n", " name: MARBL\n", @@ -764,7 +782,7 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": 20, "id": "dde01e3d-352c-40b7-91f6-a9bbb38a859c", "metadata": {}, "outputs": [ @@ -772,47 +790,46 @@ "name": "stdout", "output_type": "stream", "text": [ - "MARBL_bgc.20120103120000.nc MARBL_rst.20120104120000.nc\n", - "MARBL_bgc_dia.20120103120000.nc MARBL_rst.20120105120000.nc\n", - "MARBL_his.20120103120000.nc MARBL_rst.20120106120000.nc\n", - "MARBL_his.20120104120000.nc MARBL_rst.20120107120000.nc\n", - "MARBL_his.20120105120000.nc MARBL_rst.20120108120000.nc\n", - "MARBL_his.20120106120000.nc MARBL_rst.20120109120000.nc\n", - "MARBL_his.20120107120000.nc MARBL_rst.20120110120000.nc\n", - "MARBL_his.20120108120000.nc MARBL_rst.20120111120000.nc\n", - "MARBL_his.20120109120000.nc MARBL_rst.20120112120000.nc\n", - "MARBL_his.20120110120000.nc MARBL_rst.20120113120000.nc\n", - "MARBL_his.20120111120000.nc MARBL_rst.20120114120000.nc\n", - "MARBL_his.20120112120000.nc MARBL_rst.20120115120000.nc\n", - "MARBL_his.20120113120000.nc MARBL_rst.20120116120000.nc\n", - "MARBL_his.20120114120000.nc MARBL_rst.20120117120000.nc\n", - "MARBL_his.20120115120000.nc MARBL_rst.20120118120000.nc\n", - "MARBL_his.20120116120000.nc MARBL_rst.20120119120000.nc\n", - "MARBL_his.20120117120000.nc MARBL_rst.20120120120000.nc\n", - "MARBL_his.20120118120000.nc MARBL_rst.20120121120000.nc\n", - "MARBL_his.20120119120000.nc MARBL_rst.20120122120000.nc\n", - "MARBL_his.20120120120000.nc MARBL_rst.20120123120000.nc\n", - "MARBL_his.20120121120000.nc MARBL_rst.20120124120000.nc\n", - "MARBL_his.20120122120000.nc MARBL_rst.20120125120000.nc\n", - "MARBL_his.20120123120000.nc MARBL_rst.20120126120000.nc\n", - "MARBL_his.20120124120000.nc MARBL_rst.20120127120000.nc\n", - "MARBL_his.20120125120000.nc MARBL_rst.20120128120000.nc\n", - "MARBL_his.20120126120000.nc MARBL_rst.20120129120000.nc\n", - "MARBL_his.20120127120000.nc MARBL_rst.20120130120000.nc\n", - "MARBL_his.20120128120000.nc MARBL_rst.20120131120000.nc\n", - "MARBL_his.20120129120000.nc \u001b[34mPARTITIONED\u001b[m\u001b[m/\n", - "MARBL_his.20120130120000.nc analyse.py\n", - "MARBL_his.20120131120000.nc analyse.py~\n" + "MARBL_bgc.20120103120000.nc MARBL_his.20120131120000.nc\n", + "MARBL_bgc_dia.20120103120000.nc MARBL_rst.20120104120000.nc\n", + "MARBL_his.20120103120000.nc MARBL_rst.20120105120000.nc\n", + "MARBL_his.20120104120000.nc MARBL_rst.20120106120000.nc\n", + "MARBL_his.20120105120000.nc MARBL_rst.20120107120000.nc\n", + "MARBL_his.20120106120000.nc MARBL_rst.20120108120000.nc\n", + "MARBL_his.20120107120000.nc MARBL_rst.20120109120000.nc\n", + "MARBL_his.20120108120000.nc MARBL_rst.20120110120000.nc\n", + "MARBL_his.20120109120000.nc MARBL_rst.20120111120000.nc\n", + "MARBL_his.20120110120000.nc MARBL_rst.20120112120000.nc\n", + "MARBL_his.20120111120000.nc MARBL_rst.20120113120000.nc\n", + "MARBL_his.20120112120000.nc MARBL_rst.20120114120000.nc\n", + "MARBL_his.20120113120000.nc MARBL_rst.20120115120000.nc\n", + "MARBL_his.20120114120000.nc MARBL_rst.20120116120000.nc\n", + "MARBL_his.20120115120000.nc MARBL_rst.20120117120000.nc\n", + "MARBL_his.20120116120000.nc MARBL_rst.20120118120000.nc\n", + "MARBL_his.20120117120000.nc MARBL_rst.20120119120000.nc\n", + "MARBL_his.20120118120000.nc MARBL_rst.20120120120000.nc\n", + "MARBL_his.20120119120000.nc MARBL_rst.20120121120000.nc\n", + "MARBL_his.20120120120000.nc MARBL_rst.20120122120000.nc\n", + "MARBL_his.20120121120000.nc MARBL_rst.20120123120000.nc\n", + "MARBL_his.20120122120000.nc MARBL_rst.20120124120000.nc\n", + "MARBL_his.20120123120000.nc MARBL_rst.20120125120000.nc\n", + "MARBL_his.20120124120000.nc MARBL_rst.20120126120000.nc\n", + "MARBL_his.20120125120000.nc MARBL_rst.20120127120000.nc\n", + "MARBL_his.20120126120000.nc MARBL_rst.20120128120000.nc\n", + "MARBL_his.20120127120000.nc MARBL_rst.20120129120000.nc\n", + "MARBL_his.20120128120000.nc MARBL_rst.20120130120000.nc\n", + "MARBL_his.20120129120000.nc MARBL_rst.20120131120000.nc\n", + "MARBL_his.20120130120000.nc \u001b[34mPARTITIONED\u001b[m\u001b[m/\n" ] } ], "source": [ - "ls /Users/dafyddstephenson/Code/my_c_star/cstar_ocean/roms_marbl_example_cstar_case/output/" + "ls {roms_marbl_case.caseroot}/output/" ] }, { "cell_type": "code", - "execution_count": 25, + "execution_count": 22, "id": "6e52ba38-986d-4943-8301-33e0a682ca23", "metadata": {}, "outputs": [ @@ -828,6 +845,7 @@ } ], "source": [ + "%matplotlib inline\n", "import numpy as np\n", "import xarray as xr\n", "import matplotlib.pyplot as plt\n", From b032dccde617fe61ba0c33a89b75d72333d8b228 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E2=80=9CDafydd?= <“dafydd_stephenson@hotmail.com”> Date: Mon, 19 Aug 2024 13:21:04 -0600 Subject: [PATCH 5/5] update docstring for InputDataset.get() --- cstar_ocean/input_dataset.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/cstar_ocean/input_dataset.py b/cstar_ocean/input_dataset.py index cc44974..b224388 100644 --- a/cstar_ocean/input_dataset.py +++ b/cstar_ocean/input_dataset.py @@ -100,9 +100,12 @@ def __repr__(self): def get(self, local_dir: str): """ - Fetch the file containing this input dataset and save it within `local_dir/input_datasets` using Pooch. + Make the file containing this input dataset available in `local_dir/input_datasets` - This method updates the `local_path` attribute of the calling InputDataset object + If InputDataset.source is... + - ...a local path: create a symbolic link to the file in `local_dir/input_datasets`. + - ...a URL: fetch the file to `local_dir/input_datasets` using Pooch + (updating the `local_path` attribute of the calling InputDataset) Parameters: ----------- @@ -114,6 +117,7 @@ def get(self, local_dir: str): os.makedirs(tgt_dir, exist_ok=True) tgt_path = tgt_dir + os.path.basename(self.source) + # If the file is somewhere else on the system, make a symbolic link where we want it if self.exists_locally: assert ( self.local_path is not None @@ -126,7 +130,6 @@ def get(self, local_dir: str): ) # TODO maybe this should check the hash and just `return` if it matches? else: - # If the file is somewhere else on the system, make a symbolic link where we want it os.symlink(self.local_path, tgt_path) return else: @@ -134,6 +137,7 @@ def get(self, local_dir: str): return else: + # Otherwise, download the file # NOTE: default timeout was leading to a lot of timeouterrors downloader = pooch.HTTPDownloader(timeout=120) to_fetch = pooch.create(