From 5884143a54cfedcc5945a1286f8e5cbe68ed72f7 Mon Sep 17 00:00:00 2001 From: Thomas Nicholas Date: Tue, 2 Jul 2019 09:52:25 +0100 Subject: [PATCH 01/13] Updated to match xarray v0.12.2 onwards --- xbout/load.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/xbout/load.py b/xbout/load.py index 7f914308..bc8e78b6 100644 --- a/xbout/load.py +++ b/xbout/load.py @@ -20,8 +20,9 @@ def _auto_open_mfboutdataset(datapath, chunks={}, info=True, keep_guards=True): _preprocess = partial(_trim, ghosts={'x': mxg, 'y': myg}) ds = xarray.open_mfdataset(paths_grid, concat_dim=concat_dims, - data_vars='minimal', preprocess=_preprocess, - engine=filetype, chunks=chunks) + combine='nested', data_vars='minimal', + preprocess=_preprocess, engine=filetype, + chunks=chunks) ds, metadata = _strip_metadata(ds) From dec228c9d798fddf546592fa9107ecc494d2f6b5 Mon Sep 17 00:00:00 2001 From: Thomas Nicholas Date: Tue, 2 Jul 2019 09:59:50 +0100 Subject: [PATCH 02/13] Updated requirements --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index e0a9ca3d..2df45747 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,4 @@ -git+https://github.com/TomNicholas/xarray@feature/nd_combine#egg=xarray +xarray >= 0.12.2 dask[array] >= 1.0.0 natsort >= 5.5.0 matplotlib >= 2.2 From 766c5b09c0cdcf3f52f9372936a97886d0cda5b5 Mon Sep 17 00:00:00 2001 From: Thomas Nicholas Date: Tue, 2 Jul 2019 10:00:15 +0100 Subject: [PATCH 03/13] Updated readme to describe dependencies correctly --- README.md | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/README.md b/README.md index ae99e6e0..8ba65879 100644 --- a/README.md +++ b/README.md @@ -101,13 +101,7 @@ install using `python setup.py` You can run the tests by navigating to the `/xBOUT/` directory and entering `pytest`. - -It relies on two upstream additions to xarray -([first](https://github.com/pydata/xarray/pull/2482) & -[second](https://github.com/pydata/xarray/pull/2553) pull requests). -The first is merged, but the second isn't, so for now you need to clone -the branch of xarray containing the PR -[here](https://github.com/TomNicholas/xarray/tree/feature/nd_combine). +Requires xarray v0.12.2 or later. You will also need to install [dask](https://dask.org/), as described in the xarray documentation From 8328c98eed02b07f16faeb168399ce3091ea8c19 Mon Sep 17 00:00:00 2001 From: John Omotani Date: Mon, 15 Jul 2019 09:05:02 +0100 Subject: [PATCH 04/13] Drop BOUT++ timing information before concatenating Timing information is not consistent between processes and therefore cannot be concatenated. --- xbout/load.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/xbout/load.py b/xbout/load.py index bc8e78b6..cc3bc8fa 100644 --- a/xbout/load.py +++ b/xbout/load.py @@ -8,6 +8,9 @@ from natsort import natsorted +_bout_timing_variables = ['wall_time', 'wtime', 'wtime_rhs', 'wtime_invert', + 'wtime_comms', 'wtime_io', 'wtime_per_rhs', 'wtime_per_rhs_e', + 'wtime_per_rhs_i'] def _auto_open_mfboutdataset(datapath, chunks={}, info=True, keep_guards=True): filepaths, filetype = _expand_filepaths(datapath) @@ -155,6 +158,8 @@ def _trim(ds, ghosts={}, keep_guards=True): """ Trims all ghost and guard cells off a single dataset read from a single BOUT dump file, to prepare for concatenation. + Also drops some variables that store timing information, which are different for each + process and so cannot be concatenated. Parameters ---------- @@ -173,6 +178,10 @@ def _trim(ds, ghosts={}, keep_guards=True): selection[dim] = slice(ghosts[dim], -ghosts[dim]) trimmed_ds = ds.isel(**selection) + + vars_to_drop = [v for v in _bout_timing_variables if v in ds] + trimmed_ds = trimmed_ds.drop(vars_to_drop) + return trimmed_ds From 9900bf6bf51d714f42958ddafef7c22a62918d0a Mon Sep 17 00:00:00 2001 From: John Omotani Date: Mon, 15 Jul 2019 09:26:29 +0100 Subject: [PATCH 05/13] Test for dropping timing information --- xbout/tests/test_load.py | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/xbout/tests/test_load.py b/xbout/tests/test_load.py index 6a2eb3d0..e8c93f90 100644 --- a/xbout/tests/test_load.py +++ b/xbout/tests/test_load.py @@ -353,3 +353,23 @@ def test_trim_ghosts(self): selection = {'time': slice(2, -2)} expected = ds.isel(**selection) xrt.assert_equal(expected, actual) + + def test_trim_timing_info(self): + ds = create_test_data(0) + from xbout.load import _bout_timing_variables + + # remove a couple of entries from _bout_timing_variables so we test that _trim + # does not fail if not all of them are present + _bout_timing_variables = _bout_timing_variables[:-2] + + for v in _bout_timing_variables: + ds[v] = 42. + expected = create_test_data(0) + try: + xrt.assert_equal(ds, expected) + except AssertionError: + pass + else: + assert False, "ds has had variables added, should be different from expected" + ds = _trim(ds) + xrt.assert_equal(ds, expected) From eb43c0a7b86bfd2cd393c95a9b59c8f5b9cef950 Mon Sep 17 00:00:00 2001 From: John Omotani Date: Mon, 15 Jul 2019 09:38:02 +0100 Subject: [PATCH 06/13] Simplify ignoring not-present timing variables when dropping --- xbout/load.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/xbout/load.py b/xbout/load.py index cc3bc8fa..85c2bf87 100644 --- a/xbout/load.py +++ b/xbout/load.py @@ -179,8 +179,7 @@ def _trim(ds, ghosts={}, keep_guards=True): trimmed_ds = ds.isel(**selection) - vars_to_drop = [v for v in _bout_timing_variables if v in ds] - trimmed_ds = trimmed_ds.drop(vars_to_drop) + trimmed_ds = trimmed_ds.drop(_bout_timing_variables, errors='ignore') return trimmed_ds From d3a22571c7aeeb95a12fd59340ef986932fd25ce Mon Sep 17 00:00:00 2001 From: John Omotani Date: Mon, 15 Jul 2019 11:50:26 +0100 Subject: [PATCH 07/13] Tidy up trimming of timing variables - Make _BOUT_TIMING_VARIABLES all-uppercase as it is a global constant. - Remove attempt to test that some timing variables have been added to a test dataset before removing them. --- xbout/load.py | 4 ++-- xbout/tests/test_load.py | 17 ++++++----------- 2 files changed, 8 insertions(+), 13 deletions(-) diff --git a/xbout/load.py b/xbout/load.py index 85c2bf87..6eb5efdd 100644 --- a/xbout/load.py +++ b/xbout/load.py @@ -8,7 +8,7 @@ from natsort import natsorted -_bout_timing_variables = ['wall_time', 'wtime', 'wtime_rhs', 'wtime_invert', +_BOUT_TIMING_VARIABLES = ['wall_time', 'wtime', 'wtime_rhs', 'wtime_invert', 'wtime_comms', 'wtime_io', 'wtime_per_rhs', 'wtime_per_rhs_e', 'wtime_per_rhs_i'] @@ -179,7 +179,7 @@ def _trim(ds, ghosts={}, keep_guards=True): trimmed_ds = ds.isel(**selection) - trimmed_ds = trimmed_ds.drop(_bout_timing_variables, errors='ignore') + trimmed_ds = trimmed_ds.drop(_BOUT_TIMING_VARIABLES, errors='ignore') return trimmed_ds diff --git a/xbout/tests/test_load.py b/xbout/tests/test_load.py index e8c93f90..a3056cc6 100644 --- a/xbout/tests/test_load.py +++ b/xbout/tests/test_load.py @@ -356,20 +356,15 @@ def test_trim_ghosts(self): def test_trim_timing_info(self): ds = create_test_data(0) - from xbout.load import _bout_timing_variables + from xbout.load import _BOUT_TIMING_VARIABLES - # remove a couple of entries from _bout_timing_variables so we test that _trim + # remove a couple of entries from _BOUT_TIMING_VARIABLES so we test that _trim # does not fail if not all of them are present - _bout_timing_variables = _bout_timing_variables[:-2] + _BOUT_TIMING_VARIABLES = _BOUT_TIMING_VARIABLES[:-2] - for v in _bout_timing_variables: + for v in _BOUT_TIMING_VARIABLES: ds[v] = 42. - expected = create_test_data(0) - try: - xrt.assert_equal(ds, expected) - except AssertionError: - pass - else: - assert False, "ds has had variables added, should be different from expected" ds = _trim(ds) + + expected = create_test_data(0) xrt.assert_equal(ds, expected) From 017e7b3f3282c3d00cff6c930338acf9af062deb Mon Sep 17 00:00:00 2001 From: John Omotani Date: Mon, 22 Jul 2019 22:01:28 +0100 Subject: [PATCH 08/13] Fix test_trim_timing_info _trim was missing a required argument. --- xbout/tests/test_load.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xbout/tests/test_load.py b/xbout/tests/test_load.py index a56a734c..74131568 100644 --- a/xbout/tests/test_load.py +++ b/xbout/tests/test_load.py @@ -455,7 +455,7 @@ def test_trim_timing_info(self): for v in _BOUT_TIMING_VARIABLES: ds[v] = 42. - ds = _trim(ds) + ds = _trim(ds, {}) expected = create_test_data(0) xrt.assert_equal(ds, expected) From dfabc7a5c59b14442fe34441868a1270031f68d4 Mon Sep 17 00:00:00 2001 From: John Omotani Date: Mon, 22 Jul 2019 21:35:34 +0100 Subject: [PATCH 09/13] Use '=None' rather than '={}' for default arguments in _trim --- xbout/load.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/xbout/load.py b/xbout/load.py index 5e67589b..1bd066d5 100644 --- a/xbout/load.py +++ b/xbout/load.py @@ -159,7 +159,7 @@ def _arrange_for_concatenation(filepaths, nxpe=1, nype=1): return paths_grid, concat_dims -def _trim(ds, ghosts, guards={}, keep_guards={}, nxpe=1, nype=1): +def _trim(ds, ghosts, guards=None, keep_guards=None, nxpe=1, nype=1): """ Trims all ghost and guard cells off a single dataset read from a single BOUT dump file, to prepare for concatenation. @@ -177,6 +177,12 @@ def _trim(ds, ghosts, guards={}, keep_guards={}, nxpe=1, nype=1): {'x': True, 'y': False} """ + if guards is None: + guards = {} + + if keep_guards is None: + keep_guards = {} + if any(keep_guards.values()): # Work out if this particular dataset contains any guard cells # Relies on a change to xarray so datasets always have source encoding From 50a4afa39248537ee3577a738a4fb726911f72de Mon Sep 17 00:00:00 2001 From: John Omotani Date: Mon, 22 Jul 2019 21:41:29 +0100 Subject: [PATCH 10/13] Make all arguments to _trim non-optional Since _trim is a private method, and in the only place it is used all the arguments are passed, it is clearer not to handle default arguments. --- xbout/boutdataset.py | 8 ++-- xbout/load.py | 78 +++++++++++++++------------------ xbout/tests/test_boutdataset.py | 6 +-- xbout/tests/test_load.py | 66 +++++++++++++++------------- 4 files changed, 78 insertions(+), 80 deletions(-) diff --git a/xbout/boutdataset.py b/xbout/boutdataset.py index 8551e809..58eb8a7f 100644 --- a/xbout/boutdataset.py +++ b/xbout/boutdataset.py @@ -31,7 +31,7 @@ def open_boutdataset(datapath='./BOUT.dmp.*.nc', inputfilepath=None, gridfilepath=None, chunks={}, - keep_xguards=True, keep_yguards=False, + keep_xboundaries=True, keep_yboundaries=False, run_name=None, info=True): """ Load a dataset from a set of BOUT output files, including the input options file. @@ -56,8 +56,8 @@ def open_boutdataset(datapath='./BOUT.dmp.*.nc', # Gather pointers to all numerical data from BOUT++ output files ds, metadata = _auto_open_mfboutdataset(datapath=datapath, chunks=chunks, - keep_xguards=keep_xguards, - keep_yguards=keep_yguards) + keep_xboundaries=keep_xboundaries, + keep_yboundaries=keep_yboundaries) ds = _set_attrs_on_all_vars(ds, 'metadata', metadata) @@ -235,7 +235,7 @@ def to_restart(self, savepath='.', nxpe=None, nype=None, else: nxpe, nype = self.metadata['NXPE'], self.metadata['NYPE'] - # Is this even possible without saving the ghost cells? + # Is this even possible without saving the guard cells? # Can they be recreated? restart_datasets, paths = _split_into_restarts(self.data, savepath, nxpe, nype) diff --git a/xbout/load.py b/xbout/load.py index 1bd066d5..149200b7 100644 --- a/xbout/load.py +++ b/xbout/load.py @@ -13,7 +13,7 @@ 'wtime_per_rhs_i'] def _auto_open_mfboutdataset(datapath, chunks={}, info=True, - keep_xguards=False, keep_yguards=False): + keep_xboundaries=False, keep_yboundaries=False): filepaths, filetype = _expand_filepaths(datapath) # Open just one file to read processor splitting @@ -21,9 +21,9 @@ def _auto_open_mfboutdataset(datapath, chunks={}, info=True, paths_grid, concat_dims = _arrange_for_concatenation(filepaths, nxpe, nype) - _preprocess = partial(_trim, ghosts={'x': mxg, 'y': myg}, - guards={'x': mxg, 'y': myg}, - keep_guards={'x': keep_xguards, 'y': keep_yguards}, + _preprocess = partial(_trim, guards={'x': mxg, 'y': myg}, + boundary_cells={'x': mxg, 'y': myg}, + keep_boundaries={'x': keep_xboundaries, 'y': keep_yboundaries}, nxpe=nxpe, nype=nype) # TODO warning message to make sure user knows if it's parallelized @@ -159,58 +159,52 @@ def _arrange_for_concatenation(filepaths, nxpe=1, nype=1): return paths_grid, concat_dims -def _trim(ds, ghosts, guards=None, keep_guards=None, nxpe=1, nype=1): +def _trim(ds, *, guards, boundary_cells, keep_boundaries, nxpe, nype): """ - Trims all ghost and guard cells off a single dataset read from a single - BOUT dump file, to prepare for concatenation. + Trims all guard (and optionally boundary) cells off a single dataset read from a + single BOUT dump file, to prepare for concatenation. Also drops some variables that store timing information, which are different for each process and so cannot be concatenated. Parameters ---------- - ghosts : dict, optional - Number of ghost cells along each dimension, e.g. {'x': 2, 't': 0} - guards : dict, optional - Number of guard cells along each dimension, e.g. {'x': 2, 'y': 2} - keep_guards : dict, optional - Whether or not to preserve the guard cells along each dimension, e.g. + guards : dict + Number of guard cells along each dimension, e.g. {'x': 2, 't': 0} + boundary_cells : dict + Number of boundary cells along each dimension, e.g. {'x': 2, 'y': 2} + keep_boundaries : dict + Whether or not to preserve the boundary cells along each dimension, e.g. {'x': True, 'y': False} """ - if guards is None: - guards = {} - - if keep_guards is None: - keep_guards = {} - - if any(keep_guards.values()): - # Work out if this particular dataset contains any guard cells + if any(keep_boundaries.values()): + # Work out if this particular dataset contains any boundary cells # Relies on a change to xarray so datasets always have source encoding # See xarray GH issue #2550 - lower_guards, upper_guards = _infer_contains_guards( + lower_boundaries, upper_boundaries = _infer_contains_boundaries( ds.encoding['source'], nxpe, nype) else: - lower_guards, upper_guards = {}, {} + lower_boundaries, upper_boundaries = {}, {} selection = {} for dim in ds.dims: - # Check for guard cells, otherwise use ghost cells, else leave alone - if keep_guards.get(dim, False): - if lower_guards.get(dim, False): + # Check for boundary cells, otherwise use guard cells, else leave alone + if keep_boundaries.get(dim, False): + if lower_boundaries.get(dim, False): lower = None else: - lower = max(ghosts[dim], guards[dim]) - elif ghosts.get(dim, False): - lower = ghosts[dim] + lower = max(guards[dim], boundary_cells[dim]) + elif guards.get(dim, False): + lower = guards[dim] else: lower = None - if keep_guards.get(dim, False): - if upper_guards.get(dim, False): + if keep_boundaries.get(dim, False): + if upper_boundaries.get(dim, False): upper = None else: - upper = -max(ghosts[dim], guards[dim]) - elif ghosts.get(dim, False): - upper = -ghosts[dim] + upper = -max(guards[dim], boundary_cells[dim]) + elif guards.get(dim, False): + upper = -guards[dim] else: upper = None selection[dim] = slice(lower, upper) @@ -222,10 +216,10 @@ def _trim(ds, ghosts, guards=None, keep_guards=None, nxpe=1, nype=1): return trimmed_ds -def _infer_contains_guards(filename, nxpe, nype): +def _infer_contains_boundaries(filename, nxpe, nype): """ Uses the name of the output file and the domain decomposition to work out - whether this dataset contains guard (boundary) cells, and on which side. + whether this dataset contains boundary cells, and on which side. Uses knowledge that BOUT names its output files as /folder/prefix.num.nc, with a numbering scheme @@ -235,15 +229,15 @@ def _infer_contains_guards(filename, nxpe, nype): *prefix, filenum, extension = Path(filename).suffixes filenum = int(filenum.replace('.', '')) - lower_guards, upper_guards = {}, {} + lower_boundaries, upper_boundaries = {}, {} - lower_guards['x'] = filenum % nxpe == 0 - upper_guards['x'] = filenum % nxpe == nxpe-1 + lower_boundaries['x'] = filenum % nxpe == 0 + upper_boundaries['x'] = filenum % nxpe == nxpe-1 - lower_guards['y'] = filenum < nxpe - upper_guards['y'] = filenum >= (nype-1)*nxpe + lower_boundaries['y'] = filenum < nxpe + upper_boundaries['y'] = filenum >= (nype-1)*nxpe - return lower_guards, upper_guards + return lower_boundaries, upper_boundaries def _strip_metadata(ds): diff --git a/xbout/tests/test_boutdataset.py b/xbout/tests/test_boutdataset.py index 532abe5e..b8bbaa44 100644 --- a/xbout/tests/test_boutdataset.py +++ b/xbout/tests/test_boutdataset.py @@ -21,17 +21,17 @@ class TestBoutDatasetIsXarrayDataset: def test_concat(self, tmpdir_factory, bout_xyt_example_files): path1 = bout_xyt_example_files(tmpdir_factory, nxpe=3, nype=4, nt=1) bd1 = open_boutdataset(datapath=path1, inputfilepath=None, - keep_xguards=False) + keep_xboundaries=False) path2 = bout_xyt_example_files(tmpdir_factory, nxpe=3, nype=4, nt=1) bd2 = open_boutdataset(datapath=path2, inputfilepath=None, - keep_xguards=False) + keep_xboundaries=False) result = concat([bd1, bd2], dim='run') assert result.dims == {**bd1.dims, 'run': 2} def test_isel(self, tmpdir_factory, bout_xyt_example_files): path = bout_xyt_example_files(tmpdir_factory, nxpe=1, nype=1, nt=1) bd = open_boutdataset(datapath=path, inputfilepath=None, - keep_xguards=False) + keep_xboundaries=False) actual = bd.isel(x=slice(None,None,2)) expected = bd.bout.data.isel(x=slice(None,None,2)) xrt.assert_equal(actual, expected) diff --git a/xbout/tests/test_load.py b/xbout/tests/test_load.py index 74131568..27c31276 100644 --- a/xbout/tests/test_load.py +++ b/xbout/tests/test_load.py @@ -13,7 +13,7 @@ from xbout.load import _check_filetype, _expand_wildcards, _expand_filepaths,\ _arrange_for_concatenation, _trim, _strip_metadata, \ - _auto_open_mfboutdataset, _infer_contains_guards + _auto_open_mfboutdataset, _infer_contains_boundaries def test_check_extensions(tmpdir): @@ -168,7 +168,7 @@ def bout_xyt_example_files(tmpdir_factory): def _bout_xyt_example_files(tmpdir_factory, prefix='BOUT.dmp', lengths=(2,4,7,6), - nxpe=4, nype=2, nt=1, ghosts={}, guards={}, syn_data_type='random'): + nxpe=4, nype=2, nt=1, guards={}, boundary_cells={}, syn_data_type='random'): """ Mocks up a set of BOUT-like netCDF files, and return the temporary test directory containing them. @@ -178,7 +178,7 @@ def _bout_xyt_example_files(tmpdir_factory, prefix='BOUT.dmp', lengths=(2,4,7,6) save_dir = tmpdir_factory.mktemp("data") ds_list, file_list = create_bout_ds_list(prefix=prefix, lengths=lengths, nxpe=nxpe, nype=nype, nt=nt, - ghosts=ghosts, guards=guards, syn_data_type=syn_data_type) + guards=guards, boundary_cells=boundary_cells, syn_data_type=syn_data_type) for ds, file_name in zip(ds_list, file_list): ds.to_netcdf(str(save_dir.join(str(file_name)))) @@ -196,7 +196,8 @@ def _bout_xyt_example_files(tmpdir_factory, prefix='BOUT.dmp', lengths=(2,4,7,6) return glob_pattern -def create_bout_ds_list(prefix, lengths=(2,4,7,6), nxpe=4, nype=2, nt=1, ghosts={}, guards={}, syn_data_type='random'): +def create_bout_ds_list(prefix, lengths=(2,4,7,6), nxpe=4, nype=2, nt=1, guards={}, + boundary_cells={}, syn_data_type='random'): """ Mocks up a set of BOUT-like datasets. @@ -211,21 +212,21 @@ def create_bout_ds_list(prefix, lengths=(2,4,7,6), nxpe=4, nype=2, nt=1, ghosts= filename = prefix + "." + str(num) + ".nc" file_list.append(filename) - # Include ghost cells - upper_bndry_cells = {dim: ghosts.get(dim) for dim in ghosts.keys()} - lower_bndry_cells = {dim: ghosts.get(dim) for dim in ghosts.keys()} - # Include guard cells + upper_bndry_cells = {dim: guards.get(dim) for dim in guards.keys()} + lower_bndry_cells = {dim: guards.get(dim) for dim in guards.keys()} + + # Include boundary cells for dim in ['x', 'y']: - if dim in guards.keys(): + if dim in boundary_cells.keys(): if i == 0: - lower_bndry_cells[dim] = guards[dim] + lower_bndry_cells[dim] = boundary_cells[dim] if i == nxpe-1: - upper_bndry_cells[dim] = guards[dim] + upper_bndry_cells[dim] = boundary_cells[dim] ds = create_bout_ds(syn_data_type=syn_data_type, num=num, lengths=lengths, nxpe=nxpe, nype=nype, upper_bndry_cells=upper_bndry_cells, lower_bndry_cells=lower_bndry_cells, - guards=guards, ghosts=ghosts) + boundary_cells=boundary_cells, guards=guards) ds_list.append(ds) # Sort this in order of num to remove any BOUT-specific structure @@ -236,7 +237,8 @@ def create_bout_ds_list(prefix, lengths=(2,4,7,6), nxpe=4, nype=2, nt=1, ghosts= def create_bout_ds(syn_data_type='random', lengths=(2,4,7,6), num=0, nxpe=1, nype=1, - upper_bndry_cells={}, lower_bndry_cells={}, guards={}, ghosts={}): + upper_bndry_cells={}, lower_bndry_cells={}, boundary_cells={}, + guards={}): # Set the shape of the data in this dataset x_length, y_length, z_length, t_length = lengths @@ -269,11 +271,11 @@ def create_bout_ds(syn_data_type='random', lengths=(2,4,7,6), num=0, nxpe=1, nyp # Include metadata ds['NXPE'] = nxpe ds['NYPE'] = nype - ds['MXG'] = guards.get('x', 0) - ds['MYG'] = guards.get('y', 0) + ds['MXG'] = boundary_cells.get('x', 0) + ds['MYG'] = boundary_cells.get('y', 0) ds['nx'] = x_length - ds['MXSUB'] = ghosts.get('x', 0) - ds['MYSUB'] = ghosts.get('y', 0) + ds['MXSUB'] = guards.get('x', 0) + ds['MYSUB'] = guards.get('y', 0) ds['MZ'] = z_length return ds @@ -294,7 +296,7 @@ def test_strip_metadata(self): assert metadata['NXPE'] == 1 -# TODO also test loading multiple files which have ghost cells +# TODO also test loading multiple files which have guard cells class TestCombineNoTrim: def test_single_file(self, tmpdir_factory, bout_xyt_example_files): path = bout_xyt_example_files(tmpdir_factory, nxpe=1, nype=1, nt=1) @@ -346,19 +348,21 @@ def test_no_trim(self): ds = create_test_data(0) # Manually add filename - encoding normally added by xr.open_dataset ds.encoding['source'] = 'folder0/BOUT.dmp.0.nc' - actual = _trim(ds, ghosts={}, guards={}, keep_guards={}) + actual = _trim(ds, guards={}, boundary_cells={}, keep_boundaries={}, nxpe=1, + nype=1) xrt.assert_equal(actual, ds) - def test_trim_ghosts(self): + def test_trim_guards(self): ds = create_test_data(0) # Manually add filename - encoding normally added by xr.open_dataset ds.encoding['source'] = 'folder0/BOUT.dmp.0.nc' - actual = _trim(ds, ghosts={'time': 2}) + actual = _trim(ds, guards={'time': 2}, boundary_cells={}, keep_boundaries={}, + nxpe=1, nype=1) selection = {'time': slice(2, -2)} expected = ds.isel(**selection) xrt.assert_equal(expected, actual) - @pytest.mark.parametrize("filenum, nxpe, nype, lower_guards, upper_guards", + @pytest.mark.parametrize("filenum, nxpe, nype, lower_boundaries, upper_boundaries", # no parallelization [(0, 1, 1, {'x': True, 'y': True}, {'x': True, 'y': True}), @@ -414,8 +418,8 @@ def test_trim_ghosts(self): (10, 3, 4, {'x': False, 'y': False}, {'x': False, 'y': True}) ]) - def test_infer_guards_2d_parallelization(self, filenum, nxpe, nype, - lower_guards, upper_guards): + def test_infer_boundaries_2d_parallelization(self, filenum, nxpe, nype, + lower_boundaries, upper_boundaries): """ Numbering scheme for nxpe=3, nype=4 @@ -427,21 +431,21 @@ def test_infer_guards_2d_parallelization(self, filenum, nxpe, nype, """ filename = "folder0/BOUT.dmp." + str(filenum) + ".nc" - actual_lower_guards, actual_upper_guards = _infer_contains_guards( + actual_lower_boundaries, actual_upper_boundaries = _infer_contains_boundaries( filename, nxpe, nype) - assert actual_lower_guards == lower_guards - assert actual_upper_guards == upper_guards + assert actual_lower_boundaries == lower_boundaries + assert actual_upper_boundaries == upper_boundaries - def test_keep_xguards(self): + def test_keep_xboundaries(self): ds = create_test_data(0) ds = ds.rename({'dim2': 'x'}) # Manually add filename - encoding normally added by xr.open_dataset ds.encoding['source'] = 'folder0/BOUT.dmp.0.nc' - actual = _trim(ds, ghosts={'x': 2}, guards={'x': 2}, - keep_guards={'x': True}, nxpe=1, nype=1) + actual = _trim(ds, guards={'x': 2}, boundary_cells={'x': 2}, + keep_boundaries={'x': True}, nxpe=1, nype=1) expected = ds # Should be unchanged xrt.assert_equal(expected, actual) @@ -455,7 +459,7 @@ def test_trim_timing_info(self): for v in _BOUT_TIMING_VARIABLES: ds[v] = 42. - ds = _trim(ds, {}) + ds = _trim(ds, guards={}, boundary_cells={}, keep_boundaries={}, nxpe=1, nype=1) expected = create_test_data(0) xrt.assert_equal(ds, expected) From 8301fe0e9d64e4ddac6c0eb21ba9fc3d8efec250 Mon Sep 17 00:00:00 2001 From: John Omotani Date: Mon, 22 Jul 2019 21:57:06 +0100 Subject: [PATCH 11/13] Remove separate guard and boundary cell count inputs In BOUT++ there are always the same number of guard cells and boundary cells in a given direction, so do not need to specify the number of boundary cells separately. --- xbout/load.py | 11 +++-------- xbout/tests/test_load.py | 30 ++++++++++++++---------------- 2 files changed, 17 insertions(+), 24 deletions(-) diff --git a/xbout/load.py b/xbout/load.py index 149200b7..4e402110 100644 --- a/xbout/load.py +++ b/xbout/load.py @@ -22,7 +22,6 @@ def _auto_open_mfboutdataset(datapath, chunks={}, info=True, paths_grid, concat_dims = _arrange_for_concatenation(filepaths, nxpe, nype) _preprocess = partial(_trim, guards={'x': mxg, 'y': myg}, - boundary_cells={'x': mxg, 'y': myg}, keep_boundaries={'x': keep_xboundaries, 'y': keep_yboundaries}, nxpe=nxpe, nype=nype) @@ -93,8 +92,6 @@ def _expand_wildcards(path): def _read_splitting(filepath, info=True): ds = xarray.open_dataset(str(filepath)) - # TODO check that BOUT doesn't ever set the number of guards to be different to the number of ghosts - # Account for case of no parallelisation, when nxpe etc won't be in dataset def get_scalar(ds, key, default=1, info=True): if key in ds: @@ -159,7 +156,7 @@ def _arrange_for_concatenation(filepaths, nxpe=1, nype=1): return paths_grid, concat_dims -def _trim(ds, *, guards, boundary_cells, keep_boundaries, nxpe, nype): +def _trim(ds, *, guards, keep_boundaries, nxpe, nype): """ Trims all guard (and optionally boundary) cells off a single dataset read from a single BOUT dump file, to prepare for concatenation. @@ -170,8 +167,6 @@ def _trim(ds, *, guards, boundary_cells, keep_boundaries, nxpe, nype): ---------- guards : dict Number of guard cells along each dimension, e.g. {'x': 2, 't': 0} - boundary_cells : dict - Number of boundary cells along each dimension, e.g. {'x': 2, 'y': 2} keep_boundaries : dict Whether or not to preserve the boundary cells along each dimension, e.g. {'x': True, 'y': False} @@ -193,7 +188,7 @@ def _trim(ds, *, guards, boundary_cells, keep_boundaries, nxpe, nype): if lower_boundaries.get(dim, False): lower = None else: - lower = max(guards[dim], boundary_cells[dim]) + lower = guards[dim] elif guards.get(dim, False): lower = guards[dim] else: @@ -202,7 +197,7 @@ def _trim(ds, *, guards, boundary_cells, keep_boundaries, nxpe, nype): if upper_boundaries.get(dim, False): upper = None else: - upper = -max(guards[dim], boundary_cells[dim]) + upper = -guards[dim] elif guards.get(dim, False): upper = -guards[dim] else: diff --git a/xbout/tests/test_load.py b/xbout/tests/test_load.py index 27c31276..f868509d 100644 --- a/xbout/tests/test_load.py +++ b/xbout/tests/test_load.py @@ -168,7 +168,7 @@ def bout_xyt_example_files(tmpdir_factory): def _bout_xyt_example_files(tmpdir_factory, prefix='BOUT.dmp', lengths=(2,4,7,6), - nxpe=4, nype=2, nt=1, guards={}, boundary_cells={}, syn_data_type='random'): + nxpe=4, nype=2, nt=1, guards={}, syn_data_type='random'): """ Mocks up a set of BOUT-like netCDF files, and return the temporary test directory containing them. @@ -178,7 +178,7 @@ def _bout_xyt_example_files(tmpdir_factory, prefix='BOUT.dmp', lengths=(2,4,7,6) save_dir = tmpdir_factory.mktemp("data") ds_list, file_list = create_bout_ds_list(prefix=prefix, lengths=lengths, nxpe=nxpe, nype=nype, nt=nt, - guards=guards, boundary_cells=boundary_cells, syn_data_type=syn_data_type) + guards=guards, syn_data_type=syn_data_type) for ds, file_name in zip(ds_list, file_list): ds.to_netcdf(str(save_dir.join(str(file_name)))) @@ -197,7 +197,7 @@ def _bout_xyt_example_files(tmpdir_factory, prefix='BOUT.dmp', lengths=(2,4,7,6) def create_bout_ds_list(prefix, lengths=(2,4,7,6), nxpe=4, nype=2, nt=1, guards={}, - boundary_cells={}, syn_data_type='random'): + syn_data_type='random'): """ Mocks up a set of BOUT-like datasets. @@ -218,15 +218,15 @@ def create_bout_ds_list(prefix, lengths=(2,4,7,6), nxpe=4, nype=2, nt=1, guards= # Include boundary cells for dim in ['x', 'y']: - if dim in boundary_cells.keys(): + if dim in guards.keys(): if i == 0: - lower_bndry_cells[dim] = boundary_cells[dim] + lower_bndry_cells[dim] = guards[dim] if i == nxpe-1: - upper_bndry_cells[dim] = boundary_cells[dim] + upper_bndry_cells[dim] = guards[dim] ds = create_bout_ds(syn_data_type=syn_data_type, num=num, lengths=lengths, nxpe=nxpe, nype=nype, upper_bndry_cells=upper_bndry_cells, lower_bndry_cells=lower_bndry_cells, - boundary_cells=boundary_cells, guards=guards) + guards=guards) ds_list.append(ds) # Sort this in order of num to remove any BOUT-specific structure @@ -237,8 +237,7 @@ def create_bout_ds_list(prefix, lengths=(2,4,7,6), nxpe=4, nype=2, nt=1, guards= def create_bout_ds(syn_data_type='random', lengths=(2,4,7,6), num=0, nxpe=1, nype=1, - upper_bndry_cells={}, lower_bndry_cells={}, boundary_cells={}, - guards={}): + upper_bndry_cells={}, lower_bndry_cells={}, guards={}): # Set the shape of the data in this dataset x_length, y_length, z_length, t_length = lengths @@ -271,8 +270,8 @@ def create_bout_ds(syn_data_type='random', lengths=(2,4,7,6), num=0, nxpe=1, nyp # Include metadata ds['NXPE'] = nxpe ds['NYPE'] = nype - ds['MXG'] = boundary_cells.get('x', 0) - ds['MYG'] = boundary_cells.get('y', 0) + ds['MXG'] = guards.get('x', 0) + ds['MYG'] = guards.get('y', 0) ds['nx'] = x_length ds['MXSUB'] = guards.get('x', 0) ds['MYSUB'] = guards.get('y', 0) @@ -348,7 +347,7 @@ def test_no_trim(self): ds = create_test_data(0) # Manually add filename - encoding normally added by xr.open_dataset ds.encoding['source'] = 'folder0/BOUT.dmp.0.nc' - actual = _trim(ds, guards={}, boundary_cells={}, keep_boundaries={}, nxpe=1, + actual = _trim(ds, guards={}, keep_boundaries={}, nxpe=1, nype=1) xrt.assert_equal(actual, ds) @@ -356,7 +355,7 @@ def test_trim_guards(self): ds = create_test_data(0) # Manually add filename - encoding normally added by xr.open_dataset ds.encoding['source'] = 'folder0/BOUT.dmp.0.nc' - actual = _trim(ds, guards={'time': 2}, boundary_cells={}, keep_boundaries={}, + actual = _trim(ds, guards={'time': 2}, keep_boundaries={}, nxpe=1, nype=1) selection = {'time': slice(2, -2)} expected = ds.isel(**selection) @@ -444,8 +443,7 @@ def test_keep_xboundaries(self): # Manually add filename - encoding normally added by xr.open_dataset ds.encoding['source'] = 'folder0/BOUT.dmp.0.nc' - actual = _trim(ds, guards={'x': 2}, boundary_cells={'x': 2}, - keep_boundaries={'x': True}, nxpe=1, nype=1) + actual = _trim(ds, guards={'x': 2}, keep_boundaries={'x': True}, nxpe=1, nype=1) expected = ds # Should be unchanged xrt.assert_equal(expected, actual) @@ -459,7 +457,7 @@ def test_trim_timing_info(self): for v in _BOUT_TIMING_VARIABLES: ds[v] = 42. - ds = _trim(ds, guards={}, boundary_cells={}, keep_boundaries={}, nxpe=1, nype=1) + ds = _trim(ds, guards={}, keep_boundaries={}, nxpe=1, nype=1) expected = create_test_data(0) xrt.assert_equal(ds, expected) From 9f857a6a879e15b9c07a975496ce3ed7f0e62443 Mon Sep 17 00:00:00 2001 From: John Omotani Date: Mon, 22 Jul 2019 23:08:37 +0100 Subject: [PATCH 12/13] Docstrings for keep_xboundaries and keep_yboundaries in open_boutdataset --- xbout/boutdataset.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/xbout/boutdataset.py b/xbout/boutdataset.py index 58eb8a7f..8a13fac2 100644 --- a/xbout/boutdataset.py +++ b/xbout/boutdataset.py @@ -44,6 +44,14 @@ def open_boutdataset(datapath='./BOUT.dmp.*.nc', chunks : dict, optional inputfilepath : str, optional gridfilepath : str, optional + keep_xboundaries : bool, optional + If true, keep x-direction boundary cells (the cells past the physical edges of the + grid, where boundary conditions are set); increases the size of the x dimension in + the returned data-set. If false, trim these cells. + keep_yboundaries : bool, optional + If true, keep y-direction boundary cells (the cells past the physical edges of the + grid, where boundary conditions are set); increases the size of the y dimension in + the returned data-set. If false, trim these cells. run_name : str, optional info : bool, optional From b4d8a23de903791b8b2cc201a98ec25d396c083e Mon Sep 17 00:00:00 2001 From: John Omotani Date: Tue, 23 Jul 2019 09:17:15 +0100 Subject: [PATCH 13/13] Fix PEP8 warnings --- xbout/boutdataset.py | 12 ++++++------ xbout/tests/test_load.py | 10 +++++----- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/xbout/boutdataset.py b/xbout/boutdataset.py index 8a13fac2..13317ae6 100644 --- a/xbout/boutdataset.py +++ b/xbout/boutdataset.py @@ -45,13 +45,13 @@ def open_boutdataset(datapath='./BOUT.dmp.*.nc', inputfilepath : str, optional gridfilepath : str, optional keep_xboundaries : bool, optional - If true, keep x-direction boundary cells (the cells past the physical edges of the - grid, where boundary conditions are set); increases the size of the x dimension in - the returned data-set. If false, trim these cells. + If true, keep x-direction boundary cells (the cells past the physical edges of + the grid, where boundary conditions are set); increases the size of the x + dimension in the returned data-set. If false, trim these cells. keep_yboundaries : bool, optional - If true, keep y-direction boundary cells (the cells past the physical edges of the - grid, where boundary conditions are set); increases the size of the y dimension in - the returned data-set. If false, trim these cells. + If true, keep y-direction boundary cells (the cells past the physical edges of + the grid, where boundary conditions are set); increases the size of the y + dimension in the returned data-set. If false, trim these cells. run_name : str, optional info : bool, optional diff --git a/xbout/tests/test_load.py b/xbout/tests/test_load.py index f868509d..97a57dfc 100644 --- a/xbout/tests/test_load.py +++ b/xbout/tests/test_load.py @@ -196,8 +196,8 @@ def _bout_xyt_example_files(tmpdir_factory, prefix='BOUT.dmp', lengths=(2,4,7,6) return glob_pattern -def create_bout_ds_list(prefix, lengths=(2,4,7,6), nxpe=4, nype=2, nt=1, guards={}, - syn_data_type='random'): +def create_bout_ds_list(prefix, lengths=(2, 4, 7, 6), nxpe=4, nype=2, nt=1, guards={}, + syn_data_type='random'): """ Mocks up a set of BOUT-like datasets. @@ -348,7 +348,7 @@ def test_no_trim(self): # Manually add filename - encoding normally added by xr.open_dataset ds.encoding['source'] = 'folder0/BOUT.dmp.0.nc' actual = _trim(ds, guards={}, keep_boundaries={}, nxpe=1, - nype=1) + nype=1) xrt.assert_equal(actual, ds) def test_trim_guards(self): @@ -356,7 +356,7 @@ def test_trim_guards(self): # Manually add filename - encoding normally added by xr.open_dataset ds.encoding['source'] = 'folder0/BOUT.dmp.0.nc' actual = _trim(ds, guards={'time': 2}, keep_boundaries={}, - nxpe=1, nype=1) + nxpe=1, nype=1) selection = {'time': slice(2, -2)} expected = ds.isel(**selection) xrt.assert_equal(expected, actual) @@ -418,7 +418,7 @@ def test_trim_guards(self): {'x': False, 'y': True}) ]) def test_infer_boundaries_2d_parallelization(self, filenum, nxpe, nype, - lower_boundaries, upper_boundaries): + lower_boundaries, upper_boundaries): """ Numbering scheme for nxpe=3, nype=4