diff --git a/xbout/boutdataset.py b/xbout/boutdataset.py
index 6726f9e5..13317ae6 100644
--- a/xbout/boutdataset.py
+++ b/xbout/boutdataset.py
@@ -31,6 +31,7 @@
 def open_boutdataset(datapath='./BOUT.dmp.*.nc',
                      inputfilepath=None, gridfilepath=None, chunks={},
+                     keep_xboundaries=True, keep_yboundaries=False,
                      run_name=None, info=True):
     """
     Load a dataset from a set of BOUT output files, including the input options file.
@@ -43,6 +44,14 @@
     chunks : dict, optional
     inputfilepath : str, optional
     gridfilepath : str, optional
+    keep_xboundaries : bool, optional
+        If true, keep x-direction boundary cells (the cells past the physical edges of
+        the grid, where boundary conditions are set); increases the size of the x
+        dimension in the returned data-set. If false, trim these cells.
+    keep_yboundaries : bool, optional
+        If true, keep y-direction boundary cells (the cells past the physical edges of
+        the grid, where boundary conditions are set); increases the size of the y
+        dimension in the returned data-set. If false, trim these cells.
     run_name : str, optional
     info : bool, optional
@@ -54,7 +63,10 @@
     # TODO handle possibility that we are loading a previously saved (and trimmed) dataset

     # Gather pointers to all numerical data from BOUT++ output files
-    ds, metadata = _auto_open_mfboutdataset(datapath=datapath, chunks=chunks)
+    ds, metadata = _auto_open_mfboutdataset(datapath=datapath, chunks=chunks,
+                                            keep_xboundaries=keep_xboundaries,
+                                            keep_yboundaries=keep_yboundaries)
+
     ds = _set_attrs_on_all_vars(ds, 'metadata', metadata)

     if inputfilepath:
@@ -231,7 +243,7 @@
         else:
             nxpe, nype = self.metadata['NXPE'], self.metadata['NYPE']

-        # Is this even possible without saving the ghost cells?
+        # Is this even possible without saving the guard cells?
         # Can they be recreated?
         restart_datasets, paths = _split_into_restarts(self.data, savepath, nxpe, nype)
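As a sanity check for reviewers, a minimal usage sketch of the new keywords. The data path is hypothetical and the arithmetic assumes the run used MXG = 2; boundary cells exist only at the two global x edges, MXG cells each.

```python
from xbout import open_boutdataset

# Hypothetical run directory; assumes MXG = 2 guard/boundary cells.
ds_keep = open_boutdataset('data/BOUT.dmp.*.nc', keep_xboundaries=True)
ds_trim = open_boutdataset('data/BOUT.dmp.*.nc', keep_xboundaries=False)

# Trimming removes MXG cells from each global x edge, so the x
# dimension should shrink by 2 * MXG = 4 points.
print(ds_keep.dims['x'] - ds_trim.dims['x'])  # expect 4
```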
diff --git a/xbout/load.py b/xbout/load.py
index 6eb5efdd..c4b363c1 100644
--- a/xbout/load.py
+++ b/xbout/load.py
@@ -12,7 +12,9 @@
                           'wtime_comms', 'wtime_io', 'wtime_per_rhs', 'wtime_per_rhs_e',
                           'wtime_per_rhs_i']

-def _auto_open_mfboutdataset(datapath, chunks={}, info=True, keep_guards=True):
+
+def _auto_open_mfboutdataset(datapath, chunks={}, info=True,
+                             keep_xboundaries=False, keep_yboundaries=False):
     filepaths, filetype = _expand_filepaths(datapath)

     # Open just one file to read processor splitting
@@ -20,8 +22,11 @@

     paths_grid, concat_dims = _arrange_for_concatenation(filepaths, nxpe, nype)

-    _preprocess = partial(_trim, ghosts={'x': mxg, 'y': myg})
+    _preprocess = partial(_trim, guards={'x': mxg, 'y': myg},
+                          keep_boundaries={'x': keep_xboundaries, 'y': keep_yboundaries},
+                          nxpe=nxpe, nype=nype)

+    # TODO warning message to make sure user knows if it's parallelized
     ds = xarray.open_mfdataset(paths_grid, concat_dim=concat_dims,
                                combine='nested', data_vars='minimal',
                                preprocess=_preprocess, engine=filetype,
@@ -88,8 +93,6 @@
 def _read_splitting(filepath, info=True):
     ds = xarray.open_dataset(str(filepath))

-    # TODO check that BOUT doesn't ever set the number of guards to be different to the number of ghosts
-
     # Account for case of no parallelisation, when nxpe etc won't be in dataset
     def get_scalar(ds, key, default=1, info=True):
         if key in ds:
@@ -118,9 +121,9 @@
     ordering across different processors and consecutive simulation runs.

     Filepaths must be a sorted list. Uses the fact that BOUT's output files are
-    named as num = nxpe*i + j, and assumes that any consectutive simulation
-    runs are in directories which when sorted are in the correct order
-    (e.g. /run0/*, /run1/*, ...).
+    named as num = nxpe*i + j, where i = 0, ..., nype-1 and j = 0, ..., nxpe-1.
+    Also assumes that any consecutive simulation runs are in directories which
+    when sorted are in the correct order (e.g. /run0/*, /run1/*, ...).
     """

     nprocs = nxpe * nype
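The numbering convention in the docstring above can be sanity-checked with a short sketch (illustrative only; it mirrors the docstring rather than calling anything in xbout):

```python
nxpe, nype = 3, 4

# BOUT++ numbers dump files as num = nxpe*i + j, where i is the
# y-processor index (0 <= i < nype) and j is the x-processor index
# (0 <= j < nxpe), so (i, j) is recoverable with divmod.
for i in range(nype):
    for j in range(nxpe):
        num = nxpe * i + j
        assert divmod(num, nxpe) == (i, j)
```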
@@ -154,28 +157,57 @@
     return paths_grid, concat_dims


-def _trim(ds, ghosts={}, keep_guards=True):
+def _trim(ds, *, guards, keep_boundaries, nxpe, nype):
     """
-    Trims all ghost and guard cells off a single dataset read from a single
-    BOUT dump file, to prepare for concatenation.
+    Trims all guard (and optionally boundary) cells off a single dataset read from a
+    single BOUT dump file, to prepare for concatenation.

     Also drops some variables that store timing information, which are different for
     each process and so cannot be concatenated.

     Parameters
     ----------
-    ghosts : dict, optional
-    guards : dict, optional
-    keep_guards : dict, optional
+    guards : dict
+        Number of guard cells along each dimension, e.g. {'x': 2, 't': 0}
+    keep_boundaries : dict
+        Whether or not to preserve the boundary cells along each dimension, e.g.
+        {'x': True, 'y': False}
+    nxpe : int
+        Number of processors in x direction
+    nype : int
+        Number of processors in y direction
     """

-    # TODO generalise this function to handle guard cells being optional
-    if not keep_guards:
-        raise NotImplementedError
+    if any(keep_boundaries.values()):
+        # Work out if this particular dataset contains any boundary cells
+        # Relies on a change to xarray so datasets always have source encoding
+        # See xarray GH issue #2550
+        lower_boundaries, upper_boundaries = _infer_contains_boundaries(
+            ds.encoding['source'], nxpe, nype)
+    else:
+        lower_boundaries, upper_boundaries = {}, {}

     selection = {}
     for dim in ds.dims:
-        if ghosts.get(dim, False):
-            selection[dim] = slice(ghosts[dim], -ghosts[dim])
+        # Check for boundary cells, otherwise use guard cells, else leave alone
+        if keep_boundaries.get(dim, False):
+            if lower_boundaries.get(dim, False):
+                lower = None
+            else:
+                lower = guards[dim]
+        elif guards.get(dim, False):
+            lower = guards[dim]
+        else:
+            lower = None
+        if keep_boundaries.get(dim, False):
+            if upper_boundaries.get(dim, False):
+                upper = None
+            else:
+                upper = -guards[dim]
+        elif guards.get(dim, False):
+            upper = -guards[dim]
+        else:
+            upper = None
+        selection[dim] = slice(lower, upper)

     trimmed_ds = ds.isel(**selection)
@@ -184,6 +216,30 @@
     return trimmed_ds


+def _infer_contains_boundaries(filename, nxpe, nype):
+    """
+    Uses the name of the output file and the domain decomposition to work out
+    whether this dataset contains boundary cells, and on which side.
+
+    Uses knowledge that BOUT names its output files as /folder/prefix.num.nc,
+    with a numbering scheme
+    num = nxpe*i + j, where i = 0, ..., nype-1 and j = 0, ..., nxpe-1
+    """
+
+    *prefix, filenum, extension = Path(filename).suffixes
+    filenum = int(filenum.replace('.', ''))
+
+    lower_boundaries, upper_boundaries = {}, {}
+
+    lower_boundaries['x'] = filenum % nxpe == 0
+    upper_boundaries['x'] = filenum % nxpe == nxpe-1
+
+    lower_boundaries['y'] = filenum < nxpe
+    upper_boundaries['y'] = filenum >= (nype-1)*nxpe
+
+    return lower_boundaries, upper_boundaries
+
+
 def _strip_metadata(ds):
     """
     Extract the metadata (nxpe, myg etc.) from the Dataset.
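A quick way to exercise the inference above over a whole decomposition; the folder and prefix are arbitrary, only the file number matters to the modular arithmetic:

```python
from xbout.load import _infer_contains_boundaries

nxpe, nype = 3, 4
for num in range(nxpe * nype):
    lower, upper = _infer_contains_boundaries(
        'folder0/BOUT.dmp.{}.nc'.format(num), nxpe, nype)
    print(num, lower, upper)

# e.g. file 0 (bottom-left corner) reports
# lower = {'x': True, 'y': True}, upper = {'x': False, 'y': False}
```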
diff --git a/xbout/tests/test_boutdataset.py b/xbout/tests/test_boutdataset.py
index c4f81db9..b8bbaa44 100644
--- a/xbout/tests/test_boutdataset.py
+++ b/xbout/tests/test_boutdataset.py
@@ -20,15 +20,18 @@
 class TestBoutDatasetIsXarrayDataset:
     def test_concat(self, tmpdir_factory, bout_xyt_example_files):
         path1 = bout_xyt_example_files(tmpdir_factory, nxpe=3, nype=4, nt=1)
-        bd1 = open_boutdataset(datapath=path1, inputfilepath=None)
+        bd1 = open_boutdataset(datapath=path1, inputfilepath=None,
+                               keep_xboundaries=False)
         path2 = bout_xyt_example_files(tmpdir_factory, nxpe=3, nype=4, nt=1)
-        bd2 = open_boutdataset(datapath=path2, inputfilepath=None)
+        bd2 = open_boutdataset(datapath=path2, inputfilepath=None,
+                               keep_xboundaries=False)
         result = concat([bd1, bd2], dim='run')
         assert result.dims == {**bd1.dims, 'run': 2}

     def test_isel(self, tmpdir_factory, bout_xyt_example_files):
         path = bout_xyt_example_files(tmpdir_factory, nxpe=1, nype=1, nt=1)
-        bd = open_boutdataset(datapath=path, inputfilepath=None)
+        bd = open_boutdataset(datapath=path, inputfilepath=None,
+                              keep_xboundaries=False)
         actual = bd.isel(x=slice(None,None,2))
         expected = bd.bout.data.isel(x=slice(None,None,2))
         xrt.assert_equal(actual, expected)
diff --git a/xbout/tests/test_load.py b/xbout/tests/test_load.py
index a3056cc6..97a57dfc 100644
--- a/xbout/tests/test_load.py
+++ b/xbout/tests/test_load.py
@@ -13,7 +13,7 @@

 from xbout.load import _check_filetype, _expand_wildcards, _expand_filepaths,\
     _arrange_for_concatenation, _trim, _strip_metadata, \
-    _auto_open_mfboutdataset
+    _auto_open_mfboutdataset, _infer_contains_boundaries


 def test_check_extensions(tmpdir):
@@ -168,7 +168,7 @@ def bout_xyt_example_files(tmpdir_factory):
 def _bout_xyt_example_files(tmpdir_factory, prefix='BOUT.dmp', lengths=(2,4,7,6),
-                            nxpe=4, nype=2, nt=1, ghosts={}, guards={}, syn_data_type='random'):
+                            nxpe=4, nype=2, nt=1, guards={}, syn_data_type='random'):
     """
     Mocks up a set of BOUT-like netCDF files, and return the temporary test directory
     containing them.
@@ -178,7 +178,7 @@ def _bout_xyt_example_files(tmpdir_factory, prefix='BOUT.dmp', lengths=(2,4,7,6)
     save_dir = tmpdir_factory.mktemp("data")

     ds_list, file_list = create_bout_ds_list(prefix=prefix, lengths=lengths, nxpe=nxpe, nype=nype, nt=nt,
-                                             ghosts=ghosts, guards=guards, syn_data_type=syn_data_type)
+                                             guards=guards, syn_data_type=syn_data_type)

     for ds, file_name in zip(ds_list, file_list):
         ds.to_netcdf(str(save_dir.join(str(file_name))))
@@ -196,7 +196,8 @@ def _bout_xyt_example_files(tmpdir_factory, prefix='BOUT.dmp', lengths=(2,4,7,6)
     return glob_pattern


-def create_bout_ds_list(prefix, lengths=(2,4,7,6), nxpe=4, nype=2, nt=1, ghosts={}, guards={}, syn_data_type='random'):
+def create_bout_ds_list(prefix, lengths=(2, 4, 7, 6), nxpe=4, nype=2, nt=1, guards={},
+                        syn_data_type='random'):
     """
     Mocks up a set of BOUT-like datasets.
@@ -211,11 +212,11 @@ def create_bout_ds_list(prefix, lengths=(2,4,7,6), nxpe=4, nype=2, nt=1, ghosts=
         filename = prefix + "." + str(num) + ".nc"
         file_list.append(filename)

-        # Include ghost cells
-        upper_bndry_cells = {dim: ghosts.get(dim) for dim in ghosts.keys()}
-        lower_bndry_cells = {dim: ghosts.get(dim) for dim in ghosts.keys()}
-        # Include guard cells
+        upper_bndry_cells = {dim: guards.get(dim) for dim in guards.keys()}
+        lower_bndry_cells = {dim: guards.get(dim) for dim in guards.keys()}
+
+        # Include boundary cells
         for dim in ['x', 'y']:
             if dim in guards.keys():
                 if i == 0:
@@ -225,7 +226,7 @@
         ds = create_bout_ds(syn_data_type=syn_data_type, num=num, lengths=lengths,
                             nxpe=nxpe, nype=nype, upper_bndry_cells=upper_bndry_cells,
                             lower_bndry_cells=lower_bndry_cells,
-                            guards=guards, ghosts=ghosts)
+                            guards=guards)
         ds_list.append(ds)

     # Sort this in order of num to remove any BOUT-specific structure
@@ -236,7 +237,7 @@

 def create_bout_ds(syn_data_type='random', lengths=(2,4,7,6), num=0, nxpe=1, nype=1,
-                   upper_bndry_cells={}, lower_bndry_cells={}, guards={}, ghosts={}):
+                   upper_bndry_cells={}, lower_bndry_cells={}, guards={}):

     # Set the shape of the data in this dataset
     x_length, y_length, z_length, t_length = lengths
@@ -272,15 +273,15 @@
     ds['MXG'] = guards.get('x', 0)
     ds['MYG'] = guards.get('y', 0)
     ds['nx'] = x_length
-    ds['MXSUB'] = ghosts.get('x', 0)
-    ds['MYSUB'] = ghosts.get('y', 0)
+    ds['MXSUB'] = guards.get('x', 0)
+    ds['MYSUB'] = guards.get('y', 0)
     ds['MZ'] = z_length

     return ds


-METADATA_VARS = ['NXPE', 'NYPE', 'MXG', 'MYG', 'nx', 'MXSUB', 'MYSUB',
-                 'MZ']
+METADATA_VARS = ['NXPE', 'NYPE', 'MXG', 'MYG', 'nx', 'MXSUB', 'MYSUB', 'MZ']
+

 class TestStripMetadata():
     def test_strip_metadata(self):
@@ -294,7 +295,7 @@
         assert metadata['NXPE'] == 1


-# TODO also test loading multiple files which have ghost cells
+# TODO also test loading multiple files which have guard cells
 class TestCombineNoTrim:
     def test_single_file(self, tmpdir_factory, bout_xyt_example_files):
         path = bout_xyt_example_files(tmpdir_factory, nxpe=1, nype=1, nt=1)
@@ -344,16 +345,108 @@
 class TestTrim:
     def test_no_trim(self):
         ds = create_test_data(0)
-        actual = _trim(ds)
+        # Manually add filename - encoding normally added by xr.open_dataset
+        ds.encoding['source'] = 'folder0/BOUT.dmp.0.nc'
+        actual = _trim(ds, guards={}, keep_boundaries={}, nxpe=1,
+                       nype=1)
         xrt.assert_equal(actual, ds)

-    def test_trim_ghosts(self):
+    def test_trim_guards(self):
         ds = create_test_data(0)
-        actual = _trim(ds, ghosts={'time': 2})
+        # Manually add filename - encoding normally added by xr.open_dataset
+        ds.encoding['source'] = 'folder0/BOUT.dmp.0.nc'
+        actual = _trim(ds, guards={'time': 2}, keep_boundaries={},
+                       nxpe=1, nype=1)
         selection = {'time': slice(2, -2)}
         expected = ds.isel(**selection)
         xrt.assert_equal(expected, actual)

+    @pytest.mark.parametrize("filenum, nxpe, nype, lower_boundaries, upper_boundaries",
+                             # no parallelization
+                             [(0, 1, 1, {'x': True, 'y': True},
+                               {'x': True, 'y': True}),
+
+                              # 1d parallelization along x:
+                              # Left
+                              (0, 3, 1, {'x': True, 'y': True},
+                               {'x': False, 'y': True}),
+                              # Middle
+                              (1, 3, 1, {'x': False, 'y': True},
+                               {'x': False, 'y': True}),
+                              # Right
+                              (2, 3, 1, {'x': False, 'y': True},
+                               {'x': True, 'y': True}),
+
+                              # 1d parallelization along y:
+                              # Bottom
+                              (0, 1, 3, {'x': True, 'y': True},
+                               {'x': True, 'y': False}),
+                              # Middle
+                              (1, 1, 3, {'x': True, 'y': False},
+                               {'x': True, 'y': False}),
+                              # Top
+                              (2, 1, 3, {'x': True, 'y': False},
+                               {'x': True, 'y': True}),
+
+                              # 2d parallelization:
+                              # Bottom left corner
+                              (0, 3, 4, {'x': True, 'y': True},
+                               {'x': False, 'y': False}),
+                              # Bottom right corner
+                              (2, 3, 4, {'x': False, 'y': True},
+                               {'x': True, 'y': False}),
+                              # Top left corner
+                              (9, 3, 4, {'x': True, 'y': False},
+                               {'x': False, 'y': True}),
+                              # Top right corner
+                              (11, 3, 4, {'x': False, 'y': False},
+                               {'x': True, 'y': True}),
+                              # Centre
+                              (7, 3, 4, {'x': False, 'y': False},
+                               {'x': False, 'y': False}),
+                              # Left side
+                              (3, 3, 4, {'x': True, 'y': False},
+                               {'x': False, 'y': False}),
+                              # Right side
+                              (5, 3, 4, {'x': False, 'y': False},
+                               {'x': True, 'y': False}),
+                              # Bottom side
+                              (1, 3, 4, {'x': False, 'y': True},
+                               {'x': False, 'y': False}),
+                              # Top side
+                              (10, 3, 4, {'x': False, 'y': False},
+                               {'x': False, 'y': True})
+                              ])
+    def test_infer_boundaries_2d_parallelization(self, filenum, nxpe, nype,
+                                                 lower_boundaries, upper_boundaries):
+        """
+        Numbering scheme for nxpe=3, nype=4
+
+        y  9 10 11
+        ^  6  7  8
+        |  3  4  5
+        |  0  1  2
+        -----> x
+        """
+
+        filename = "folder0/BOUT.dmp." + str(filenum) + ".nc"
+        actual_lower_boundaries, actual_upper_boundaries = _infer_contains_boundaries(
+            filename, nxpe, nype)
+
+        assert actual_lower_boundaries == lower_boundaries
+        assert actual_upper_boundaries == upper_boundaries
+
+    def test_keep_xboundaries(self):
+        ds = create_test_data(0)
+        ds = ds.rename({'dim2': 'x'})
+
+        # Manually add filename - encoding normally added by xr.open_dataset
+        ds.encoding['source'] = 'folder0/BOUT.dmp.0.nc'
+
+        actual = _trim(ds, guards={'x': 2}, keep_boundaries={'x': True}, nxpe=1, nype=1)
+        expected = ds  # Should be unchanged
+        xrt.assert_equal(expected, actual)
+
     def test_trim_timing_info(self):
         ds = create_test_data(0)
         from xbout.load import _BOUT_TIMING_VARIABLES
@@ -364,7 +457,7 @@
         for v in _BOUT_TIMING_VARIABLES:
             ds[v] = 42.

-        ds = _trim(ds)
+        ds = _trim(ds, guards={}, keep_boundaries={}, nxpe=1, nype=1)

         expected = create_test_data(0)
         xrt.assert_equal(ds, expected)
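Finally, the per-dimension slice that `_trim` builds can be summarised by a standalone sketch. This is a condensed restatement of the patch's logic, not code from the patch itself: boundary cells are kept only where the file sits at a global edge, and interior guard cells are always stripped.

```python
def edge_slice(guard, keep_boundary, lower_edge, upper_edge):
    # Condensed form of the lower/upper selection in _trim for one dimension.
    lower = None if (keep_boundary and lower_edge) else (guard or None)
    upper = None if (keep_boundary and upper_edge) else (-guard if guard else None)
    return slice(lower, upper)

# Interior processor: guard cells trimmed from both sides.
assert edge_slice(2, True, False, False) == slice(2, -2)
# Processor at the lower global edge, keeping boundaries: lower side kept intact.
assert edge_slice(2, True, True, False) == slice(None, -2)
# No guards and no boundaries requested: dimension left untouched.
assert edge_slice(0, False, False, False) == slice(None, None)
```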