Merge pull request #39 from boutproject/keep_guard_cells-merge-master
Merge master into #30 to fix conflicts in making boundary cells optional
TomNicholas authored Jul 25, 2019
2 parents 602f557 + b4d8a23 commit 99f6d2b
Showing 6 changed files with 103 additions and 80 deletions.
8 changes: 1 addition & 7 deletions README.md
@@ -101,13 +101,7 @@ install using `python setup.py`
 You can run the tests by navigating to the `/xBOUT/` directory and
 entering `pytest`.
 
-
-It relies on two upstream additions to xarray
-([first](https://github.com/pydata/xarray/pull/2482) &
-[second](https://github.com/pydata/xarray/pull/2553) pull requests).
-The first is merged, but the second isn't, so for now you need to clone
-the branch of xarray containing the PR
-[here](https://github.com/TomNicholas/xarray/tree/feature/nd_combine).
+Requires xarray v0.12.2 or later.
 
 You will also need to install [dask](https://dask.org/),
 as described in the xarray documentation
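
As a quick sanity check that an installed environment meets the new minimums, something like the following can be run (a minimal sketch, assuming the `packaging` package is available; the version numbers come from the updated README and requirements.txt):

    # Sketch: verify the minimum versions pinned by this commit
    from packaging.version import Version

    import dask
    import xarray

    # xarray >= 0.12.2 ships the nested-combine functionality that
    # previously required a development branch of xarray
    assert Version(xarray.__version__) >= Version("0.12.2")
    # dask[array] >= 1.0.0, as required by requirements.txt
    assert Version(dask.__version__) >= Version("1.0.0")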
2 changes: 1 addition & 1 deletion requirements.txt
@@ -1,4 +1,4 @@
-git+https://github.com/TomNicholas/xarray@feature/nd_combine#egg=xarray
+xarray >= 0.12.2
 dask[array] >= 1.0.0
 natsort >= 5.5.0
 matplotlib >= 2.2
16 changes: 12 additions & 4 deletions xbout/boutdataset.py
@@ -31,7 +31,7 @@
 
 def open_boutdataset(datapath='./BOUT.dmp.*.nc',
                      inputfilepath=None, gridfilepath=None, chunks={},
-                     keep_xguards=True, keep_yguards=False,
+                     keep_xboundaries=True, keep_yboundaries=False,
                      run_name=None, info=True):
     """
     Load a dataset from a set of BOUT output files, including the input options file.
@@ -44,6 +44,14 @@ def open_boutdataset(datapath='./BOUT.dmp.*.nc',
     chunks : dict, optional
     inputfilepath : str, optional
     gridfilepath : str, optional
+    keep_xboundaries : bool, optional
+        If true, keep x-direction boundary cells (the cells past the physical edges of
+        the grid, where boundary conditions are set); increases the size of the x
+        dimension in the returned data-set. If false, trim these cells.
+    keep_yboundaries : bool, optional
+        If true, keep y-direction boundary cells (the cells past the physical edges of
+        the grid, where boundary conditions are set); increases the size of the y
+        dimension in the returned data-set. If false, trim these cells.
     run_name : str, optional
     info : bool, optional
@@ -56,8 +64,8 @@ def open_boutdataset(datapath='./BOUT.dmp.*.nc',
 
     # Gather pointers to all numerical data from BOUT++ output files
     ds, metadata = _auto_open_mfboutdataset(datapath=datapath, chunks=chunks,
-                                            keep_xguards=keep_xguards,
-                                            keep_yguards=keep_yguards)
+                                            keep_xboundaries=keep_xboundaries,
+                                            keep_yboundaries=keep_yboundaries)
 
     ds = _set_attrs_on_all_vars(ds, 'metadata', metadata)
 
@@ -235,7 +243,7 @@ def to_restart(self, savepath='.', nxpe=None, nype=None,
         else:
             nxpe, nype = self.metadata['NXPE'], self.metadata['NYPE']
 
-        # Is this even possible without saving the ghost cells?
+        # Is this even possible without saving the guard cells?
         # Can they be recreated?
         restart_datasets, paths = _split_into_restarts(self.data, savepath,
                                                        nxpe, nype)
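
A minimal usage sketch of the renamed options (the data path is hypothetical; the defaults `keep_xboundaries=True`, `keep_yboundaries=False` are taken from the new signature above):

    from xbout import open_boutdataset

    # Keep boundary cells in both directions; the x and y dimensions of the
    # returned dataset grow by the boundary width at each physical edge.
    ds = open_boutdataset(datapath='./data/BOUT.dmp.*.nc',
                          keep_xboundaries=True, keep_yboundaries=True)

    # Trim boundary cells in both directions instead.
    ds_core = open_boutdataset(datapath='./data/BOUT.dmp.*.nc',
                               keep_xboundaries=False, keep_yboundaries=False)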
84 changes: 44 additions & 40 deletions xbout/load.py
@@ -8,25 +8,28 @@
 
 from natsort import natsorted
 
+_BOUT_TIMING_VARIABLES = ['wall_time', 'wtime', 'wtime_rhs', 'wtime_invert',
+                          'wtime_comms', 'wtime_io', 'wtime_per_rhs', 'wtime_per_rhs_e',
+                          'wtime_per_rhs_i']
 
 def _auto_open_mfboutdataset(datapath, chunks={}, info=True,
-                             keep_xguards=False, keep_yguards=False):
+                             keep_xboundaries=False, keep_yboundaries=False):
     filepaths, filetype = _expand_filepaths(datapath)
 
     # Open just one file to read processor splitting
     nxpe, nype, mxg, myg, mxsub, mysub = _read_splitting(filepaths[0], info)
 
     paths_grid, concat_dims = _arrange_for_concatenation(filepaths, nxpe, nype)
 
-    _preprocess = partial(_trim, ghosts={'x': mxg, 'y': myg},
-                          guards={'x': mxg, 'y': myg},
-                          keep_guards={'x': keep_xguards, 'y': keep_yguards},
+    _preprocess = partial(_trim, guards={'x': mxg, 'y': myg},
+                          keep_boundaries={'x': keep_xboundaries, 'y': keep_yboundaries},
                           nxpe=nxpe, nype=nype)
 
     # TODO warning message to make sure user knows if it's parallelized
     ds = xarray.open_mfdataset(paths_grid, concat_dim=concat_dims,
-                               data_vars='minimal', preprocess=_preprocess,
-                               engine=filetype, chunks=chunks, parallel=False)
+                               combine='nested', data_vars='minimal',
+                               preprocess=_preprocess, engine=filetype,
+                               chunks=chunks)
 
     ds, metadata = _strip_metadata(ds)
 
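The `combine='nested'` argument uses the N-D combine functionality released in xarray v0.12.2, replacing the development branch previously pinned in requirements.txt: the files are passed as a nested list-of-lists whose nesting order matches `concat_dims`, which is what `_arrange_for_concatenation` produces. A standalone sketch of the same call pattern (file and dimension names here are illustrative, not taken from this diff):

    import xarray

    # 2x2 processor grid: the outer nesting level is concatenated along 'y',
    # the inner level along 'x', matching concat_dim=['y', 'x'].
    paths_grid = [['run.0.nc', 'run.1.nc'],
                  ['run.2.nc', 'run.3.nc']]
    ds = xarray.open_mfdataset(paths_grid, concat_dim=['y', 'x'],
                               combine='nested', data_vars='minimal')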
@@ -89,8 +92,6 @@ def _expand_wildcards(path):
 def _read_splitting(filepath, info=True):
     ds = xarray.open_dataset(str(filepath))
 
-    # TODO check that BOUT doesn't ever set the number of guards to be different to the number of ghosts
-
     # Account for case of no parallelisation, when nxpe etc won't be in dataset
     def get_scalar(ds, key, default=1, info=True):
         if key in ds:
@@ -155,62 +156,65 @@ def _arrange_for_concatenation(filepaths, nxpe=1, nype=1):
     return paths_grid, concat_dims
 
 
-def _trim(ds, ghosts, guards={}, keep_guards={}, nxpe=1, nype=1):
+def _trim(ds, *, guards, keep_boundaries, nxpe, nype):
     """
-    Trims all ghost and guard cells off a single dataset read from a single
-    BOUT dump file, to prepare for concatenation.
+    Trims all guard (and optionally boundary) cells off a single dataset read from a
+    single BOUT dump file, to prepare for concatenation.
+
+    Also drops some variables that store timing information, which are different for each
+    process and so cannot be concatenated.
 
     Parameters
     ----------
-    ghosts : dict, optional
-        Number of ghost cells along each dimension, e.g. {'x': 2, 't': 0}
-    guards : dict, optional
-        Number of guard cells along each dimension, e.g. {'x': 2, 'y': 2}
-    keep_guards : dict, optional
-        Whether or not to preserve the guard cells along each dimension, e.g.
+    guards : dict
+        Number of guard cells along each dimension, e.g. {'x': 2, 't': 0}
+    keep_boundaries : dict
+        Whether or not to preserve the boundary cells along each dimension, e.g.
         {'x': True, 'y': False}
     """
 
-    if any(keep_guards.values()):
-        # Work out if this particular dataset contains any guard cells
+    if any(keep_boundaries.values()):
+        # Work out if this particular dataset contains any boundary cells
         # Relies on a change to xarray so datasets always have source encoding
         # See xarray GH issue #2550
-        lower_guards, upper_guards = _infer_contains_guards(
+        lower_boundaries, upper_boundaries = _infer_contains_boundaries(
             ds.encoding['source'], nxpe, nype)
     else:
-        lower_guards, upper_guards = {}, {}
+        lower_boundaries, upper_boundaries = {}, {}
 
     selection = {}
     for dim in ds.dims:
-        # Check for guard cells, otherwise use ghost cells, else leave alone
-        if keep_guards.get(dim, False):
-            if lower_guards.get(dim, False):
+        # Check for boundary cells, otherwise use guard cells, else leave alone
+        if keep_boundaries.get(dim, False):
+            if lower_boundaries.get(dim, False):
                 lower = None
             else:
-                lower = max(ghosts[dim], guards[dim])
-        elif ghosts.get(dim, False):
-            lower = ghosts[dim]
+                lower = guards[dim]
+        elif guards.get(dim, False):
+            lower = guards[dim]
         else:
             lower = None
-        if keep_guards.get(dim, False):
-            if upper_guards.get(dim, False):
+        if keep_boundaries.get(dim, False):
+            if upper_boundaries.get(dim, False):
                 upper = None
             else:
-                upper = -max(ghosts[dim], guards[dim])
-        elif ghosts.get(dim, False):
-            upper = -ghosts[dim]
+                upper = -guards[dim]
+        elif guards.get(dim, False):
+            upper = -guards[dim]
         else:
             upper = None
         selection[dim] = slice(lower, upper)
 
     trimmed_ds = ds.isel(**selection)
 
+    trimmed_ds = trimmed_ds.drop(_BOUT_TIMING_VARIABLES, errors='ignore')
+
     return trimmed_ds
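As a worked illustration of the slicing above (values chosen for illustration, not taken from this diff): with `guards = {'x': 2}`, a file from the interior of the processor grid has no boundary cells, so both branches fall through to `lower = guards['x']` and `upper = -guards['x']`; a file on the lower x edge with `keep_boundaries['x'] = True` keeps its lower edge intact:

    guards = {'x': 2}

    # Interior file: trim guard cells from both ends of x
    interior = slice(guards['x'], -guards['x'])   # slice(2, -2)

    # File at the lower x edge of the grid, keeping boundaries:
    # lower_boundaries['x'] is True, so nothing is trimmed from the start
    lower_edge = slice(None, -guards['x'])        # slice(None, -2)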
-def _infer_contains_guards(filename, nxpe, nype):
+def _infer_contains_boundaries(filename, nxpe, nype):
     """
     Uses the name of the output file and the domain decomposition to work out
-    whether this dataset contains guard (boundary) cells, and on which side.
+    whether this dataset contains boundary cells, and on which side.
 
     Uses knowledge that BOUT names its output files as /folder/prefix.num.nc,
     with a numbering scheme
@@ -220,15 +224,15 @@ def _infer_contains_guards(filename, nxpe, nype):
     *prefix, filenum, extension = Path(filename).suffixes
     filenum = int(filenum.replace('.', ''))
 
-    lower_guards, upper_guards = {}, {}
+    lower_boundaries, upper_boundaries = {}, {}
 
-    lower_guards['x'] = filenum % nxpe == 0
-    upper_guards['x'] = filenum % nxpe == nxpe-1
+    lower_boundaries['x'] = filenum % nxpe == 0
+    upper_boundaries['x'] = filenum % nxpe == nxpe-1
 
-    lower_guards['y'] = filenum < nxpe
-    upper_guards['y'] = filenum >= (nype-1)*nxpe
+    lower_boundaries['y'] = filenum < nxpe
+    upper_boundaries['y'] = filenum >= (nype-1)*nxpe
 
-    return lower_guards, upper_guards
+    return lower_boundaries, upper_boundaries
 
 
 def _strip_metadata(ds):
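
The modulo arithmetic in `_infer_contains_boundaries` relies on BOUT++ numbering its output files row-major across the processor grid, i.e. filenum = ypos * nxpe + xpos. A quick check with a hypothetical 3x2 decomposition (not taken from this diff):

    # nxpe=3, nype=2 gives files 0..5, laid out as
    #   y-row 0: 0 1 2
    #   y-row 1: 3 4 5
    nxpe, nype = 3, 2
    for filenum in range(nxpe * nype):
        lower_x = filenum % nxpe == 0           # True for files 0 and 3
        upper_x = filenum % nxpe == nxpe - 1    # True for files 2 and 5
        lower_y = filenum < nxpe                # True for files 0, 1, 2
        upper_y = filenum >= (nype - 1) * nxpe  # True for files 3, 4, 5
        print(filenum, lower_x, upper_x, lower_y, upper_y)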
6 changes: 3 additions & 3 deletions xbout/tests/test_boutdataset.py
@@ -21,17 +21,17 @@ class TestBoutDatasetIsXarrayDataset:
     def test_concat(self, tmpdir_factory, bout_xyt_example_files):
         path1 = bout_xyt_example_files(tmpdir_factory, nxpe=3, nype=4, nt=1)
         bd1 = open_boutdataset(datapath=path1, inputfilepath=None,
-                               keep_xguards=False)
+                               keep_xboundaries=False)
         path2 = bout_xyt_example_files(tmpdir_factory, nxpe=3, nype=4, nt=1)
         bd2 = open_boutdataset(datapath=path2, inputfilepath=None,
-                               keep_xguards=False)
+                               keep_xboundaries=False)
         result = concat([bd1, bd2], dim='run')
         assert result.dims == {**bd1.dims, 'run': 2}
 
     def test_isel(self, tmpdir_factory, bout_xyt_example_files):
         path = bout_xyt_example_files(tmpdir_factory, nxpe=1, nype=1, nt=1)
         bd = open_boutdataset(datapath=path, inputfilepath=None,
-                              keep_xguards=False)
+                              keep_xboundaries=False)
         actual = bd.isel(x=slice(None,None,2))
         expected = bd.bout.data.isel(x=slice(None,None,2))
         xrt.assert_equal(actual, expected)