Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Optionally keep guard cells #30

Merged
merged 12 commits into from
Jul 29, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 14 additions & 2 deletions xbout/boutdataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@

def open_boutdataset(datapath='./BOUT.dmp.*.nc',
inputfilepath=None, gridfilepath=None, chunks={},
keep_xboundaries=True, keep_yboundaries=False,
run_name=None, info=True):
"""
Load a dataset from a set of BOUT output files, including the input options file.
Expand All @@ -43,6 +44,14 @@ def open_boutdataset(datapath='./BOUT.dmp.*.nc',
chunks : dict, optional
inputfilepath : str, optional
gridfilepath : str, optional
keep_xboundaries : bool, optional
If true, keep x-direction boundary cells (the cells past the physical edges of
the grid, where boundary conditions are set); increases the size of the x
dimension in the returned data-set. If false, trim these cells.
keep_yboundaries : bool, optional
If true, keep y-direction boundary cells (the cells past the physical edges of
the grid, where boundary conditions are set); increases the size of the y
dimension in the returned data-set. If false, trim these cells.
run_name : str, optional
info : bool, optional

Expand All @@ -54,7 +63,10 @@ def open_boutdataset(datapath='./BOUT.dmp.*.nc',
# TODO handle possibility that we are loading a previously saved (and trimmed) dataset

# Gather pointers to all numerical data from BOUT++ output files
ds, metadata = _auto_open_mfboutdataset(datapath=datapath, chunks=chunks)
ds, metadata = _auto_open_mfboutdataset(datapath=datapath, chunks=chunks,
keep_xboundaries=keep_xboundaries,
keep_yboundaries=keep_yboundaries)

ds = _set_attrs_on_all_vars(ds, 'metadata', metadata)

if inputfilepath:
Expand Down Expand Up @@ -231,7 +243,7 @@ def to_restart(self, savepath='.', nxpe=None, nype=None,
else:
nxpe, nype = self.metadata['NXPE'], self.metadata['NYPE']

# Is this even possible without saving the ghost cells?
# Is this even possible without saving the guard cells?
# Can they be recreated?
restart_datasets, paths = _split_into_restarts(self.data, savepath,
nxpe, nype)
Expand Down
92 changes: 74 additions & 18 deletions xbout/load.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,16 +12,21 @@
'wtime_comms', 'wtime_io', 'wtime_per_rhs', 'wtime_per_rhs_e',
'wtime_per_rhs_i']

def _auto_open_mfboutdataset(datapath, chunks={}, info=True, keep_guards=True):

def _auto_open_mfboutdataset(datapath, chunks={}, info=True,
keep_xboundaries=False, keep_yboundaries=False):
filepaths, filetype = _expand_filepaths(datapath)

# Open just one file to read processor splitting
nxpe, nype, mxg, myg, mxsub, mysub = _read_splitting(filepaths[0], info)

paths_grid, concat_dims = _arrange_for_concatenation(filepaths, nxpe, nype)

_preprocess = partial(_trim, ghosts={'x': mxg, 'y': myg})
_preprocess = partial(_trim, guards={'x': mxg, 'y': myg},
keep_boundaries={'x': keep_xboundaries, 'y': keep_yboundaries},
nxpe=nxpe, nype=nype)

# TODO warning message to make sure user knows if it's parallelized
ds = xarray.open_mfdataset(paths_grid, concat_dim=concat_dims,
combine='nested', data_vars='minimal',
preprocess=_preprocess, engine=filetype,
Expand Down Expand Up @@ -88,8 +93,6 @@ def _expand_wildcards(path):
def _read_splitting(filepath, info=True):
ds = xarray.open_dataset(str(filepath))

# TODO check that BOUT doesn't ever set the number of guards to be different to the number of ghosts

# Account for case of no parallelisation, when nxpe etc won't be in dataset
def get_scalar(ds, key, default=1, info=True):
if key in ds:
Expand Down Expand Up @@ -118,9 +121,9 @@ def _arrange_for_concatenation(filepaths, nxpe=1, nype=1):
ordering across different processors and consecutive simulation runs.

Filepaths must be a sorted list. Uses the fact that BOUT's output files are
named as num = nxpe*i + j, and assumes that any consecutive simulation
runs are in directories which when sorted are in the correct order
(e.g. /run0/*, /run1/*, ...).
named as num = nxpe*i + j, where i={0, ..., nype-1}, j={0, ..., nxpe-1}.
Also assumes that any consecutive simulation runs are in directories which
when sorted are in the correct order (e.g. /run0/*, /run1/*, ...).
"""

nprocs = nxpe * nype
Expand Down Expand Up @@ -154,28 +157,57 @@ def _arrange_for_concatenation(filepaths, nxpe=1, nype=1):
return paths_grid, concat_dims


def _trim(ds, ghosts={}, keep_guards=True):
def _trim(ds, *, guards, keep_boundaries, nxpe, nype):
"""
Trims all ghost and guard cells off a single dataset read from a single
BOUT dump file, to prepare for concatenation.
Trims all guard (and optionally boundary) cells off a single dataset read from a
single BOUT dump file, to prepare for concatenation.
Also drops some variables that store timing information, which are different for each
process and so cannot be concatenated.

Parameters
----------
ghosts : dict, optional
guards : dict, optional
keep_guards : dict, optional
guards : dict
Number of guard cells along each dimension, e.g. {'x': 2, 't': 0}
keep_boundaries : dict
Whether or not to preserve the boundary cells along each dimension, e.g.
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Just for reference, nxpe, nype aren't documented here

Also, I guess guards and keep_boundaries are optional, although that is somewhat obvious from the signature (if you're familiar with the syntax!)

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I've added nxpe and nype to the docstring.

guards and keep_boundaries aren't optional. The foo(*, a, b) syntax means that a and b are required keyword arguments. I found it recently and quite like it because I think long sequences of positional arguments are bug-prone, but it's still nice to use required arguments sometimes - for example here it's an internal-implementation-only function which isn't called with default arguments, so no need to have the handling for default arguments (that would be needed if we had something like guards=None).

{'x': True, 'y': False}
nxpe : int
Number of processors in x direction
nype : int
Number of processors in y direction
"""

# TODO generalise this function to handle guard cells being optional
if not keep_guards:
raise NotImplementedError
if any(keep_boundaries.values()):
# Work out if this particular dataset contains any boundary cells
# Relies on a change to xarray so datasets always have source encoding
# See xarray GH issue #2550
lower_boundaries, upper_boundaries = _infer_contains_boundaries(
ds.encoding['source'], nxpe, nype)
else:
lower_boundaries, upper_boundaries = {}, {}

selection = {}
for dim in ds.dims:
if ghosts.get(dim, False):
selection[dim] = slice(ghosts[dim], -ghosts[dim])
# Check for boundary cells, otherwise use guard cells, else leave alone
if keep_boundaries.get(dim, False):
if lower_boundaries.get(dim, False):
lower = None
else:
lower = guards[dim]
elif guards.get(dim, False):
lower = guards[dim]
else:
lower = None
if keep_boundaries.get(dim, False):
if upper_boundaries.get(dim, False):
upper = None
else:
upper = -guards[dim]
elif guards.get(dim, False):
upper = -guards[dim]
else:
upper = None
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The logic for upper is almost the same as for lower -- is it possible to pull out into a helper function?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Probably - I'll have a go

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

As they are not completely identical, it might be more trouble that its worth

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I managed to factor this out, so do you think this is ready to merge now @ZedThree ?

selection[dim] = slice(lower, upper)

trimmed_ds = ds.isel(**selection)

Expand All @@ -184,6 +216,30 @@ def _trim(ds, ghosts={}, keep_guards=True):
return trimmed_ds


def _infer_contains_boundaries(filename, nxpe, nype):
"""
Uses the name of the output file and the domain decomposition to work out
whether this dataset contains boundary cells, and on which side.

Uses knowledge that BOUT names its output files as /folder/prefix.num.nc,
with a numbering scheme
num = nxpe*i + j, where i={0, ..., nype}, j={0, ..., nxpe}
"""

*prefix, filenum, extension = Path(filename).suffixes
filenum = int(filenum.replace('.', ''))

lower_boundaries, upper_boundaries = {}, {}

lower_boundaries['x'] = filenum % nxpe == 0
upper_boundaries['x'] = filenum % nxpe == nxpe-1

lower_boundaries['y'] = filenum < nxpe
upper_boundaries['y'] = filenum >= (nype-1)*nxpe

return lower_boundaries, upper_boundaries


def _strip_metadata(ds):
"""
Extract the metadata (nxpe, myg etc.) from the Dataset.
Expand Down
9 changes: 6 additions & 3 deletions xbout/tests/test_boutdataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,15 +20,18 @@ class TestBoutDatasetIsXarrayDataset:

def test_concat(self, tmpdir_factory, bout_xyt_example_files):
    """
    Check that two BoutDatasets loaded from separate file sets can be
    concatenated along a new 'run' dimension, and that the result keeps
    all original dimensions plus 'run' of length 2.
    """
    # Defect fixed: the scraped diff left both the pre- and post-change call
    # lines in place, which is invalid Python; resolved to the merged state
    # (explicit keep_xboundaries=False).
    path1 = bout_xyt_example_files(tmpdir_factory, nxpe=3, nype=4, nt=1)
    bd1 = open_boutdataset(datapath=path1, inputfilepath=None,
                           keep_xboundaries=False)
    path2 = bout_xyt_example_files(tmpdir_factory, nxpe=3, nype=4, nt=1)
    bd2 = open_boutdataset(datapath=path2, inputfilepath=None,
                           keep_xboundaries=False)
    result = concat([bd1, bd2], dim='run')
    # Concatenation should add only the new 'run' dimension, of length 2
    assert result.dims == {**bd1.dims, 'run': 2}

def test_isel(self, tmpdir_factory, bout_xyt_example_files):
    """
    Check that isel on the BoutDataset accessor-wrapped Dataset gives the
    same result as isel on the underlying xarray Dataset.
    """
    # Defect fixed: the scraped diff left both the pre- and post-change call
    # lines in place, which is invalid Python; resolved to the merged state
    # (explicit keep_xboundaries=False).
    path = bout_xyt_example_files(tmpdir_factory, nxpe=1, nype=1, nt=1)
    bd = open_boutdataset(datapath=path, inputfilepath=None,
                          keep_xboundaries=False)
    actual = bd.isel(x=slice(None, None, 2))
    expected = bd.bout.data.isel(x=slice(None, None, 2))
    xrt.assert_equal(actual, expected)
Expand Down
Loading