Merge pull request #39 from boutproject/keep_guard_cells-merge-master
Merge master into #30 to fix conflicts in making boundary cells optional
TomNicholas authored Jul 25, 2019
2 parents 602f557 + b4d8a23 commit 99f6d2b
Showing 6 changed files with 103 additions and 80 deletions.
8 changes: 1 addition & 7 deletions README.md
@@ -101,13 +101,7 @@ install using `python setup.py`
 You can run the tests by navigating to the `/xBOUT/` directory and
 entering `pytest`.
 
-
-It relies on two upstream additions to xarray
-([first](https://github.com/pydata/xarray/pull/2482) &
-[second](https://github.com/pydata/xarray/pull/2553) pull requests).
-The first is merged, but the second isn't, so for now you need to clone
-the branch of xarray containing the PR
-[here](https://github.com/TomNicholas/xarray/tree/feature/nd_combine).
+Requires xarray v0.12.2 or later.
 
 You will also need to install [dask](https://dask.org/),
 as described in the xarray documentation
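
As a quick sanity check that an installed environment meets the new minimums, something like the following can be run (a minimal sketch, assuming the `packaging` package is available; the version numbers come from the updated README and requirements.txt):

    # Sketch: verify the minimum versions pinned by this commit
    from packaging.version import Version

    import dask
    import xarray

    # xarray >= 0.12.2 ships the nested-combine functionality that
    # previously required a development branch of xarray
    assert Version(xarray.__version__) >= Version("0.12.2")
    # dask[array] >= 1.0.0, as required by requirements.txt
    assert Version(dask.__version__) >= Version("1.0.0")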
2 changes: 1 addition & 1 deletion requirements.txt
@@ -1,4 +1,4 @@
-git+https://github.com/TomNicholas/xarray@feature/nd_combine#egg=xarray
+xarray >= 0.12.2
 dask[array] >= 1.0.0
 natsort >= 5.5.0
 matplotlib >= 2.2
16 changes: 12 additions & 4 deletions xbout/boutdataset.py
@@ -31,7 +31,7 @@
 
 def open_boutdataset(datapath='./BOUT.dmp.*.nc',
                      inputfilepath=None, gridfilepath=None, chunks={},
-                     keep_xguards=True, keep_yguards=False,
+                     keep_xboundaries=True, keep_yboundaries=False,
                      run_name=None, info=True):
     """
     Load a dataset from a set of BOUT output files, including the input options file.
@@ -44,6 +44,14 @@ def open_boutdataset(datapath='./BOUT.dmp.*.nc',
     chunks : dict, optional
     inputfilepath : str, optional
     gridfilepath : str, optional
+    keep_xboundaries : bool, optional
+        If true, keep x-direction boundary cells (the cells past the physical edges of
+        the grid, where boundary conditions are set); increases the size of the x
+        dimension in the returned data-set. If false, trim these cells.
+    keep_yboundaries : bool, optional
+        If true, keep y-direction boundary cells (the cells past the physical edges of
+        the grid, where boundary conditions are set); increases the size of the y
+        dimension in the returned data-set. If false, trim these cells.
     run_name : str, optional
     info : bool, optional
@@ -56,8 +64,8 @@ def open_boutdataset(datapath='./BOUT.dmp.*.nc',
 
     # Gather pointers to all numerical data from BOUT++ output files
     ds, metadata = _auto_open_mfboutdataset(datapath=datapath, chunks=chunks,
-                                            keep_xguards=keep_xguards,
-                                            keep_yguards=keep_yguards)
+                                            keep_xboundaries=keep_xboundaries,
+                                            keep_yboundaries=keep_yboundaries)
 
     ds = _set_attrs_on_all_vars(ds, 'metadata', metadata)
 
@@ -235,7 +243,7 @@ def to_restart(self, savepath='.', nxpe=None, nype=None,
         else:
             nxpe, nype = self.metadata['NXPE'], self.metadata['NYPE']
 
-        # Is this even possible without saving the ghost cells?
+        # Is this even possible without saving the guard cells?
         # Can they be recreated?
         restart_datasets, paths = _split_into_restarts(self.data, savepath,
                                                        nxpe, nype)
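
A minimal usage sketch of the renamed options (the data path is hypothetical; the defaults `keep_xboundaries=True`, `keep_yboundaries=False` are taken from the new signature above):

    from xbout import open_boutdataset

    # Keep boundary cells in both directions; the x and y dimensions of the
    # returned dataset grow by the boundary width at each physical edge.
    ds = open_boutdataset(datapath='./data/BOUT.dmp.*.nc',
                          keep_xboundaries=True, keep_yboundaries=True)

    # Trim boundary cells in both directions instead.
    ds_core = open_boutdataset(datapath='./data/BOUT.dmp.*.nc',
                               keep_xboundaries=False, keep_yboundaries=False)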
84 changes: 44 additions & 40 deletions xbout/load.py
@@ -8,25 +8,28 @@
 
 from natsort import natsorted
 
+_BOUT_TIMING_VARIABLES = ['wall_time', 'wtime', 'wtime_rhs', 'wtime_invert',
+                          'wtime_comms', 'wtime_io', 'wtime_per_rhs', 'wtime_per_rhs_e',
+                          'wtime_per_rhs_i']
 
 def _auto_open_mfboutdataset(datapath, chunks={}, info=True,
-                             keep_xguards=False, keep_yguards=False):
+                             keep_xboundaries=False, keep_yboundaries=False):
     filepaths, filetype = _expand_filepaths(datapath)
 
     # Open just one file to read processor splitting
     nxpe, nype, mxg, myg, mxsub, mysub = _read_splitting(filepaths[0], info)
 
     paths_grid, concat_dims = _arrange_for_concatenation(filepaths, nxpe, nype)
 
-    _preprocess = partial(_trim, ghosts={'x': mxg, 'y': myg},
-                          guards={'x': mxg, 'y': myg},
-                          keep_guards={'x': keep_xguards, 'y': keep_yguards},
+    _preprocess = partial(_trim, guards={'x': mxg, 'y': myg},
+                          keep_boundaries={'x': keep_xboundaries, 'y': keep_yboundaries},
                           nxpe=nxpe, nype=nype)
 
     # TODO warning message to make sure user knows if it's parallelized
     ds = xarray.open_mfdataset(paths_grid, concat_dim=concat_dims,
-                               data_vars='minimal', preprocess=_preprocess,
-                               engine=filetype, chunks=chunks, parallel=False)
+                               combine='nested', data_vars='minimal',
+                               preprocess=_preprocess, engine=filetype,
+                               chunks=chunks)
 
     ds, metadata = _strip_metadata(ds)
 
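The `combine='nested'` argument uses the N-D combine functionality released in xarray v0.12.2, replacing the development branch previously pinned in requirements.txt: the files are passed as a nested list-of-lists whose nesting order matches `concat_dims`, which is what `_arrange_for_concatenation` produces. A standalone sketch of the same call pattern (file and dimension names here are illustrative, not taken from this diff):

    import xarray

    # 2x2 processor grid: the outer nesting level is concatenated along 'y',
    # the inner level along 'x', matching concat_dim=['y', 'x'].
    paths_grid = [['run.0.nc', 'run.1.nc'],
                  ['run.2.nc', 'run.3.nc']]
    ds = xarray.open_mfdataset(paths_grid, concat_dim=['y', 'x'],
                               combine='nested', data_vars='minimal')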
@@ -89,8 +92,6 @@ def _expand_wildcards(path):
 def _read_splitting(filepath, info=True):
     ds = xarray.open_dataset(str(filepath))
 
-    # TODO check that BOUT doesn't ever set the number of guards to be different to the number of ghosts
-
     # Account for case of no parallelisation, when nxpe etc won't be in dataset
     def get_scalar(ds, key, default=1, info=True):
         if key in ds:
@@ -155,62 +156,65 @@ def _arrange_for_concatenation(filepaths, nxpe=1, nype=1):
     return paths_grid, concat_dims
 
 
-def _trim(ds, ghosts, guards={}, keep_guards={}, nxpe=1, nype=1):
+def _trim(ds, *, guards, keep_boundaries, nxpe, nype):
     """
-    Trims all ghost and guard cells off a single dataset read from a single
-    BOUT dump file, to prepare for concatenation.
+    Trims all guard (and optionally boundary) cells off a single dataset read from a
+    single BOUT dump file, to prepare for concatenation.
+
+    Also drops some variables that store timing information, which are different for each
+    process and so cannot be concatenated.
 
     Parameters
     ----------
-    ghosts : dict, optional
-        Number of ghost cells along each dimension, e.g. {'x': 2, 't': 0}
-    guards : dict, optional
-        Number of guard cells along each dimension, e.g. {'x': 2, 'y': 2}
-    keep_guards : dict, optional
-        Whether or not to preserve the guard cells along each dimension, e.g.
+    guards : dict
+        Number of guard cells along each dimension, e.g. {'x': 2, 't': 0}
+    keep_boundaries : dict
+        Whether or not to preserve the boundary cells along each dimension, e.g.
         {'x': True, 'y': False}
     """
 
-    if any(keep_guards.values()):
-        # Work out if this particular dataset contains any guard cells
+    if any(keep_boundaries.values()):
+        # Work out if this particular dataset contains any boundary cells
         # Relies on a change to xarray so datasets always have source encoding
         # See xarray GH issue #2550
-        lower_guards, upper_guards = _infer_contains_guards(
+        lower_boundaries, upper_boundaries = _infer_contains_boundaries(
             ds.encoding['source'], nxpe, nype)
     else:
-        lower_guards, upper_guards = {}, {}
+        lower_boundaries, upper_boundaries = {}, {}
 
     selection = {}
     for dim in ds.dims:
-        # Check for guard cells, otherwise use ghost cells, else leave alone
-        if keep_guards.get(dim, False):
-            if lower_guards.get(dim, False):
+        # Check for boundary cells, otherwise use guard cells, else leave alone
+        if keep_boundaries.get(dim, False):
+            if lower_boundaries.get(dim, False):
                 lower = None
             else:
-                lower = max(ghosts[dim], guards[dim])
-        elif ghosts.get(dim, False):
-            lower = ghosts[dim]
+                lower = guards[dim]
+        elif guards.get(dim, False):
+            lower = guards[dim]
         else:
             lower = None
-        if keep_guards.get(dim, False):
-            if upper_guards.get(dim, False):
+        if keep_boundaries.get(dim, False):
+            if upper_boundaries.get(dim, False):
                 upper = None
             else:
-                upper = -max(ghosts[dim], guards[dim])
-        elif ghosts.get(dim, False):
-            upper = -ghosts[dim]
+                upper = -guards[dim]
+        elif guards.get(dim, False):
+            upper = -guards[dim]
         else:
             upper = None
         selection[dim] = slice(lower, upper)
 
     trimmed_ds = ds.isel(**selection)
 
+    trimmed_ds = trimmed_ds.drop(_BOUT_TIMING_VARIABLES, errors='ignore')
+
     return trimmed_ds
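As a worked illustration of the slicing above (values chosen for illustration, not taken from this diff): with `guards = {'x': 2}`, a file from the interior of the processor grid has no boundary cells, so both branches fall through to `lower = guards['x']` and `upper = -guards['x']`; a file on the lower x edge with `keep_boundaries['x'] = True` keeps its lower edge intact:

    guards = {'x': 2}

    # Interior file: trim guard cells from both ends of x
    interior = slice(guards['x'], -guards['x'])   # slice(2, -2)

    # File at the lower x edge of the grid, keeping boundaries:
    # lower_boundaries['x'] is True, so nothing is trimmed from the start
    lower_edge = slice(None, -guards['x'])        # slice(None, -2)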
-def _infer_contains_guards(filename, nxpe, nype):
+def _infer_contains_boundaries(filename, nxpe, nype):
     """
     Uses the name of the output file and the domain decomposition to work out
-    whether this dataset contains guard (boundary) cells, and on which side.
+    whether this dataset contains boundary cells, and on which side.
 
     Uses knowledge that BOUT names its output files as /folder/prefix.num.nc,
     with a numbering scheme
@@ -220,15 +224,15 @@ def _infer_contains_guards(filename, nxpe, nype):
     *prefix, filenum, extension = Path(filename).suffixes
     filenum = int(filenum.replace('.', ''))
 
-    lower_guards, upper_guards = {}, {}
+    lower_boundaries, upper_boundaries = {}, {}
 
-    lower_guards['x'] = filenum % nxpe == 0
-    upper_guards['x'] = filenum % nxpe == nxpe-1
+    lower_boundaries['x'] = filenum % nxpe == 0
+    upper_boundaries['x'] = filenum % nxpe == nxpe-1
 
-    lower_guards['y'] = filenum < nxpe
-    upper_guards['y'] = filenum >= (nype-1)*nxpe
+    lower_boundaries['y'] = filenum < nxpe
+    upper_boundaries['y'] = filenum >= (nype-1)*nxpe
 
-    return lower_guards, upper_guards
+    return lower_boundaries, upper_boundaries
 
 
 def _strip_metadata(ds):
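
The modulo arithmetic in `_infer_contains_boundaries` relies on BOUT++ numbering its output files row-major across the processor grid, i.e. filenum = ypos * nxpe + xpos. A quick check with a hypothetical 3x2 decomposition (not taken from this diff):

    # nxpe=3, nype=2 gives files 0..5, laid out as
    #   y-row 0: 0 1 2
    #   y-row 1: 3 4 5
    nxpe, nype = 3, 2
    for filenum in range(nxpe * nype):
        lower_x = filenum % nxpe == 0           # True for files 0 and 3
        upper_x = filenum % nxpe == nxpe - 1    # True for files 2 and 5
        lower_y = filenum < nxpe                # True for files 0, 1, 2
        upper_y = filenum >= (nype - 1) * nxpe  # True for files 3, 4, 5
        print(filenum, lower_x, upper_x, lower_y, upper_y)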
6 changes: 3 additions & 3 deletions xbout/tests/test_boutdataset.py
@@ -21,17 +21,17 @@ class TestBoutDatasetIsXarrayDataset:
     def test_concat(self, tmpdir_factory, bout_xyt_example_files):
         path1 = bout_xyt_example_files(tmpdir_factory, nxpe=3, nype=4, nt=1)
         bd1 = open_boutdataset(datapath=path1, inputfilepath=None,
-                               keep_xguards=False)
+                               keep_xboundaries=False)
         path2 = bout_xyt_example_files(tmpdir_factory, nxpe=3, nype=4, nt=1)
         bd2 = open_boutdataset(datapath=path2, inputfilepath=None,
-                               keep_xguards=False)
+                               keep_xboundaries=False)
         result = concat([bd1, bd2], dim='run')
         assert result.dims == {**bd1.dims, 'run': 2}
 
     def test_isel(self, tmpdir_factory, bout_xyt_example_files):
         path = bout_xyt_example_files(tmpdir_factory, nxpe=1, nype=1, nt=1)
         bd = open_boutdataset(datapath=path, inputfilepath=None,
-                              keep_xguards=False)
+                              keep_xboundaries=False)
         actual = bd.isel(x=slice(None,None,2))
         expected = bd.bout.data.isel(x=slice(None,None,2))
         xrt.assert_equal(actual, expected)