Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Optionally keep guard cells #30

Merged
merged 12 commits into from
Jul 29, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 14 additions & 2 deletions xbout/boutdataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@

def open_boutdataset(datapath='./BOUT.dmp.*.nc',
inputfilepath=None, gridfilepath=None, chunks={},
keep_xboundaries=True, keep_yboundaries=False,
run_name=None, info=True):
"""
Load a dataset from a set of BOUT output files, including the input options file.
Expand All @@ -43,6 +44,14 @@ def open_boutdataset(datapath='./BOUT.dmp.*.nc',
chunks : dict, optional
inputfilepath : str, optional
gridfilepath : str, optional
keep_xboundaries : bool, optional
If true, keep x-direction boundary cells (the cells past the physical edges of
the grid, where boundary conditions are set); increases the size of the x
dimension in the returned data-set. If false, trim these cells.
keep_yboundaries : bool, optional
If true, keep y-direction boundary cells (the cells past the physical edges of
the grid, where boundary conditions are set); increases the size of the y
dimension in the returned data-set. If false, trim these cells.
run_name : str, optional
info : bool, optional

Expand All @@ -54,7 +63,10 @@ def open_boutdataset(datapath='./BOUT.dmp.*.nc',
# TODO handle possibility that we are loading a previously saved (and trimmed) dataset

# Gather pointers to all numerical data from BOUT++ output files
ds, metadata = _auto_open_mfboutdataset(datapath=datapath, chunks=chunks)
ds, metadata = _auto_open_mfboutdataset(datapath=datapath, chunks=chunks,
keep_xboundaries=keep_xboundaries,
keep_yboundaries=keep_yboundaries)

ds = _set_attrs_on_all_vars(ds, 'metadata', metadata)

if inputfilepath:
Expand Down Expand Up @@ -231,7 +243,7 @@ def to_restart(self, savepath='.', nxpe=None, nype=None,
else:
nxpe, nype = self.metadata['NXPE'], self.metadata['NYPE']

# Is this even possible without saving the ghost cells?
# Is this even possible without saving the guard cells?
# Can they be recreated?
restart_datasets, paths = _split_into_restarts(self.data, savepath,
nxpe, nype)
Expand Down
92 changes: 74 additions & 18 deletions xbout/load.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,16 +12,21 @@
'wtime_comms', 'wtime_io', 'wtime_per_rhs', 'wtime_per_rhs_e',
'wtime_per_rhs_i']

def _auto_open_mfboutdataset(datapath, chunks={}, info=True, keep_guards=True):

def _auto_open_mfboutdataset(datapath, chunks={}, info=True,
keep_xboundaries=False, keep_yboundaries=False):
filepaths, filetype = _expand_filepaths(datapath)

# Open just one file to read processor splitting
nxpe, nype, mxg, myg, mxsub, mysub = _read_splitting(filepaths[0], info)

paths_grid, concat_dims = _arrange_for_concatenation(filepaths, nxpe, nype)

_preprocess = partial(_trim, ghosts={'x': mxg, 'y': myg})
_preprocess = partial(_trim, guards={'x': mxg, 'y': myg},
keep_boundaries={'x': keep_xboundaries, 'y': keep_yboundaries},
nxpe=nxpe, nype=nype)

# TODO warning message to make sure user knows if it's parallelized
ds = xarray.open_mfdataset(paths_grid, concat_dim=concat_dims,
combine='nested', data_vars='minimal',
preprocess=_preprocess, engine=filetype,
Expand Down Expand Up @@ -88,8 +93,6 @@ def _expand_wildcards(path):
def _read_splitting(filepath, info=True):
ds = xarray.open_dataset(str(filepath))

# TODO check that BOUT doesn't ever set the number of guards to be different to the number of ghosts

# Account for case of no parallelisation, when nxpe etc won't be in dataset
def get_scalar(ds, key, default=1, info=True):
if key in ds:
Expand Down Expand Up @@ -118,9 +121,9 @@ def _arrange_for_concatenation(filepaths, nxpe=1, nype=1):
ordering across different processors and consecutive simulation runs.

Filepaths must be a sorted list. Uses the fact that BOUT's output files are
named as num = nxpe*i + j, and assumes that any consecutive simulation
runs are in directories which when sorted are in the correct order
(e.g. /run0/*, /run1/*, ...).
named as num = nxpe*i + j, where i={0, ..., nype-1}, j={0, ..., nxpe-1}.
Also assumes that any consecutive simulation runs are in directories which
when sorted are in the correct order (e.g. /run0/*, /run1/*, ...).
"""

nprocs = nxpe * nype
Expand Down Expand Up @@ -154,28 +157,57 @@ def _arrange_for_concatenation(filepaths, nxpe=1, nype=1):
return paths_grid, concat_dims


def _trim(ds, ghosts={}, keep_guards=True):
def _trim(ds, *, guards, keep_boundaries, nxpe, nype):
"""
Trims all ghost and guard cells off a single dataset read from a single
BOUT dump file, to prepare for concatenation.
Trims all guard (and optionally boundary) cells off a single dataset read from a
single BOUT dump file, to prepare for concatenation.
Also drops some variables that store timing information, which are different for each
process and so cannot be concatenated.

Parameters
----------
ghosts : dict, optional
guards : dict, optional
keep_guards : dict, optional
guards : dict
Number of guard cells along each dimension, e.g. {'x': 2, 't': 0}
keep_boundaries : dict
Whether or not to preserve the boundary cells along each dimension, e.g.
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Just for reference, nxpe, nype aren't documented here

Also, I guess guards and keep_boundaries are optional, although that is somewhat obvious from the signature (if you're familiar with the syntax!)

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I've added nxpe and nype to the docstring.

guards and keep_boundaries aren't optional. The foo(*, a, b) syntax means that a and b are required keyword arguments. I found it recently and quite like it because I think long sequences of positional arguments are bug-prone, but it's still nice to use required arguments sometimes - for example here it's an internal-implementation-only function which isn't called with default arguments, so no need to have the handling for default arguments (that would be needed if we had something like guards=None).

{'x': True, 'y': False}
nxpe : int
Number of processors in x direction
nype : int
Number of processors in y direction
"""

# TODO generalise this function to handle guard cells being optional
if not keep_guards:
raise NotImplementedError
if any(keep_boundaries.values()):
# Work out if this particular dataset contains any boundary cells
# Relies on a change to xarray so datasets always have source encoding
# See xarray GH issue #2550
lower_boundaries, upper_boundaries = _infer_contains_boundaries(
ds.encoding['source'], nxpe, nype)
else:
lower_boundaries, upper_boundaries = {}, {}

selection = {}
for dim in ds.dims:
if ghosts.get(dim, False):
selection[dim] = slice(ghosts[dim], -ghosts[dim])
# Check for boundary cells, otherwise use guard cells, else leave alone
if keep_boundaries.get(dim, False):
if lower_boundaries.get(dim, False):
lower = None
else:
lower = guards[dim]
elif guards.get(dim, False):
lower = guards[dim]
else:
lower = None
if keep_boundaries.get(dim, False):
if upper_boundaries.get(dim, False):
upper = None
else:
upper = -guards[dim]
elif guards.get(dim, False):
upper = -guards[dim]
else:
upper = None
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The logic for upper is almost the same as for lower -- is it possible to pull out into a helper function?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Probably - I'll have a go

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

As they are not completely identical, it might be more trouble that its worth

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I managed to factor this out, so do you think this is ready to merge now @ZedThree ?

selection[dim] = slice(lower, upper)

trimmed_ds = ds.isel(**selection)

Expand All @@ -184,6 +216,30 @@ def _trim(ds, ghosts={}, keep_guards=True):
return trimmed_ds


def _infer_contains_boundaries(filename, nxpe, nype):
"""
Uses the name of the output file and the domain decomposition to work out
whether this dataset contains boundary cells, and on which side.

Uses knowledge that BOUT names its output files as /folder/prefix.num.nc,
with a numbering scheme
num = nxpe*i + j, where i={0, ..., nype}, j={0, ..., nxpe}
"""

*prefix, filenum, extension = Path(filename).suffixes
filenum = int(filenum.replace('.', ''))

lower_boundaries, upper_boundaries = {}, {}

lower_boundaries['x'] = filenum % nxpe == 0
upper_boundaries['x'] = filenum % nxpe == nxpe-1

lower_boundaries['y'] = filenum < nxpe
upper_boundaries['y'] = filenum >= (nype-1)*nxpe

return lower_boundaries, upper_boundaries


def _strip_metadata(ds):
"""
Extract the metadata (nxpe, myg etc.) from the Dataset.
Expand Down
9 changes: 6 additions & 3 deletions xbout/tests/test_boutdataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,15 +20,18 @@ class TestBoutDatasetIsXarrayDataset:

def test_concat(self, tmpdir_factory, bout_xyt_example_files):
    """
    Check that two BoutDatasets loaded from separate file sets can be
    concatenated along a new 'run' dimension, and that the result keeps
    all original dimensions plus 'run' of length 2.
    """
    # Defect fixed: the scraped diff left both the pre- and post-change call
    # lines in place, which is invalid Python; resolved to the merged state
    # (explicit keep_xboundaries=False).
    path1 = bout_xyt_example_files(tmpdir_factory, nxpe=3, nype=4, nt=1)
    bd1 = open_boutdataset(datapath=path1, inputfilepath=None,
                           keep_xboundaries=False)
    path2 = bout_xyt_example_files(tmpdir_factory, nxpe=3, nype=4, nt=1)
    bd2 = open_boutdataset(datapath=path2, inputfilepath=None,
                           keep_xboundaries=False)
    result = concat([bd1, bd2], dim='run')
    # Concatenation should add only the new 'run' dimension, of length 2
    assert result.dims == {**bd1.dims, 'run': 2}

def test_isel(self, tmpdir_factory, bout_xyt_example_files):
    """
    Check that isel on the BoutDataset accessor-wrapped Dataset gives the
    same result as isel on the underlying xarray Dataset.
    """
    # Defect fixed: the scraped diff left both the pre- and post-change call
    # lines in place, which is invalid Python; resolved to the merged state
    # (explicit keep_xboundaries=False).
    path = bout_xyt_example_files(tmpdir_factory, nxpe=1, nype=1, nt=1)
    bd = open_boutdataset(datapath=path, inputfilepath=None,
                          keep_xboundaries=False)
    actual = bd.isel(x=slice(None, None, 2))
    expected = bd.bout.data.isel(x=slice(None, None, 2))
    xrt.assert_equal(actual, expected)
Expand Down
Loading