diff --git a/.circleci/config.yml b/.circleci/config.yml
index d95cac9fa1..75044102ac 100644
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
@@ -2,6 +2,10 @@ defaults: &defaults
   steps:
     - checkout:
         path: source
+    - run:
+        # https://discuss.circleci.com/t/circle-working-directory-doesnt-expand/17007
+        name: "Fix CIRCLE_WORKING_DIRECTORY"
+        command: echo 'CIRCLE_WORKING_DIRECTORY="${CIRCLE_WORKING_DIRECTORY/#\~/$HOME}"' >> $BASH_ENV
     - run:
         name: CDash
         command: bash source/scripts/ci/circle/postCDashStatus.sh
diff --git a/.github/ISSUE_TEMPLATE/new_release.md b/.github/ISSUE_TEMPLATE/new_release.md
index a60870a8a8..809132eebc 100644
--- a/.github/ISSUE_TEMPLATE/new_release.md
+++ b/.github/ISSUE_TEMPLATE/new_release.md
@@ -33,7 +33,7 @@ git push
 - [ ] Create PR (BASE to master if release_@MAJOR@@MINOR@ does not exists; otherwise release_@MAJOR@@MINOR@)
 - [ ] Ask for review
 - [ ] Merge PR
-- [ ] Create Tag commit `git tag -a v@VERSION@ the_merge_commit`
+- [ ] Create Tag commit `git tag -a -m 'v@VERSION' v@VERSION@ the_merge_commit`
 - [ ] Create Release in GitHub page
   - Use the following script for getting the PR of this release
     - `./scripts/developer/create-changelog.sh v@VERSION@ v@OLD_RELEASE@`
@@ -66,5 +66,8 @@ git push origin master
   - CondaForge robot should do this for you automatically, expect a new PR at
     https://github.com/conda-forge/adios2-feedstock a couple of hours after the
     release.
+- [ ] Submit a MR for ParaView Superbuild to use v@VERSION@ release.
+- [ ] Update the website to point to the v@VERSION@ release
 - [ ] Write an announcement in the ADIOS-ECP mail-list
   (https://groups.google.com/a/kitware.com/g/adios-ecp)
+
diff --git a/.github/workflows/everything.yml b/.github/workflows/everything.yml
index a2eb4a60de..ae00aaddbf 100644
--- a/.github/workflows/everything.yml
+++ b/.github/workflows/everything.yml
@@ -7,7 +7,7 @@
 # Note the use of multiple checkout actions in most jobs.  This has been
 # implemented to allow the use of CI scripts at a different ref or sha than
 # the source code they're evaluating.  For push events (when a pull_request is
-# merged) ther is no difference.  However, for pull_request events this allows
+# merged) there is no difference.  However, for pull_request events this allows
 # us test code at the head of a pull_request using the CI scripts from the
 # prospectively merged pull_request, which will include any CI updates that
 # may have made it to the target branch after the pull_request was started.
@@ -127,14 +127,17 @@ jobs:
         os: [ubuntu20.04]
         compiler: [gcc8, gcc9, gcc10, gcc11, clang6, clang10]
         shared: [shared]
-        parallel: [ompi]
+        parallel: [mpich]
         include:
           - os: ubuntu20.04
-            compiler: gcc10
-            parallel: mpich
+            compiler: gcc8
+            parallel: ompi
           - os: ubuntu20.04
             compiler: gcc8
             parallel: serial
+          - os: ubuntu20.04
+            compiler: clang6
+            parallel: ompi
           - os: ubuntu20.04
             compiler: clang6
             parallel: serial
@@ -215,7 +218,7 @@ jobs:
       matrix:
         os: [el8]
         compiler: [icc, oneapi]
-        parallel: [ompi]
+        parallel: [mpich]
 
     steps:
       - uses: actions/checkout@v4
@@ -448,8 +451,9 @@ jobs:
     strategy:
       fail-fast: false
       matrix:
-        code: [lammps, tau]
+        code: [examples, lammps, tau]
         include:
+          - code: examples
           - code: lammps
             repo: pnorbert/lammps
             ref: fix-deprecated-adios-init
diff --git a/.gitlab/config/generate_pipelines.py b/.gitlab/config/generate_pipelines.py
index 15a70cfde3..bb8528ccf1 100755
--- a/.gitlab/config/generate_pipelines.py
+++ b/.gitlab/config/generate_pipelines.py
@@ -18,9 +18,43 @@
 urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
 
 
-def request_as_dict(url):
-    r = requests.get(url + '?per_page=100', verify=False)
-    return r.json()
+class skip_after_n_successes:
+    def __init__(self, default_value, n):
+        self.runs_max = n
+        self.runs_current = 0
+        self.default_value = default_value
+
+    def __call__(self, fn, *args, **kwargs):
+        if self.runs_current >= self.runs_max:
+            return self.default_value
+
+        ret = fn(*args, **kwargs)
+        if ret:
+            self.runs_current += 1
+        return ret
+
+
+def http_get_request(*args, **kwargs):
+    kwargs['verify'] = False
+    return requests.get(*args, **kwargs)
+
+
+def request_as_list(url, *args, **kwargs):
+    current_url = url
+    body_json = []
+    while current_url:
+        response = http_get_request(current_url, *args, **kwargs)
+        body_json += response.json()
+
+        header = response.headers
+        current_url = None
+        if 'link' in header:
+            links = re.search(
+                r'(?<=\<)([\S]*)(?=>; rel="next")', header['link'], flags=re.IGNORECASE)
+            if links is not None:
+                current_url = links.group(0)
+
+    return body_json
 
 
 def add_timestamp(branch):
@@ -44,7 +78,12 @@ def has_no_status(branch):
         gh_commit_sha = branch['commit']['parent_ids'][1]
 
     # Query GitHub for the status of this commit
-    commit = request_as_dict(gh_url + '/commits/' + gh_commit_sha + '/status')
+    response = http_get_request(
+        gh_url + '/commits/' + gh_commit_sha + '/status')
+    if int(response.headers['x-ratelimit-remaining']) <= 0:
+        raise ConnectionError(response.json())
+
+    commit = response.json()
     if commit is None or 'sha' not in commit:
         return False
 
@@ -88,14 +127,15 @@ def has_no_status(branch):
 with open(args.template_file, 'r') as fd:
     template_str = fd.read()
 
-    branches = request_as_dict(gl_url + '/repository/branches')
-    branches = map(add_timestamp, branches)
-    branches = filter(is_recent, branches)
-    branches = filter(has_no_status, branches)
-
-    # Select the arg.max most least recent branches
+    branches = request_as_list(gl_url + '/repository/branches')
+    branches = [add_timestamp(branch) for branch in branches]
+    branches = [b for b in branches if is_recent(b)]
     branches = sorted(branches, key=lambda x: x['dt'])
-    branches = itertools.islice(branches, args.max)
+
+    # Skip running (and return true) has_no_status after returning True args.max times.
+    # We need this not to hog the Github Rest API draconian ratelimit.
+    run_n_times = skip_after_n_successes(default_value=False, n=args.max)
+    branches = [b for b in branches if run_n_times(has_no_status, b)]
 
     for branch in branches:
         print(template_str.format(
diff --git a/.shellcheck_exclude_paths b/.shellcheck_exclude_paths
index 1ef95d56a2..873e54f928 100644
--- a/.shellcheck_exclude_paths
+++ b/.shellcheck_exclude_paths
@@ -1,9 +1,3 @@
-scripts/ci/circle/postCDashStatus.sh
-scripts/ci/circle/run.sh
-scripts/ci/gh-actions/check-branch-name.sh
-scripts/ci/gh-actions/get-changed-files.sh
-scripts/ci/gh-actions/macos-setup.sh
-scripts/ci/gh-actions/run.sh
 scripts/ci/scripts/github-prs-to-gitlab.sh
 scripts/ci/scripts/run-clang-format.sh
 scripts/ci/scripts/run-flake8.sh
diff --git a/CMakeLists.txt b/CMakeLists.txt
index ebb992c1f4..1a1b21d87c 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -171,6 +171,7 @@ adios_option(Endian_Reverse "Enable support for Little/Big Endian Interoperabili
 adios_option(Sodium     "Enable support for Sodium for encryption" AUTO)
 adios_option(Catalyst   "Enable support for in situ visualization plugin using ParaView Catalyst" AUTO)
 adios_option(AWSSDK     "Enable support for S3 compatible storage using AWS SDK's S3 module" OFF)
+adios_option(Derived_Variable    "Enable support for derived variables" OFF)
 include(${PROJECT_SOURCE_DIR}/cmake/DetectOptions.cmake)
 
 if(ADIOS2_HAVE_CUDA OR ADIOS2_HAVE_Kokkos_CUDA)
@@ -187,7 +188,7 @@ if(ADIOS2_HAVE_CUDA OR ADIOS2_HAVE_Kokkos_CUDA)
 endif()
 
 if(NOT DEFINED CMAKE_HIP_ARCHITECTURES AND DEFINED Kokkos_HIP_ARCHITECTURES)
-    set(CMAKE_HIP_ARCHITECTURES Kokkos_HIP_ARCHITECTURES)
+    set(CMAKE_HIP_ARCHITECTURES ${Kokkos_HIP_ARCHITECTURES})
 endif()
 
 if(ADIOS2_HAVE_MPI)
@@ -243,8 +244,8 @@ endif()
 set(ADIOS2_CONFIG_OPTS
     DataMan DataSpaces HDF5 HDF5_VOL MHS SST Fortran MPI Python Blosc2 BZip2
     LIBPRESSIO MGARD PNG SZ ZFP DAOS IME O_DIRECT Sodium Catalyst SysVShMem UCX
-    ZeroMQ Profiling Endian_Reverse AWSSDK GPU_Support CUDA Kokkos Kokkos_CUDA
-    Kokkos_HIP Kokkos_SYCL
+    ZeroMQ Profiling Endian_Reverse Derived_Variable AWSSDK GPU_Support CUDA Kokkos
+    Kokkos_CUDA Kokkos_HIP Kokkos_SYCL
 )
 
 GenerateADIOSHeaderConfig(${ADIOS2_CONFIG_OPTS})
diff --git a/CTestConfig.cmake b/CTestConfig.cmake
index 67ef8c43ee..49d6e3826c 100644
--- a/CTestConfig.cmake
+++ b/CTestConfig.cmake
@@ -11,3 +11,178 @@ set(CTEST_DROP_SITE "open.cdash.org")
 set(CTEST_DROP_LOCATION "/submit.php?project=ADIOS")
 set(CTEST_DROP_SITE_CDASH TRUE)
 set(MEMORYCHECK_SUPPRESSIONS_FILE ${CMAKE_SOURCE_DIR}/scripts/dashboard/nightly/valgrind-suppressions.txt)
+
+# Ignore tests that are currently failing, remove tests here as they are fixed
+list(APPEND CTEST_CUSTOM_MEMCHECK_IGNORE
+  Bindings.C.ADIOS2_C_API.ADIOS2BPWriteTypes.Serial
+  Bindings.C.BPWriteReadMultiblockCC.ZeroSizeBlocks.Serial
+  Engine.BP.*/BPAppendAfterStepsP.Test/*.BP5.Serial
+  Engine.BP.*/BPChangingShapeWithinStep.MultiBlock/*.BP5.Serial
+  Engine.BP.*/BPParameterSelectStepsP.Read/*.BP5.Serial
+  Engine.BP.*/BPReadMultithreadedTestP.ReadFile/*.BP5.Serial
+  Engine.BP.*/BPStepsFileGlobalArrayParameters.EveryOtherStep/*.BP5.Serial
+  Engine.BP.*/BPStepsFileGlobalArrayReaders.EveryStep/*.BP5.Serial
+  Engine.BP.*/BPStepsFileGlobalArrayReaders.NewVarPerStep/*.BP5.Serial
+  Engine.BP.*/BPStepsFileLocalArrayParameters.EveryOtherStep/*.BP5.Serial
+  Engine.BP.*/BPStepsFileLocalArrayReaders.EveryStep/*.BP5.Serial
+  Engine.BP.*/BPStepsFileLocalArrayReaders.NewVarPerStep/*.BP5.Serial
+  Engine.BP.BPChangingShape.BPWriteReadShape2D.BP5.Serial
+  Engine.BP.BPLargeMetadata.ManyLongStrings.BP5.Serial
+  Engine.BP.BPWriteAppendReadTestADIOS2.ADIOS2BPWriteAppendRead2D2x4.BP5.Serial
+  Engine.BP.BPWriteAppendReadTestADIOS2.ADIOS2BPWriteAppendReadAggregate.BP5.Serial
+  Engine.BP.BPWriteAppendReadTestADIOS2.ADIOS2BPWriteAppendReadVaryingAggregation.BP5.Serial
+  Engine.BP.BPWriteMultiblockReadTest.ADIOS2BPWriteMultiblockRead1D8.BP5.Serial
+  Engine.BP.BPWriteMultiblockReadTest.ADIOS2BPWriteMultiblockRead2D2x4.BP5.Serial
+  Engine.BP.BPWriteMultiblockReadTest.ADIOS2BPWriteMultiblockRead2D4x2.BP5.Serial
+  Engine.BP.BPWriteMultiblockReadTest.ADIOS2BPWriteRead1D8ZeroBlock.BP5.Serial
+  Engine.BP.BPWriteReadAsStreamTestADIOS2.ReaderWriterDefineVariable.BP5.Serial
+  Engine.BP.BPWriteReadAttributes.BPWriteReadSingleTypesVar.BP5.Serial
+  Engine.BP.BPWriteReadAttributes.WriteReadArrayTypes.BP5.Serial
+  Engine.BP.BPWriteReadAttributes.WriteReadArrayTypesVar.BP5.Serial
+  Engine.BP.BPWriteReadAttributes.WriteReadSingleTypes.BP5.Serial
+  Engine.BP.BPWriteReadAttributes.WriteReadStreamVarp.BP5.Serial
+  Engine.BP.BPWriteReadAttributeTestMultirank.ADIOS2BPWriteReadArrayTypes.BP5.Serial
+  Engine.BP.BPWriteReadBlockInfo.BPWriteReadBlockInfo1D8_C.BP3.Serial
+  Engine.BP.BPWriteReadBlockInfo.BPWriteReadBlockInfo1D8_C.BP4.Serial
+  Engine.BP.BPWriteReadBlockInfo.BPWriteReadBlockInfo1D8_C.BP5.Serial
+  Engine.BP.BPWriteReadBlockInfo.BPWriteReadBlockInfo1D8.BP5.Serial
+  Engine.BP.BPWriteReadBlockInfo.BPWriteReadBlockInfo2D2x4.BP5.Serial
+  Engine.BP.BPWriteReadLocalVariables.ADIOS2BPWriteReadLocal1DAllSteps.BP5.Serial
+  Engine.BP.BPWriteReadLocalVariables.ADIOS2BPWriteReadLocal1DBlockInfo.BP5.Serial
+  Engine.BP.BPWriteReadLocalVariables.ADIOS2BPWriteReadLocal2DChangeCount.BP3.Serial
+  Engine.BP.BPWriteReadLocalVariables.ADIOS2BPWriteReadLocal2DChangeCount.BP4.Serial
+  Engine.BP.BPWriteReadLocalVariables.ADIOS2BPWriteReadLocal2DChangeCount.BP5.Serial
+  Engine.BP.BPWriteReadLocalVariablesSel.BPWriteReadLocal1DAllStepsSel.BP5.Serial
+  Engine.BP.BPWriteReadLocalVariablesSelHighLevel.BPWriteReadLocal1DAllStepsSel.BP5.Serial
+  Engine.BP.BPWriteReadMultiblockTest.ADIOS2BPWriteReadMultiblock1D8.BP5.Serial
+  Engine.BP.BPWriteReadMultiblockTest.ADIOS2BPWriteReadMultiblock2D2x4.BP5.Serial
+  Engine.BP.BPWriteReadMultiblockTest.ADIOS2BPWriteReadMultiblock2D4x2.BP5.Serial
+  Engine.BP.BPWriteReadMultiblockTest.MultiblockNullBlocks.BP5.Serial
+  Engine.BP.BPWriteReadMultiblockTest.MultiblockPerformDataWrite.BP5.Serial
+  Engine.BP.BPWriteReadTestADIOS2.ADIOS2BPWriteRead10D2x2.Async.BP5.EWS.Guided.Serial
+  Engine.BP.BPWriteReadTestADIOS2.ADIOS2BPWriteRead10D2x2.Async.BP5.EWS.Naive.Serial
+  Engine.BP.BPWriteReadTestADIOS2.ADIOS2BPWriteRead10D2x2.Async.BP5.TLS.Guided.Serial
+  Engine.BP.BPWriteReadTestADIOS2.ADIOS2BPWriteRead10D2x2.Async.BP5.TLS.Naive.Serial
+  Engine.BP.BPWriteReadTestADIOS2.ADIOS2BPWriteRead10D2x2.BP5.Serial
+  Engine.BP.BPWriteReadTestADIOS2.ADIOS2BPWriteRead1D8.Async.BP5.EWS.Guided.Serial
+  Engine.BP.BPWriteReadTestADIOS2.ADIOS2BPWriteRead1D8.Async.BP5.EWS.Naive.Serial
+  Engine.BP.BPWriteReadTestADIOS2.ADIOS2BPWriteRead1D8.Async.BP5.TLS.Guided.Serial
+  Engine.BP.BPWriteReadTestADIOS2.ADIOS2BPWriteRead1D8.Async.BP5.TLS.Naive.Serial
+  Engine.BP.BPWriteReadTestADIOS2.ADIOS2BPWriteRead1D8.BP5.Serial
+  Engine.BP.BPWriteReadTestADIOS2.ADIOS2BPWriteRead2D2x4.Async.BP5.EWS.Guided.Serial
+  Engine.BP.BPWriteReadTestADIOS2.ADIOS2BPWriteRead2D2x4.Async.BP5.EWS.Naive.Serial
+  Engine.BP.BPWriteReadTestADIOS2.ADIOS2BPWriteRead2D2x4.Async.BP5.TLS.Guided.Serial
+  Engine.BP.BPWriteReadTestADIOS2.ADIOS2BPWriteRead2D2x4.Async.BP5.TLS.Naive.Serial
+  Engine.BP.BPWriteReadTestADIOS2.ADIOS2BPWriteRead2D2x4.BP5.Serial
+  Engine.BP.BPWriteReadTestADIOS2.ADIOS2BPWriteRead2D4x2_MultiStepsOverflow.Async.BP5.EWS.Guided.Serial
+  Engine.BP.BPWriteReadTestADIOS2.ADIOS2BPWriteRead2D4x2_MultiStepsOverflow.Async.BP5.EWS.Naive.Serial
+  Engine.BP.BPWriteReadTestADIOS2.ADIOS2BPWriteRead2D4x2_MultiStepsOverflow.Async.BP5.TLS.Guided.Serial
+  Engine.BP.BPWriteReadTestADIOS2.ADIOS2BPWriteRead2D4x2_MultiStepsOverflow.Async.BP5.TLS.Naive.Serial
+  Engine.BP.BPWriteReadTestADIOS2.ADIOS2BPWriteRead2D4x2_MultiStepsOverflow.BP5.Serial
+  Engine.BP.BPWriteReadTestADIOS2.ADIOS2BPWriteRead2D4x2_ReadMultiSteps.Async.BP5.EWS.Guided.Serial
+  Engine.BP.BPWriteReadTestADIOS2.ADIOS2BPWriteRead2D4x2_ReadMultiSteps.Async.BP5.EWS.Naive.Serial
+  Engine.BP.BPWriteReadTestADIOS2.ADIOS2BPWriteRead2D4x2_ReadMultiSteps.Async.BP5.TLS.Guided.Serial
+  Engine.BP.BPWriteReadTestADIOS2.ADIOS2BPWriteRead2D4x2_ReadMultiSteps.Async.BP5.TLS.Naive.Serial
+  Engine.BP.BPWriteReadTestADIOS2.ADIOS2BPWriteRead2D4x2_ReadMultiSteps.BP5.Serial
+  Engine.BP.BPWriteReadTestADIOS2.ADIOS2BPWriteRead2D4x2.Async.BP5.EWS.Guided.Serial
+  Engine.BP.BPWriteReadTestADIOS2.ADIOS2BPWriteRead2D4x2.Async.BP5.EWS.Naive.Serial
+  Engine.BP.BPWriteReadTestADIOS2.ADIOS2BPWriteRead2D4x2.Async.BP5.TLS.Guided.Serial
+  Engine.BP.BPWriteReadTestADIOS2.ADIOS2BPWriteRead2D4x2.Async.BP5.TLS.Naive.Serial
+  Engine.BP.BPWriteReadTestADIOS2.ADIOS2BPWriteRead2D4x2.BP5.Serial
+  Engine.BP.BPWriteReadTestADIOS2.GetDeferredInClose.Async.BP5.EWS.Guided.Serial
+  Engine.BP.BPWriteReadTestADIOS2.GetDeferredInClose.Async.BP5.EWS.Naive.Serial
+  Engine.BP.BPWriteReadTestADIOS2.GetDeferredInClose.Async.BP5.TLS.Guided.Serial
+  Engine.BP.BPWriteReadTestADIOS2.GetDeferredInClose.Async.BP5.TLS.Naive.Serial
+  Engine.BP.BPWriteReadTestADIOS2.GetDeferredInClose.BP5.Serial
+  Engine.BP.BPWriteReadTestADIOS2.ReadStartCount.Async.BP5.EWS.Guided.Serial
+  Engine.BP.BPWriteReadTestADIOS2.ReadStartCount.Async.BP5.EWS.Naive.Serial
+  Engine.BP.BPWriteReadTestADIOS2.ReadStartCount.Async.BP5.TLS.Guided.Serial
+  Engine.BP.BPWriteReadTestADIOS2.ReadStartCount.Async.BP5.TLS.Naive.Serial
+  Engine.BP.BPWriteReadTestADIOS2.ReadStartCount.BP5.Serial
+  Engine.BP.BPWriteReadTestADIOS2fstream.ADIOS2BPWriteRead1D8.BP5.Serial
+  Engine.BP.BPWriteReadTestADIOS2fstream.ADIOS2BPWriteRead2D2x4.BP5.Serial
+  Engine.BP.BPWriteReadTestADIOS2fstream.ADIOS2BPWriteRead2D4x2_MultiStepsOverflow.BP5.Serial
+  Engine.BP.BPWriteReadTestADIOS2fstream.ADIOS2BPWriteRead2D4x2_ReadMultiSteps.BP5.Serial
+  Engine.BP.BPWriteReadTestADIOS2fstream.ADIOS2BPWriteRead2D4x2.BP5.Serial
+  Engine.BP.BPWriteReadTestADIOS2stdio.ADIOS2BPWriteRead1D8.BP5.Serial
+  Engine.BP.BPWriteReadTestADIOS2stdio.ADIOS2BPWriteRead2D2x4.BP5.Serial
+  Engine.BP.BPWriteReadTestADIOS2stdio.ADIOS2BPWriteRead2D4x2_MultiStepsOverflow.BP5.Serial
+  Engine.BP.BPWriteReadTestADIOS2stdio.ADIOS2BPWriteRead2D4x2_ReadMultiSteps.BP5.Serial
+  Engine.BP.BPWriteReadTestADIOS2stdio.ADIOS2BPWriteRead2D4x2.BP5.Serial
+  Engine.BP.BPWriteReadVector.ADIOS2BPWriteRead1D8.BP5.Serial
+  Engine.BP.BPWriteReadVector.ADIOS2BPWriteRead2D2x4.BP5.Serial
+  Engine.BP.BPWriteReadVector.ADIOS2BPWriteRead2D4x2.BP5.Serial
+  Engine.BP.BPWriteReadVector.ADIOS2BPWriteReadVector2D4x2_MultiSteps.BP5.Serial
+  Engine.BPEngineTest.ZfpComplex.Serial
+  Engine.SST.SstWriteFails.InvalidBeginStep.Serial
+  Engine.SST.SstWriteFails.InvalidPut.Serial
+  Engine.Staging.TestThreads.Basic.SST.BP.Serial
+  Engine.Staging.TestThreads.Basic.SST.FFS.Serial
+  Interface.ADIOS2_CXX11_API_Selection.MemorySelectionComplex.BPfile.Serial
+  Interface.ADIOS2_CXX11_API_Selection.MemorySelectionReadStart.BPfile.Serial
+  Remote.BPWriteMemorySelectionRead.FileRemote
+  Remote.BPWriteMemorySelectionRead.GetRemote
+  Remote.BPWriteReadADIOS2stdio.GetRemote
+  remoteServerCleanup
+  remoteServerSetup
+  Staging.1x1.Attrs.BP5
+  Staging.1x1.Attrs.CommMin.BP.SST
+  Staging.1x1.Attrs.CommMin.BP5.SST
+  Staging.1x1.CommMin.BP.SST
+  Staging.1x1.CommMin.BP5.SST
+  Staging.1x1.ForcePreload.CommMin.BP.SST
+  Staging.1x1.ForcePreload.CommMin.BP5.SST
+  Staging.1x1.Local2.CommMin.BP.SST
+  Staging.1x1.Local2.CommMin.BP5.SST
+  Staging.1x1.LocalMultiblock.CommMin.BP.SST
+  Staging.1x1.LocalMultiblock.CommMin.BP5.SST
+  Staging.1x1.ModAttrs.BP5
+  Staging.1x1.ModAttrs.CommMin.BP.SST
+  Staging.1x1.ModAttrs.CommMin.BP5.SST
+  Staging.1x1.NoPreload.CommMin.BP.SST
+  Staging.1x1.NoPreload.CommMin.BP5.SST
+  Staging.1x1.SstRUDP.CommMin.BP.SST
+  Staging.1x1.SstRUDP.CommMin.BP5.SST
+  Staging.1x1Joined.BP5
+  Staging.1x1Joined.CommMin.BP5.SST
+  Staging.1x1LockGeometry.CommMin.BP.SST
+  Staging.1x1LockGeometry.CommMin.BP5.SST
+  Staging.1x1Struct.BP5
+  Staging.1x1Struct.CommMin.BP5.SST
+  Staging.1x1VarDestruction.CommMin.BP.SST
+  Staging.1x1VarDestruction.CommMin.BP5.SST
+  Staging.AllToAllDistribution.1x1x3.CommMin.BP.SST
+  Staging.AllToAllDistribution.1x1x3.CommMin.BP5.SST
+  Staging.DiscardWriter.1x1.CommMin.BP.SST
+  Staging.DiscardWriter.1x1.CommMin.BP5.SST
+  Staging.LatestReader.1x1.CommMin.BP.SST
+  Staging.LatestReader.1x1.CommMin.BP5.SST
+  Staging.LatestReaderHold.1x1.CommMin.BP.SST
+  Staging.LatestReaderHold.1x1.CommMin.BP5.SST
+  Staging.OnDemandSingle.1x1.CommMin.BP.SST
+  Staging.OnDemandSingle.1x1.CommMin.BP5.SST
+  Staging.RoundRobinDistribution.1x1x3.CommMin.BP.SST
+  Staging.RoundRobinDistribution.1x1x3.CommMin.BP5.SST
+  Staging.TimeoutReader.1x1.CommMin.BP.SST
+  Staging.TimeoutReader.1x1.CommMin.BP5.SST
+  Staging.WriteMemorySelectionRead.1x1.CommMin.BP.SST
+  Staging.WriteMemorySelectionRead.1x1.CommMin.BP5.SST
+  Staging.ZFPCompression.1x1.CommMin.BP.SST
+  Staging.ZFPCompression.1x1.CommMin.BP5.SST
+  Staging.ZFPCompression.3x5.CommMin.BP.SST
+  Staging.ZFPCompression.3x5.CommMin.BP5.SST
+  Utils.ChangingShape.AlternatingStepsAndChangingShapeVar.Dump
+  Utils.ChangingShape.AlternatingStepsVarSelection.Dump
+  Utils.ChangingShape.ChangingShapeVarOneStep.Dump
+  Utils.ChangingShape.Dump
+  Utils.ChangingShape.FixedShapeVarTooManySteps.Dump
+  Utils.ChangingShape.Screen
+  Utils.CWriter
+  Utils.CWriter.Bpls.Al.Dump
+  Utils.CWriter.Bpls.h.Dump
+  Utils.CWriter.Bpls.la.Dump
+  Utils.CWriter.Bpls.la.Screen
+  Utils.CWriter.Bpls.ldDav.Dump
+  Utils.CWriter.Bpls.ldDavvv.Dump
+  Utils.CWriter.Bpls.ldvarI16.Dump
+)
diff --git a/ReadMe.md b/ReadMe.md
index 3998d14f9b..9e73588a36 100644
--- a/ReadMe.md
+++ b/ReadMe.md
@@ -70,7 +70,7 @@ Once ADIOS2 is installed refer to:
 
 ## Releases
 
-* Latest release: [v2.9.0](https://github.com/ornladios/ADIOS2/releases/tag/v2.9.0)
+* Latest release: [v2.9.2](https://github.com/ornladios/ADIOS2/releases/tag/v2.9.2)
 
 * Previous releases: [https://github.com/ornladios/ADIOS2/releases](https://github.com/ornladios/ADIOS2/releases)
 
diff --git a/bindings/C/adios2/c/adios2_c_adios.h b/bindings/C/adios2/c/adios2_c_adios.h
index b5044b0447..147cb8ab6a 100644
--- a/bindings/C/adios2/c/adios2_c_adios.h
+++ b/bindings/C/adios2/c/adios2_c_adios.h
@@ -46,7 +46,7 @@ adios2_adios *adios2_init_config_mpi(const char *config_file, MPI_Comm comm);
 
 #else
 #define adios2_init() adios2_init_serial()
-#define adios2_init_config(config_file) adios2_init_config_seria(config_file)
+#define adios2_init_config(config_file) adios2_init_config_serial(config_file)
 #endif
 
 /**
diff --git a/bindings/CXX11/CMakeLists.txt b/bindings/CXX11/CMakeLists.txt
index 861764313b..76cf76120b 100644
--- a/bindings/CXX11/CMakeLists.txt
+++ b/bindings/CXX11/CMakeLists.txt
@@ -37,6 +37,12 @@ target_include_directories(adios2_cxx11
 
 add_library(adios2::cxx11 ALIAS adios2_cxx11)
 
+if (ADIOS2_HAVE_Derived_Variable)
+    target_sources(adios2_cxx11 PRIVATE
+      adios2/cxx11/VariableDerived.cpp
+  )
+endif()
+
 if(ADIOS2_HAVE_MPI)
   add_library(adios2_cxx11_mpi
     adios2/cxx11/ADIOSMPI.cpp
@@ -79,6 +85,14 @@ install(
   COMPONENT adios2_cxx11-development
 )
 
+if (ADIOS2_HAVE_Derived_Variable)
+    install(
+        FILES adios2/cxx11/VariableDerived.h
+        DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/adios2/cxx11
+        COMPONENT adios2_cxx11-development
+    )
+endif()
+
 install(
   FILES adios2/cxx11/ADIOS.h
         adios2/cxx11/ADIOS.inl
diff --git a/bindings/CXX11/adios2/cxx11/Engine.h b/bindings/CXX11/adios2/cxx11/Engine.h
index 533912758a..2d5a1de68a 100644
--- a/bindings/CXX11/adios2/cxx11/Engine.h
+++ b/bindings/CXX11/adios2/cxx11/Engine.h
@@ -212,11 +212,12 @@ class Engine
     void Put(Variable<T> variable, U const &data, const Mode launch = Mode::Deferred)
     {
         auto bufferView = static_cast<AdiosView<U>>(data);
-#ifdef ADIOS2_HAVE_GPU_SUPPORT
         auto bufferMem = bufferView.memory_space();
+#ifdef ADIOS2_HAVE_GPU_SUPPORT
         auto variableMem = variable.GetMemorySpace();
         CheckMemorySpace(variableMem, bufferMem);
 #endif
+        variable.SetMemorySpace(bufferMem);
         Put(variable, bufferView.data(), launch);
     }
 
diff --git a/bindings/CXX11/adios2/cxx11/IO.cpp b/bindings/CXX11/adios2/cxx11/IO.cpp
index 8018c06d77..e8edb54528 100644
--- a/bindings/CXX11/adios2/cxx11/IO.cpp
+++ b/bindings/CXX11/adios2/cxx11/IO.cpp
@@ -179,6 +179,16 @@ VariableNT IO::DefineVariable(const DataType type, const std::string &name, cons
     }
 }
 
+#ifdef ADIOS2_HAVE_DERIVED_VARIABLE
+VariableDerived IO::DefineDerivedVariable(const std::string &name, const std::string &expression,
+                                          const DerivedVarType varType)
+{
+    helper::CheckForNullptr(m_IO,
+                            "for variable name " + name + ", in call to IO::DefineDerivedVariable");
+
+    return VariableDerived(&m_IO->DefineDerivedVariable(name, expression, varType));
+}
+#endif
 StructDefinition IO::DefineStruct(const std::string &name, const size_t size)
 {
     helper::CheckForNullptr(m_IO, "for struct name " + name + ", in call to IO::DefineStruct");
diff --git a/bindings/CXX11/adios2/cxx11/IO.h b/bindings/CXX11/adios2/cxx11/IO.h
index 1702c769f4..2b599e3695 100644
--- a/bindings/CXX11/adios2/cxx11/IO.h
+++ b/bindings/CXX11/adios2/cxx11/IO.h
@@ -20,6 +20,9 @@
 #include "Group.h"
 #include "Operator.h"
 #include "Variable.h"
+#ifdef ADIOS2_HAVE_DERIVED_VARIABLE
+#include "VariableDerived.h"
+#endif
 #include "VariableNT.h"
 #include "adios2/common/ADIOSMacros.h"
 #include "adios2/common/ADIOSTypes.h"
@@ -151,7 +154,11 @@ class IO
     Variable<T> DefineVariable(const std::string &name, const Dims &shape = Dims(),
                                const Dims &start = Dims(), const Dims &count = Dims(),
                                const bool constantDims = false);
-
+#ifdef ADIOS2_HAVE_DERIVED_VARIABLE
+    VariableDerived
+    DefineDerivedVariable(const std::string &name, const std::string &expression,
+                          const DerivedVarType varType = DerivedVarType::MetadataOnly);
+#endif
     VariableNT DefineVariable(const DataType type, const std::string &name,
                               const Dims &shape = Dims(), const Dims &start = Dims(),
                               const Dims &count = Dims(), const bool constantDims = false);
diff --git a/bindings/CXX11/adios2/cxx11/VariableDerived.cpp b/bindings/CXX11/adios2/cxx11/VariableDerived.cpp
new file mode 100644
index 0000000000..ce6a4dc0e5
--- /dev/null
+++ b/bindings/CXX11/adios2/cxx11/VariableDerived.cpp
@@ -0,0 +1,8 @@
+#include "VariableDerived.h"
+
+#include "adios2/core/VariableDerived.h"
+
+namespace adios2
+{
+VariableDerived::VariableDerived(core::VariableDerived *variable) : m_VariableDerived(variable) {}
+} // end namespace adios2
diff --git a/bindings/CXX11/adios2/cxx11/VariableDerived.h b/bindings/CXX11/adios2/cxx11/VariableDerived.h
new file mode 100644
index 0000000000..cc69273c2e
--- /dev/null
+++ b/bindings/CXX11/adios2/cxx11/VariableDerived.h
@@ -0,0 +1,43 @@
+#ifndef ADIOS2_BINDINGS_CXX11_VARIABLE_DERIVED_H_
+#define ADIOS2_BINDINGS_CXX11_VARIABLE_DERIVED_H_
+
+#include "Operator.h"
+#include "adios2/common/ADIOSTypes.h"
+
+namespace adios2
+{
+
+/// \cond EXCLUDE_FROM_DOXYGEN
+// forward declare
+class IO; // friend
+namespace core
+{
+
+class VariableDerived; // private implementation
+}
+/// \endcond
+
+class VariableDerived
+{
+    friend class IO;
+
+public:
+    /**
+     * Empty (default) constructor, use it as a placeholder for future
+     * variables from IO:DefineVariableDerived<T> or IO:InquireVariableDerived<T>.
+     * Can be used with STL containers.
+     */
+    VariableDerived() = default;
+
+    /** Default, using RAII STL containers */
+    ~VariableDerived() = default;
+
+private:
+    core::VariableDerived *m_VariableDerived = nullptr;
+
+    VariableDerived(core::VariableDerived *variable);
+};
+
+} // end namespace adios2
+
+#endif // ADIOS2_BINDINGS_CXX11_VARIABLE_DERIVED_H_
diff --git a/bindings/Matlab/Makefile b/bindings/Matlab/Makefile
index 09978c32b8..aaaebf3a0e 100644
--- a/bindings/Matlab/Makefile
+++ b/bindings/Matlab/Makefile
@@ -18,6 +18,16 @@ MEXLIBS="LDFLAGS=${ADIOS_LIBS}"
 ADIOS_INC=-I${ADIOS_DIR}/include
 ADIOS_LIBS=`${ADIOS_DIR}/bin/adios2-config --c-libs`
 
+### MacOS - example using homebrew installed ADIOS2 and Xcode 15 clang
+###      1) Install homebrew (https://brew.sh/) and Xcode (App Store)
+###      2) brew install adios2
+###      OR
+###      2) Compile Adios2 from scratch and update ADIOS_DIR below to match install directory
+#ADIOS_DIR=/opt/homebrew/opt/adios2
+#ADIOS_INC=-I${ADIOS_DIR}/include
+#ADIOS_LIBS=-Wl,-rpath,${ADIOS_DIR}/lib -shared -L${ADIOS_DIR}/lib -ladios2_c -ladios2_core
+#MEXLIBS="LDFLAGS=${ADIOS_LIBS}"
+
 
 MEXOPTS=-largeArrayDims -DDEBUG CFLAGS="-g -std=c99 -fPIC -O0"
 default:
diff --git a/bindings/Matlab/adiosopenc.c b/bindings/Matlab/adiosopenc.c
index 95dc6d41b3..07c4188d36 100644
--- a/bindings/Matlab/adiosopenc.c
+++ b/bindings/Matlab/adiosopenc.c
@@ -140,9 +140,9 @@ void mexFunction(int nlhs, mxArray *plhs[], int nrhs, const mxArray *prhs[])
 
     /********************************************************/
     /* Open ADIOS file now and get variables and attributes */
-    adiosobj = adios2_init(false);
+    adiosobj = adios2_init_serial();
     group = adios2_declare_io(adiosobj, "matlabiogroup"); // name is arbitrary
-    fp = adios2_open(group, fname, adios2_mode_read);
+    fp = adios2_open(group, fname, adios2_mode_readRandomAccess);
     if (fp == NULL)
     {
         mexErrMsgIdAndTxt("MATLAB:adiosopenc:open", "Opening the file failed\n");
diff --git a/cmake/DetectOptions.cmake b/cmake/DetectOptions.cmake
index 3f511e02a8..d66e096bda 100644
--- a/cmake/DetectOptions.cmake
+++ b/cmake/DetectOptions.cmake
@@ -67,6 +67,9 @@ function(lists_get_prefix listVars outVar)
   set(${outVar} "${prefix}" PARENT_SCOPE)
 endfunction()
 
+# Multithreading
+find_package(Threads REQUIRED)
+
 # Blosc2
 if(ADIOS2_USE_Blosc2 STREQUAL AUTO)
   # Prefect CONFIG mode
@@ -193,6 +196,10 @@ endif()
 
 set(mpi_find_components C)
 
+if(ADIOS2_USE_Derived_Variable)
+    set(ADIOS2_HAVE_Derived_Variable TRUE)
+endif()
+
 if(ADIOS2_USE_Kokkos AND ADIOS2_USE_CUDA)
   message(FATAL_ERROR "ADIOS2_USE_Kokkos is incompatible with ADIOS2_USE_CUDA")
 endif()
@@ -443,6 +450,14 @@ if(ADIOS2_USE_SST AND NOT WIN32)
     if(CrayDRC_FOUND)
       set(ADIOS2_SST_HAVE_CRAY_DRC TRUE)
     endif()
+
+    try_compile(ADIOS2_SST_HAVE_CRAY_CXI
+      ${ADIOS2_BINARY_DIR}/check_libfabric_cxi
+      ${ADIOS2_SOURCE_DIR}/cmake/check_libfabric_cxi.c
+      CMAKE_FLAGS
+        "-DINCLUDE_DIRECTORIES=${LIBFABRIC_INCLUDE_DIRS}"
+        "-DLINK_DIRECTORIES=${LIBFABRIC_LIBRARIES}")
+    message(STATUS "Libfabric support for the HPE CXI provider: ${ADIOS2_SST_HAVE_CRAY_CXI}")
   endif()
   if(ADIOS2_HAVE_MPI)
     set(CMAKE_REQUIRED_LIBRARIES "MPI::MPI_C;Threads::Threads")
@@ -554,9 +569,6 @@ if(AWSSDK_FOUND)
     set(ADIOS2_HAVE_AWSSDK TRUE)
 endif()
 
-# Multithreading
-find_package(Threads REQUIRED)
-
 # Floating point detection
 include(CheckTypeRepresentation)
 
diff --git a/cmake/check_libfabric_cxi.c b/cmake/check_libfabric_cxi.c
new file mode 100644
index 0000000000..c2f2c80878
--- /dev/null
+++ b/cmake/check_libfabric_cxi.c
@@ -0,0 +1,5 @@
+#include <stdbool.h>
+#include <rdma/fabric.h>
+#include <rdma/fi_cxi_ext.h>
+
+int main() {}
diff --git a/docs/user_guide/source/advanced/ecp_hardware.rst b/docs/user_guide/source/advanced/ecp_hardware.rst
index 5f79efb49b..37e410fa2f 100644
--- a/docs/user_guide/source/advanced/ecp_hardware.rst
+++ b/docs/user_guide/source/advanced/ecp_hardware.rst
@@ -33,27 +33,29 @@ your tests.
 Examples of launching ADIOS2 SST unit tests using MPI DP:
 
 .. code-block:: bash
- # We omit some of the srun (SLURM) arguments which are specific of the project
- # you are working on. Note that you could avoid calling srun directly by
- # setting the CMAKE variable `MPIEXEC_EXECUTABLE`.
 
- # Launch simple writer test instance
- srun {PROJFLAGS }-N 1 /gpfs/alpine/proj-shared/csc331/vbolea/ADIOS2-build/bin/TestCommonWrite SST mpi_dp_test CPCommPattern=Min,MarshalMethod=BP5'
+  # We omit some of the srun (SLURM) arguments which are specific of the project
+  # you are working on. Note that you could avoid calling srun directly by
+  # setting the CMAKE variable `MPIEXEC_EXECUTABLE`.
 
- # On another terminal launch multiple instances of the Reader test
- srun {PROJFLAGS} -N 2 /gpfs/alpine/proj-shared/csc331/vbolea/ADIOS2-build/bin/TestCommonRead SST mpi_dp_test
+  # Launch simple writer test instance
+  srun {PROJFLAGS } -N 1 /gpfs/alpine/proj-shared/csc331/vbolea/ADIOS2-build/bin/TestCommonWrite SST mpi_dp_test CPCommPattern=Min,MarshalMethod=BP5
+
+  # On another terminal launch multiple instances of the Reader test
+  srun {PROJFLAGS} -N 2 /gpfs/alpine/proj-shared/csc331/vbolea/ADIOS2-build/bin/TestCommonRead SST mpi_dp_test
 
 Alternatively, you can configure your CMake build to use srun directly:
 
 .. code-block:: bash
- cmake . -DMPIEXEC_EXECUTABLE:FILEPATH="/usr/bin/srun" \
-         -DMPIEXEC_EXTRA_FLAGS:STRING="-A{YourProject} -pbatch -t10" \
-         -DMPIEXEC_NUMPROC_FLAG:STRING="-N" \
-         -DMPIEXEC_MAX_NUMPROCS:STRING="-8" \
-         -DADIOS2_RUN_MPI_MPMD_TESTS=OFF
 
- cmake --build .
- ctest
+  cmake . -DMPIEXEC_EXECUTABLE:FILEPATH="/usr/bin/srun" \
+       -DMPIEXEC_EXTRA_FLAGS:STRING="-A{YourProject} -pbatch -t10" \
+       -DMPIEXEC_NUMPROC_FLAG:STRING="-N" \
+       -DMPIEXEC_MAX_NUMPROCS:STRING="-8" \
+       -DADIOS2_RUN_MPI_MPMD_TESTS=OFF
+
+  cmake --build .
+  ctest
 
- # monitor your jobs
- watch -n1 squeue -l -u $USER
+  # monitor your jobs
+  watch -n1 squeue -l -u $USER
diff --git a/docs/user_guide/source/advanced/gpu_aware.rst b/docs/user_guide/source/advanced/gpu_aware.rst
index ee4c600180..0cf34a893d 100644
--- a/docs/user_guide/source/advanced/gpu_aware.rst
+++ b/docs/user_guide/source/advanced/gpu_aware.rst
@@ -35,7 +35,7 @@ If there is no CUDA toolkit installed, cmake will turn CUDA off automatically. A
 When building ADIOS2 with CUDA enabled, the user is responsible with setting the correct ``CMAKE_CUDA_ARCHITECTURES`` (e.g. for Summit the ``CMAKE_CUDA_ARCHITECTURES`` needs to be set to 70 to match the NVIDIA Volta V100).
 
 Building with Kokkos enabled
---------------------------
+----------------------------
 
 The Kokkos library can be used to enable GPU within ADIOS2. Based on how Kokkos is build, either the CUDA, HIP or SYCL backend will be enabled. Building with Kokkos requires ``-DADIOS2_USE_Kokkos=ON``. The ``CMAKE_CUDA_ARCHITECTURES`` is set automanically to point to the same architecture used when configuring the Kokkos library.
 
@@ -43,9 +43,9 @@ The Kokkos library can be used to enable GPU within ADIOS2. Based on how Kokkos
     Kokkos version >= 3.7 is required to enable the GPU backend in ADIOS2
 
 
-*************
+****************
 Writing GPU code
-*************
+****************
 
 The following is a simple example of writing data to storage directly from a GPU buffer allocated with CUDA relying on the automatic detection of device pointers in ADIOS2. The ADIOS2 API is identical to codes using Host buffers for both the read and write logic.
 
diff --git a/docs/user_guide/source/advanced/query.rst b/docs/user_guide/source/advanced/query.rst
index 852e0e52dc..e1a967cdfb 100644
--- a/docs/user_guide/source/advanced/query.rst
+++ b/docs/user_guide/source/advanced/query.rst
@@ -3,7 +3,7 @@ ADIOS2 query API
 #################
 
 The query API in ADIOS2 allows a client to pass a query in XML or json format,
-and get back a list of blocks or subblocks that contains hits. 
+and get back a list of blocks or sub-blocks that contains hits.
 Both BP4 and BP5 engines are supported.  
 
 
@@ -21,14 +21,14 @@ to construct a query and evaluate using the engine.
         // configFile has query, can be either xml or json
         QueryWorker(const std::string &configFile, adios2::Engine &engine);
 
-	// touched_blocks is a list of regions specified by (start, count),
-	// that contains data that satisfies the query file
+	     // touched_blocks is a list of regions specified by (start, count),
+	     // that contains data that satisfies the query file
         void GetResultCoverage(std::vector<adios2::Box<adios2::Dims>> &touched_blocks);
     ... 
     }
 
 A Sample Compound Query  
-----------------------
+-----------------------
 
 This query targets a 1D variable "doubleV", data of interest is (x  > 6.6) or (x < -0.17) or (2.8 < x < 2.9) 
 In addition, this query also specied an output region [start=5,count=80]. 
diff --git a/docs/user_guide/source/components/anatomy.rst b/docs/user_guide/source/components/anatomy.rst
new file mode 100644
index 0000000000..0832b6dc3c
--- /dev/null
+++ b/docs/user_guide/source/components/anatomy.rst
@@ -0,0 +1,136 @@
+.. _sec:basics_interface_components_anatomy:
+
+***************************
+Anatomy of an ADIOS Program
+***************************
+
+Anatomy of an ADIOS Output
+--------------------------
+
+.. code:: C++
+
+    ADIOS adios("config.xml", MPI_COMM_WORLD);
+    |
+    |   IO io = adios.DeclareIO(...);
+    |   |
+    |   |   Variable<...> var = io.DefineVariable<...>(...)
+    |   |   Attribute<...> attr = io.DefineAttribute<...>(...)
+    |   |   Engine e = io.Open("OutputFileName.bp", adios2::Mode::Write);
+    |   |   |
+    |   |   |   e.BeginStep()
+    |   |   |   |
+    |   |   |   |   e.Put(var, datapointer);
+    |   |   |   |
+    |   |   |   e.EndStep()
+    |   |   |
+    |   |   e.Close();
+    |   |
+    |   |--> IO goes out of scope
+    |
+    |--> ADIOS goes out of scope or adios2_finalize()
+
+
+The pseudo code above depicts the basic structure of performing output. The ``ADIOS`` object is necessary to hold all
+other objects. It is initialized with an MPI communicator in a parallel program or without in a serial program.
+Additionally, a config file (XML or YAML format) can be specified here to load runtime configuration. Only one ADIOS
+object is needed throughout the entire application but you can create as many as you want (e.g. if you need to separate
+IO objects using the same name in a program that reads similar input from an ensemble of multiple applications).
+
+The ``IO`` object is required to hold the variable and attribute definitions, and runtime options for a particular input
+or output stream. The IO object has a name, which is used only to refer to runtime options in the configuration file.
+One IO object can only be used in one output or input stream. The only exception where an IO object can be used twice is
+one input stream plus one output stream where the output is reusing the variable definitions loaded during input.
+
+``Variable`` and ``Attribute`` definitions belong to one IO object, which means, they can only be used in one output.
+You need to define new ones for other outputs. Just because a Variable is defined, it will not appear in the output
+unless an associated Put() call provides the content.
+
+A stream is opened and closed once. The ``Engine`` object implements the data movement for the stream. It depends on the
+runtime options of the IO object that what type of an engine is created in the Open() call. One output step is denoted
+by a pair of BeginStep..EndStep block.
+
+An output step consist of variables and attributes. Variables are just definitions without content, so one must call a
+Put() function to provide the application data pointer that contains the data content one wants to write out. Attributes
+have their content in their definitions so there is no need for an extra call.
+
+Some rules:
+
+*   Variables can be defined any time, before the corresponding Put() call
+*   Attributes can be defined any time before EndStep
+*   The following functions must be treated as Collective operations
+
+  * ADIOS
+  * Open
+  * BeginStep
+  * EndStep
+  * Close
+
+.. note::
+
+    If there is only one output step, and we only want to write it to a file on disk, never stream it to other
+    application, then BeginStep and EndStep are not required but it does not make any difference if they are called.
+
+Anatomy of an ADIOS Input
+-------------------------
+
+.. code:: C++
+
+    ADIOS adios("config.xml", MPI_COMM_WORLD);
+    |
+    |   IO io = adios.DeclareIO(...);
+    |   |
+    |   |   Engine e = io.Open("InputFileName.bp", adios2::Mode::Read);
+    |   |   |
+    |   |   |   e.BeginStep()
+    |   |   |   |
+    |   |   |   |   varlist = io.AvailableVariables(...)
+    |   |   |   |   Variable var = io.InquireVariable(...)
+    |   |   |   |   Attribute attr = io.InquireAttribute(...)
+    |   |   |   |   |
+    |   |   |   |   |   e.Get(var, datapointer);
+    |   |   |   |   |
+    |   |   |   |
+    |   |   |   e.EndStep()
+    |   |   |
+    |   |   e.Close();
+    |   |
+    |   |--> IO goes out of scope
+    |
+    |--> ADIOS goes out of scope or adios2_finalize()
+
+The difference between input and output is that while we have to define the variables and attributes for an output, we
+have to retrieve the available variables in an input first as definitions (Variable and Attribute objects).
+
+If we know the particular variable (name and type) in the input stream, we can get the definition using
+InquireVariable(). Generic tools that process any input must use other functions to retrieve the list of variable names
+and their types first and then get the individual Variable objects. The same is true for Attributes.
+
+Anatomy of an ADIOS File-only Input
+-----------------------------------
+
+Previously we explored how to read using the input mode `adios2::Mode::Read`. Nonetheless, ADIOS has another input mode
+named `adios2::Mode::ReadRandomAccess`. `adios2::Mode::Read` mode allows data access only timestep by timestep using
+`BeginStep/EndStep`, but generally it is more memory efficient as ADIOS is only required to load metadata for the
+current timestep. `ReadRandomAccess` can only be used with file engines and involves loading all the file metadata at
+once. So it can be more memory intensive than `adios2::Mode::Read` mode, but allows reading data from any timestep using
+`SetStepSelection()`. If you use `adios2::Mode::ReadRandomAccess` mode, be sure to allocate enough memory to hold
+multiple steps of the variable content.
+
+.. code:: C++
+
+    ADIOS adios("config.xml", MPI_COMM_WORLD);
+    |
+    |   IO io = adios.DeclareIO(...);
+    |   |
+    |   |   Engine e = io.Open("InputFileName.bp", adios2::Mode::ReadRandomAccess);
+    |   |   |
+    |   |   |   Variable var = io.InquireVariable(...)
+    |   |   |   |   var.SetStepSelection()
+    |   |   |   |   e.Get(var, datapointer);
+    |   |   |   |
+    |   |   |
+    |   |   e.Close();
+    |   |
+    |   |--> IO goes out of scope
+    |
+    |--> ADIOS goes out of scope or adios2_finalize()
diff --git a/docs/user_guide/source/components/components.rst b/docs/user_guide/source/components/components.rst
index 97b4f0b277..791a149a0f 100644
--- a/docs/user_guide/source/components/components.rst
+++ b/docs/user_guide/source/components/components.rst
@@ -10,3 +10,4 @@ Interface Components
 .. include:: engine.rst
 .. include:: operator.rst
 .. include:: runtime.rst
+.. include:: anatomy.rst
diff --git a/docs/user_guide/source/components/engine.rst b/docs/user_guide/source/components/engine.rst
index 2dc0babf48..4a5377e3c9 100644
--- a/docs/user_guide/source/components/engine.rst
+++ b/docs/user_guide/source/components/engine.rst
@@ -2,6 +2,8 @@
 Engine
 ******
 
+.. _sec:basics_interface_components_engine:
+
 The Engine abstraction component serves as the base interface to the actual IO systems executing the heavy-load tasks performed when producing and consuming data.
 
 Engine functionality works around two concepts:
@@ -309,8 +311,8 @@ The ``data`` fed to the ``Put`` function is assumed to be allocated on the Host
    Only the BP4 and BP5 engines are capable of receiving device allocated buffers.
 
 
-PerformsPuts
-------------
+PerformPuts
+-----------
 
    Executes all pending ``Put`` calls in deferred mode and collects
    span data.  Specifically this call copies Put(Deferred) data into
@@ -322,8 +324,8 @@ PerformsPuts
    impact performance on some engines.
 
 
-PerformsDataWrite
-------------
+PerformDataWrite
+----------------
 
    If supported by the engine, moves data from prior ``Put`` calls to disk
 
@@ -361,13 +363,13 @@ The following table summarizes the memory contracts required by ADIOS2 engines b
 +----------+-------------+-----------------------------------------------+
 | Get      | Data Memory | Contract                                      |
 +----------+-------------+-----------------------------------------------+
-|          | Pointer     | do not modify until PerformPuts/EndStep/Close |
+|          | Pointer     | do not modify until PerformGets/EndStep/Close |
 | Deferred |             |                                               |
-|          | Contents    | populated at Put or PerformPuts/EndStep/Close |
+|          | Contents    | populated at Get or PerformGets/EndStep/Close |
 +----------+-------------+-----------------------------------------------+
-|          | Pointer     | modify after Put                              |
+|          | Pointer     | modify after Get                              |
 | Sync     |             |                                               |
-|          | Contents    | populated at Put                              |
+|          | Contents    | populated at Get                              |
 +----------+-------------+-----------------------------------------------+
 
 
@@ -452,8 +454,8 @@ Only use it if absolutely necessary (*e.g.* memory bound application or out of s
    ``Get`` doesn't support returning spans.
 
 
-PerformsGets
-------------
+PerformGets
+-----------
 
    Executes all pending ``Get`` calls in deferred mode.
 
diff --git a/docs/user_guide/source/components/operator.rst b/docs/user_guide/source/components/operator.rst
index 04f30bc686..d1fce6591a 100644
--- a/docs/user_guide/source/components/operator.rst
+++ b/docs/user_guide/source/components/operator.rst
@@ -2,6 +2,8 @@
 Operator
 ********
 
+.. _sec:basics_interface_components_operator:
+
 The Operator abstraction allows ADIOS2 to act upon the user application data, either from a ``adios2::Variable`` or a set of Variables in an ``adios2::IO`` object.
 Current supported operations are:
 
diff --git a/docs/user_guide/source/components/variable.rst b/docs/user_guide/source/components/variable.rst
index c3a27545d8..69adbc884d 100644
--- a/docs/user_guide/source/components/variable.rst
+++ b/docs/user_guide/source/components/variable.rst
@@ -6,15 +6,17 @@ An ``adios2::Variable`` is the link between a piece of data coming from an appli
 This component handles all application variables classified by data type and shape.
 
 Each ``IO`` holds a set of Variables, and each ``Variable`` is identified with a unique name.
-They are created using the reference from ``IO::DefineVariable<T>`` or retrieved using the pointer from ``IO::InquireVariable<T>`` functions in :ref:`IO`.
+They are created using the reference from ``IO::DefineVariable<T>`` or retrieved using the pointer from
+``IO::InquireVariable<T>`` functions in :ref:`IO`.
 
 Data Types
---------------------
+----------
 
 Only primitive types are supported in ADIOS2.
-Fixed-width types from `<cinttypes> and <cstdint> <https://en.cppreference.com/w/cpp/types/integer>`_  should be preferred when writing portable code.
-ADIOS2 maps primitive types to equivalent fixed-width types (e.g. ``int`` -> ``int32_t``).
-In C++, acceptable types ``T`` in ``Variable<T>`` along with their preferred fix-width equivalent in 64-bit platforms are given below:
+Fixed-width types from `<cinttypes> and <cstdint> <https://en.cppreference.com/w/cpp/types/integer>`_  should be
+preferred when writing portable code. ADIOS2 maps primitive types to equivalent fixed-width types
+(e.g. ``int`` -> ``int32_t``). In C++, acceptable types ``T`` in ``Variable<T>`` along with their preferred fix-width
+equivalent in 64-bit platforms are given below:
 
 .. code-block:: c++
 
@@ -52,19 +54,19 @@ In C++, acceptable types ``T`` in ``Variable<T>`` along with their preferred fix
    Python APIs: use the equivalent fixed-width types from numpy.
    If ``dtype`` is not specified, ADIOS2 handles numpy defaults just fine as long as primitive types are passed.
 
-
 Shapes
----------------------
+------
 
 ADIOS2 is designed for MPI applications.
 Thus different application data shapes must be supported depending on their scope within a particular MPI communicator.
-The shape is defined at creation from the ``IO`` object by providing the dimensions: shape, start, count in the ``IO::DefineVariable<T>``.
-The supported shapes are described below.
+The shape is defined at creation from the ``IO`` object by providing the dimensions: shape, start, count in the
+``IO::DefineVariable<T>``. The supported shapes are described below.
 
 
 1. **Global Single Value**:
 Only a name is required for their definition.
-These variables are helpful for storing global information, preferably managed by only one MPI process, that may or may not change over steps: *e.g.* total number of particles, collective norm, number of nodes/cells, etc.
+These variables are helpful for storing global information, preferably managed by only one MPI process, that may or may
+not change over steps: *e.g.* total number of particles, collective norm, number of nodes/cells, etc.
 
    .. code-block:: c++
 
@@ -142,9 +144,10 @@ Dims value.
 For example, the definition below defines a 2-D Joined array where the
 first dimension is the one along which blocks will be joined and the
 2nd dimension is 5.  Here this rank is contributing two rows to this array.
-   .. code-block:: c++
-    auto var = outIO.DefineVariable<double>("table", {adios2::JoinedDim, 5},
-                                            {}, {2, 5});
+
+.. code-block:: c++
+
+  auto var = outIO.DefineVariable<double>("table", {adios2::JoinedDim, 5}, {}, {2, 5});
 
 If each of N writer ranks were to declare a variable like this and do
 a single Put() in a timestep, the reader-side GlobalArray would have
@@ -152,14 +155,84 @@ shape {2*N, 5} and all normal reader-side GlobalArray operations would
 be applicable to it.  
 
 
-
-
 .. note::
 
    JoinedArrays are currently only supported by BP4 and BP5 engines,
    as well as the SST engine with BP5 marshalling.
-   
 
+Global Array Capabilities and Limitations
+-----------------------------------------
+
+ADIOS2 is focusing on writing and reading N-dimensional, distributed, global arrays of primitive types. The basic idea
+is that, usually, a simulation has such a data structure in memory (distributed across multiple processes) and wants to
+dump its content regularly as it progresses. ADIOS2 was designed to:
+
+1. to do this writing and reading as fast as possible
+2. to enable reading any subsection of the array
+
+.. image:: https://imgur.com/6nX67yq.png
+   :width: 400
+
+The figure above shows a parallel application of 12 processes producing a 2D array. Each process has a 2D array locally
+and the output is created by placing them into a 4x3 pattern. A reading application's individual process then can read
+any subsection of the entire global array. In the figure, a 6 process application decomposes the array in a 3x2 pattern
+and each process reads a 2D array whose content comes from multiple producer processes.
+
+The figure hopefully helps to understand the basic concept but it can be also misleading if it suggests limitations that
+are not there. Global Array is simply a boundary in N-dimensional space where processes can place their blocks of data.
+In the global space:
+
+1. one process can place multiple blocks
+
+  .. image:: https://imgur.com/Pb1s03h.png
+     :width: 400
+
+2. does NOT need to be fully covered by the blocks
+
+  .. image:: https://imgur.com/qJBXYcQ.png
+     :width: 400
 
+  * at reading, unfilled positions will not change the allocated memory
 
+3. blocks can overlap
+
+  .. image:: https://imgur.com/GA59lZ2.png
+     :width: 300
+
+  * the reader will get values in an overlapping position from one of the block but there is no control over from which
+    block
+
+4. each process can put a different size of block, or put multiple blocks of different sizes
+
+5. some process may not contribute anything to the global array
+
+Over multiple output steps
+
+1. the processes CAN change the size (and number) of blocks in the array
+
+  * E.g. atom table: global size is fixed but atoms wander around processes, so their block size is changing
+
+    .. image:: https://imgur.com/DorjG2q.png
+       :width: 400
+
+2. the global dimensions CAN change over output steps
+
+  * but then you cannot read multiple steps at once
+  * E.g. particle table size changes due to particles disappearing or appearing
+
+    .. image:: https://imgur.com/nkuHeVX.png
+       :width: 400
+
+
+Limitations of the ADIOS global array concept
+
+1. Indexing starts from 0
+2. Cyclic data patterns are not supported; only blocks can be written or read
+3. If Some blocks may fully or partially fall outside of the global boundary, the reader will not be able to read those
+   parts
+
+.. note::
 
+   Technically, the content of the individual blocks is kept in the BP format (but not in HDF5 format) and in staging.
+   If you really, really want to retrieve all the blocks, you need to handle this array as a Local Array and read the
+   blocks one by one.
diff --git a/docs/user_guide/source/ecosystem/h5vol/vol.rst b/docs/user_guide/source/ecosystem/h5vol/vol.rst
index 6315e1dc62..c8f7b87026 100644
--- a/docs/user_guide/source/ecosystem/h5vol/vol.rst
+++ b/docs/user_guide/source/ecosystem/h5vol/vol.rst
@@ -1,11 +1,12 @@
-***********
+**********
 Disclaimer
-***********
+**********
+
 .. note::
 
-The Virtual Object Layer (VOL) is a feature introduced in recent release of  HDF5 1.12 (https://hdf5.wiki/index.php/New_Features_in_HDF5_Release_1.12). 
+   The Virtual Object Layer (VOL) is a feature introduced in recent release of  HDF5 1.12 (https://hdf5.wiki/index.php/New_Features_in_HDF5_Release_1.12).
 
-So please do make sure your HDF5 version supports the latest VOL. 
+   So please do make sure your HDF5 version supports the latest VOL.
 
 Once the ADIOS VOL is compiled, There are two ways to apply it: 
 
@@ -20,8 +21,8 @@ External
 
 .. code-block:: c++
 
-  HDF5_VOL_CONNECTOR=ADIOS2_VOL
-  HDF5_PLUGIN_PATH=/replace/with/your/adios2_vol/lib/path/
+   HDF5_VOL_CONNECTOR=ADIOS2_VOL
+   HDF5_PLUGIN_PATH=/replace/with/your/adios2_vol/lib/path/
 
 
 Without code change, ADIOS2 VOL will be loaded at runtime by HDF5, to access ADIOS files without changing user code.
@@ -36,26 +37,22 @@ Internal
 
 .. code-block:: c++
 
-     // other includes 
-     #include <adios2/h5vol/H5Vol.h> // ADD THIS LINE TO INCLUDE ADIOS VOL
-     
-     hid_t  pid = H5Pcreate(H5P_FILE_ACCESS);
-     // other declarations
-     hid_t fid = H5Fopen(filename, mode, pid);
-
-     H5VL_ADIOS2_set(pid); // ADD THIS LINE TO USE ADIOS VOL
+   // other includes
+   #include <adios2/h5vol/H5Vol.h> // ADD THIS LINE TO INCLUDE ADIOS VOL
 
-     H5Fclose(fid);
+   hid_t  pid = H5Pcreate(H5P_FILE_ACCESS);
+   // other declarations
+   hid_t fid = H5Fopen(filename, mode, pid);
 
-     H5VL_ADIOS2_unset();  // ADD THIS LINE TO EXIT ADIOS VOL
+   H5VL_ADIOS2_set(pid); // ADD THIS LINE TO USE ADIOS VOL
 
+   H5Fclose(fid);
 
+   H5VL_ADIOS2_unset();  // ADD THIS LINE TO EXIT ADIOS VOL
 
 ..  To choose what ADIOS2 Engine to use, set env variable: ADIOS2_ENGINE (default is BP5)
 
 
-
-
 **Note:** The following features are not supported in this VOL:
 
   * hyperslab support
diff --git a/docs/user_guide/source/engines/bp3.rst b/docs/user_guide/source/engines/bp3.rst
index 4f46c7b44e..b9a18a71bd 100644
--- a/docs/user_guide/source/engines/bp3.rst
+++ b/docs/user_guide/source/engines/bp3.rst
@@ -1,6 +1,6 @@
-****
+***
 BP3 
-****
+***
 
 The BP3 Engine writes and reads files in ADIOS2 native binary-pack (bp) format. BP files are backwards compatible with ADIOS1.x and have the following structure given a "name" string passed as the first argument of ``IO::Open``:
 
diff --git a/docs/user_guide/source/engines/dataspaces.rst b/docs/user_guide/source/engines/dataspaces.rst
index a9627a833b..6f88a55704 100644
--- a/docs/user_guide/source/engines/dataspaces.rst
+++ b/docs/user_guide/source/engines/dataspaces.rst
@@ -1,6 +1,6 @@
-*********************************
+**********
 DataSpaces
-*********************************
+**********
 
 The DataSpaces engine for ADIOS2 is experimental. DataSpaces is an asynchronous I/O transfer method within ADIOS that enables 
 low-overhead, high-throughput data extraction from a running simulation. 
diff --git a/docs/user_guide/source/engines/engines.rst b/docs/user_guide/source/engines/engines.rst
index a6591322c0..948143453b 100644
--- a/docs/user_guide/source/engines/engines.rst
+++ b/docs/user_guide/source/engines/engines.rst
@@ -23,6 +23,7 @@ Parameters are passed at:
 .. include:: sst.rst
 .. include:: ssc.rst
 .. include:: dataman.rst
+.. include:: dataspaces.rst
 .. include:: inline.rst
 .. include:: null.rst
 .. include:: plugin.rst
diff --git a/docs/user_guide/source/engines/hdf5.rst b/docs/user_guide/source/engines/hdf5.rst
index c785c32e8f..a3ece47766 100644
--- a/docs/user_guide/source/engines/hdf5.rst
+++ b/docs/user_guide/source/engines/hdf5.rst
@@ -9,9 +9,9 @@ or, set it in client code. For example, here is how to create a hdf5 reader:
 
 .. code-block:: c++
 
- adios2::IO h5IO = adios.DeclareIO("SomeName");
- h5IO.SetEngine("HDF5");
- adios2::Engine h5Reader = h5IO.Open(filename, adios2::Mode::Read);
+  adios2::IO h5IO = adios.DeclareIO("SomeName");
+  h5IO.SetEngine("HDF5");
+  adios2::Engine h5Reader = h5IO.Open(filename, adios2::Mode::Read);
 
 To read back the h5 files generated with VDS to ADIOS2, one can use the HDF5 engine. Please make sure you are using the HDF5 library that has version greater than or equal to 1.11 in ADIOS2.
 
@@ -33,10 +33,11 @@ After the subfile feature is introduced  in HDF5 version 1.14, the ADIOS2 HDF5 e
 
 To use the subfile feature, client needs to support MPI_Init_thread with MPI_THREAD_MULTIPLE. 
 
-Useful parameters from the  HDF lirbary to tune subfiles are:
+Useful parameters from the HDF library to tune subfiles are:
+
 .. code-block:: xml
 		
-H5FD_SUBFILING_IOC_PER_NODE (num of subfiles per node)
-    set H5FD_SUBFILING_IOC_PER_NODE to 0 if the regular h5 file is prefered, before using ADIOS2 HDF5 engine. 
-H5FD_SUBFILING_STRIPE_SIZE
-H5FD_IOC_THREAD_POOL_SIZE
+  H5FD_SUBFILING_IOC_PER_NODE (num of subfiles per node)
+    set H5FD_SUBFILING_IOC_PER_NODE to 0 if the regular h5 file is preferred, before using ADIOS2 HDF5 engine.
+  H5FD_SUBFILING_STRIPE_SIZE
+  H5FD_IOC_THREAD_POOL_SIZE
diff --git a/docs/user_guide/source/engines/sst.rst b/docs/user_guide/source/engines/sst.rst
index 61a2e0aa87..83adf8d85e 100644
--- a/docs/user_guide/source/engines/sst.rst
+++ b/docs/user_guide/source/engines/sst.rst
@@ -280,24 +280,24 @@ single reader, but only upon request (with a request being initiated
 by the reader doing BeginStep()).  Normal reader-side rules (like
 BeginStep timeouts) and writer-side rules (like queue limit behavior) apply.
 
-============================= ===================== ====================================================
- **Key**                        **Value Format**      **Default** and Examples
-============================= ===================== ====================================================
- RendezvousReaderCount           integer             **1**
- RegistrationMethod              string              **File**, Screen
- QueueLimit                      integer             **0** (no queue limits)
- QueueFullPolicy                 string              **Block**, Discard
- ReserveQueueLimit               integer             **0** (no queue limits)
- DataTransport                   string              **default varies by platform**, UCX, MPI, RDMA, WAN
- WANDataTransport                string              **sockets**, enet, ib
- ControlTransport                string              **TCP**, Scalable
- MarshalMethod                   string              **BP5**, BP, FFS
- NetworkInterface                string              **NULL**
- ControlInterface                string              **NULL**
- DataInterface                   string              **NULL**
- FirstTimestepPrecious           boolean             **FALSE**, true, no, yes
- AlwaysProvideLatestTimestep     boolean             **FALSE**, true, no, yes
- OpenTimeoutSecs                 integer             **60**
- SpeculativePreloadMode          string              **AUTO**, ON, OFF
- SpecAutoNodeThreshold           integer             **1**
-============================= ===================== =====================================================
++-----------------------------+---------------------+----------------------------------------------------+
+| **Key**                     | **Value Format**    | **Default** and Examples                           |
++-----------------------------+---------------------+----------------------------------------------------+
+| RendezvousReaderCount       | integer             | **1**                                              |
+| RegistrationMethod          | string              | **File**, Screen                                   |
+| QueueLimit                  | integer             | **0** (no queue limits)                            |
+| QueueFullPolicy             | string              | **Block**, Discard                                 |
+| ReserveQueueLimit           | integer             | **0** (no queue limits)                            |
+| DataTransport               | string              | **default varies by platform**, UCX, MPI, RDMA, WAN|
+| WANDataTransport            | string              | **sockets**, enet, ib                              |
+| ControlTransport            | string              | **TCP**, Scalable                                  |
+| MarshalMethod               | string              | **BP5**, BP, FFS                                   |
+| NetworkInterface            | string              | **NULL**                                           |
+| ControlInterface            | string              | **NULL**                                           |
+| DataInterface               | string              | **NULL**                                           |
+| FirstTimestepPrecious       | boolean             | **FALSE**, true, no, yes                           |
+| AlwaysProvideLatestTimestep | boolean             | **FALSE**, true, no, yes                           |
+| OpenTimeoutSecs             | integer             | **60**                                             |
+| SpeculativePreloadMode      | string              | **AUTO**, ON, OFF                                  |
+| SpecAutoNodeThreshold       | integer             | **1**                                              |
++-----------------------------+---------------------+----------------------------------------------------+
diff --git a/docs/user_guide/source/index.rst b/docs/user_guide/source/index.rst
index 6fd178f524..c976e6826d 100644
--- a/docs/user_guide/source/index.rst
+++ b/docs/user_guide/source/index.rst
@@ -37,9 +37,17 @@ Funded by the `Exascale Computing Project (ECP) <https://www.exascaleproject.org
    advanced/aggregation
    advanced/memory_management
    advanced/gpu_aware
+   advanced/query
    advanced/plugins
    advanced/ecp_hardware
-   
+
+.. toctree::
+   :caption: Tutorials
+
+   tutorials/overview
+   tutorials/downloadAndBuild
+   tutorials/basicTutorials
+
 .. toctree::
    :caption: Ecosystem Tools
 
diff --git a/docs/user_guide/source/operators/operators.rst b/docs/user_guide/source/operators/operators.rst
index 1543bd009c..0deb69d833 100644
--- a/docs/user_guide/source/operators/operators.rst
+++ b/docs/user_guide/source/operators/operators.rst
@@ -8,7 +8,7 @@ object.  Current supported operations are:
 
 1. Data compression/decompression, lossy and lossless.
 
-This section provides a description of the :ref:`Available Operators` in ADIOS2
+This section provides a description of the supported operators in ADIOS2
 and their specific parameters to allow extra-control from the user. Parameters
 are passed in key-value pairs for:
 
diff --git a/docs/user_guide/source/setting_up/source/cmake.rst b/docs/user_guide/source/setting_up/source/cmake.rst
index 0785c00c07..0fe2210f45 100644
--- a/docs/user_guide/source/setting_up/source/cmake.rst
+++ b/docs/user_guide/source/setting_up/source/cmake.rst
@@ -76,6 +76,8 @@ And finally, use the standard invocation to install:
 CMake Options
 *************
 
+.. _sec:source_cmake_options:
+
 The following options can be specified with CMake's ``-DVAR=VALUE`` syntax. The default option is highlighted.
 
 ============================= ================ ==========================================================================================================================================================================================================================
diff --git a/docs/user_guide/source/tutorials/attributes.rst b/docs/user_guide/source/tutorials/attributes.rst
new file mode 100644
index 0000000000..802ca924dd
--- /dev/null
+++ b/docs/user_guide/source/tutorials/attributes.rst
@@ -0,0 +1,230 @@
+Attributes
+==========
+
+.. _sec:tutorials_basics_attributes:
+
+In the previous tutorial, we learned how to write/read variables.
+
+In this tutorial, we will explore how to write/read attributes. Attributes are metadata related to the whole dataset or
+to a specific variable. In this tutorial, we will only focus on attributes related to the whole dataset, but we will
+explain how variable's attributes can be used too.
+
+Start editing the skeleton file `ADIOS2/examples/hello/bpAttributeWriteRead/bpAttributeWriteRead_tutorialSkeleton.cpp <https://github.com/ornladios/ADIOS2/blob/master/examples/hello/bpAttributeWriteRead/bpAttributeWriteRead_tutorialSkeleton.cpp>`_.
+
+1. In an MPI application first we need to always initialize MPI. We do that with the following lines:
+
+.. code-block:: cpp
+
+   int rank, size;
+   int provided;
+
+   // MPI_THREAD_MULTIPLE is only required if you enable the SST MPI_DP
+   MPI_Init_thread(&argc, &argv, MPI_THREAD_MULTIPLE, &provided);
+   MPI_Comm_rank(MPI_COMM_WORLD, &rank);
+   MPI_Comm_size(MPI_COMM_WORLD, &size);
+
+2.  Now we need to create a application variable which will be used to define an ADIOS2 variable.
+
+.. code-block:: cpp
+
+   std::vector<float> myFloats = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9};
+
+3. Then, we need to create an ADIOS2 instance.
+
+.. code-block:: cpp
+
+   adios2::ADIOS adios(MPI_COMM_WORLD);
+
+4. Then, we create the following writer function:
+
+.. code-block:: cpp
+
+   void writer(adios2::ADIOS &adios, int rank, int size, std::vector<float> &myFloats)
+   {
+      ...
+   }
+
+5. In this writer function, we define an IO object, and a float vector variable as follows:
+
+.. code-block:: cpp
+
+   adios2::IO bpIO = adios.DeclareIO("BPFile_N2N");
+
+   const std::size_t Nx = myFloats.size();
+   adios2::Variable<float> bpFloats = bpIO.DefineVariable<float>(
+     "bpFloats", {size * Nx}, {rank * Nx}, {Nx}, adios2::ConstantDims);
+
+6. Now, we will define various types of attributes as follows:
+
+.. code-block:: cpp
+
+   bpIO.DefineAttribute<std::string>("Single_String", "File generated with ADIOS2");
+
+   std::vector<std::string> myStrings = {"one", "two", "three"};
+   bpIO.DefineAttribute<std::string>("Array_of_Strings", myStrings.data(), myStrings.size());
+
+   bpIO.DefineAttribute<double>("Attr_Double", 0.f);
+   std::vector<double> myDoubles = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10};
+   bpIO.DefineAttribute<double>("Array_of_Doubles", myDoubles.data(), myDoubles.size());
+
+.. note::
+
+   if we want to define an attribute for a specific variable, we can use one of the following API:
+
+   .. code-block:: cpp
+
+      template <class T>
+      Attribute<T> DefineAttribute(const std::string &name, const T *data, const size_t size,
+                                   const std::string &variableName = "", const std::string separator = "/",
+                                   const bool allowModification = false);
+
+      template <class T>
+      Attribute<T> DefineAttribute(const std::string &name, const T &value,
+                                   const std::string &variableName = "", const std::string separator = "/",
+                                   const bool allowModification = false);
+
+   As we can see, by default the attributes don't change over multiple steps, but we can change that by setting
+   ``allowModification`` to ``true``.
+
+7. Then, we open a file for writing:
+
+.. code-block:: cpp
+
+   adios2::Engine bpWriter = bpIO.Open("fileAttributes.bp", adios2::Mode::Write);
+
+8. Now, we write the data and close the file:
+
+.. code-block:: cpp
+
+   bpWriter.BeginStep();
+   bpWriter.Put<float>(bpFloats, myFloats.data());
+   bpWriter.EndStep();
+   bpWriter.Close();
+
+9. Steps 1-8 are used for writing, we will define a reader function in the rest of the steps:
+
+.. code-block:: cpp
+
+   void reader(adios2::ADIOS &adios, int rank, int size)
+   {
+      ...
+   }
+
+10. In this reader function, we define an IO object, and open the file for reading:
+
+.. code-block:: cpp
+
+   adios2::IO bpIO = adios.DeclareIO("BPFile_N2N");
+   adios2::Engine bpReader = bpIO.Open("fileAttributes.bp", adios2::Mode::Read);
+
+11. Now, we check the available attributes as follows:
+
+.. code-block:: cpp
+
+   bpReader.BeginStep();
+   const auto attributesInfo = bpIO.AvailableAttributes();
+
+   for (const auto &attributeInfoPair : attributesInfo)
+   {
+     std::cout << "Attribute: " << attributeInfoPair.first;
+     for (const auto &attributePair : attributeInfoPair.second)
+     {
+         std::cout << "\tKey: " << attributePair.first << "\tValue: " << attributePair.second
+                   << "\n";
+     }
+     std::cout << "\n";
+   }
+
+12. Now we will inquire and get the attributes as follows:
+
+.. code-block:: cpp
+
+    adios2::Attribute<float> singleString = bpIO.InquireAttribute<float>("Single_String");
+    if (singleString)
+    {
+        std::cout << singleString.Name() << ": " << singleString.Data()[0] << "\n";
+    }
+    adios2::Attribute<std::string> arrayOfStrings =
+        bpIO.InquireAttribute<std::string>("Array_of_Strings");
+    if (arrayOfStrings)
+    {
+        std::cout << arrayOfStrings.Name() << ": ";
+        for (const auto &value : arrayOfStrings.Data())
+        {
+            std::cout << value << " ";
+        }
+        std::cout << "\n";
+    }
+    adios2::Attribute<double> attrDouble = bpIO.InquireAttribute<double>("Attr_Double");
+    if (attrDouble)
+    {
+        std::cout << attrDouble.Name() << ": " << attrDouble.Data()[0] << "\n";
+    }
+    adios2::Attribute<double> arrayOfDoubles = bpIO.InquireAttribute<double>("Array_of_Doubles");
+    if (arrayOfDoubles)
+    {
+        std::cout << arrayOfDoubles.Name() << ": ";
+        for (const auto &value : arrayOfDoubles.Data())
+        {
+            std::cout << value << " ";
+        }
+        std::cout << "\n";
+    }
+
+13. Afterward, we will inquire and get the variable as follows:
+
+.. code-block:: cpp
+
+    adios2::Variable<float> bpFloats = bpIO.InquireVariable<float>("bpFloats");
+    const std::size_t Nx = 10;
+    std::vector<float> myFloats(Nx);
+    if (bpFloats)
+    {
+        bpFloats.SetSelection({{Nx * rank}, {Nx}});
+        bpReader.Get(bpFloats, myFloats.data());
+    }
+    bpReader.EndStep();
+
+14. Finally, we close the file:
+
+.. code-block:: cpp
+
+   bpReader.Close();
+
+15. In the main function, we call the writer and reader functions as follows:
+
+.. code-block:: cpp
+
+   writer(adios, rank, size, myFloats);
+   reader(adios, rank, size);
+
+16. Finally, we finalize MPI:
+
+.. code-block:: cpp
+
+   MPI_Finalize();
+
+17. The final code should look as follows (excluding try/catch and optional usage MPI), and it was derived from the
+    example `ADIOS2/examples/hello/bpAttributeWriteRead/bpAttributeWriteRead.cpp <https://github.com/ornladios/ADIOS2/blob/master/examples/hello/bpAttributeWriteRead/bpAttributeWriteRead.cpp>`_.
+
+.. literalinclude:: ../../../../examples/hello/bpAttributeWriteRead/bpAttributeWriteRead.cpp
+   :language: cpp
+
+18. You can compile and run it as follows:
+
+.. code-block:: bash
+
+   cd Path-To-ADIOS2/examples/hello/bpAttributeWriteRead
+   mkdir build
+   cd build
+   cmake -DADIOS2_DIR=Path-To-ADIOS2/build/ ..
+   cmake --build .
+   mpirun -np 2 ./adios2_hello_bpAttributeWriteRead_mpi
+
+19. You can check the content of the output file "fileAttributes.bp" using *bpls* as follows:
+
+.. code-block:: bash
+
+   Path-To-ADIOS2/build/bin/bpls ./fileAttributes.bp
+
+     float    bpFloats  {20}
diff --git a/docs/user_guide/source/tutorials/basicTutorials.rst b/docs/user_guide/source/tutorials/basicTutorials.rst
new file mode 100644
index 0000000000..c1c7a44de2
--- /dev/null
+++ b/docs/user_guide/source/tutorials/basicTutorials.rst
@@ -0,0 +1,10 @@
+Basic Tutorials
+===============
+
+.. toctree::
+
+   helloWorld
+   variables
+   attributes
+   operators
+   steps
diff --git a/docs/user_guide/source/tutorials/downloadAndBuild.rst b/docs/user_guide/source/tutorials/downloadAndBuild.rst
new file mode 100644
index 0000000000..637955e069
--- /dev/null
+++ b/docs/user_guide/source/tutorials/downloadAndBuild.rst
@@ -0,0 +1,42 @@
+Download And Build
+==================
+
+.. _sec:tutorials_download_and_build:
+
+First, you need to clone the ADIOS2 repository. You can do this by running the following command:
+
+.. code-block:: bash
+
+  git clone https://github.com/ornladios/ADIOS2.git ADIOS2
+
+.. note::
+
+   ADIOS2 uses `CMake <https://cmake.org/>`_ for building, testing and installing the library and utilities.
+   So you need to have CMake installed on your system.
+
+Then, create a build directory, run CMake, and build ADIOS2:
+
+.. code-block:: bash
+
+  cd ADIOS2
+  mkdir build
+  cd build
+  cmake -DADIOS2_USE_MPI=ON ..
+  cmake --build .
+
+.. note::
+
+  If you want to know more about the ADIOS2's CMake options, see section
+  :ref:`CMake Options <sec:source_cmake_options>`.
+
+All the tutorials that we will explore are existing ADIOS2 examples located in the ``ADIOS2/examples`` directory.
+
+To build any of the examples, e.g. the ``helloWorld`` example, you can run the following commands:
+
+.. code-block:: bash
+
+  cd Path-To-ADIOS2/examples/hello/helloWorld
+  mkdir build
+  cd build
+  cmake -DADIOS2_DIR=Path-To-ADIOS2/build/ ..
+  cmake --build .
diff --git a/docs/user_guide/source/tutorials/helloWorld.rst b/docs/user_guide/source/tutorials/helloWorld.rst
new file mode 100644
index 0000000000..a0e424a097
--- /dev/null
+++ b/docs/user_guide/source/tutorials/helloWorld.rst
@@ -0,0 +1,143 @@
+Hello World
+===========
+
+.. _sec:tutorials_basics_hello_world:
+
+Like in any language, the first program you write is the "Hello World" program.
+
+In this tutorial, we will see how to write "Hello World from ADIOS2" and read it back with ADIOS2.
+So let's dig in!
+
+Start editing the skeleton file `ADIOS2/examples/hello/helloWorld/hello-world_tutorialSkeleton.cpp <https://github.com/ornladios/ADIOS2/blob/master/examples/hello/helloWorld/hello-world_tutorialSkeleton.cpp>`_.
+
+
+1. We create an ADIOS instance, and define the greeting message in our main function as follows:
+
+.. code-block:: c++
+
+  int main()
+  {
+    adios2::ADIOS adios();
+    const std::string greeting("Hello World from ADIOS2");
+    ...
+    return 0;
+  }
+
+2. Then we create a writer function in which we pass the adios instance, and the greeting message as follows:
+
+.. code-block:: c++
+
+  void writer(adios2::ADIOS& adios, const std::string& greeting)
+  {
+    ...
+  }
+
+3. In this writer function, we define an IO object, a string variable for the message as follows:
+
+.. code-block:: c++
+
+  adios2::IO io = adios.DeclareIO("hello-world-writer");
+  adios2::Variable<std::string> varGreeting = io.DefineVariable<std::string>("Greeting");
+
+.. note::
+
+  Using the IO object, we can define the engine type that we want to utilize using the *io.SetEngine()* function.
+  If *SetEngine()* is not used, the default engine type is *BPFile* which is an alias for the latest version of the BP
+  engine of the ADIOS2 library. See :ref:`Available Engines` and :ref:`Supported Engines` for more information.
+  It's important to note that the file extension of an output file, although it's not a good practice, it can differ
+  from the engine type, e.g. write a foo.h5 file with the BPFile engine. When reading foo.h5 you should explicitly
+  specify the engine type as BPFile to read it properly.
+
+4. Then we open a file with the name "hello-world-cpp.bp" and write the greeting message to it as follows:
+
+.. code-block:: c++
+
+  adios2::Engine writer = io.Open("hello-world-cpp.bp", adios2::Mode::Write);
+  writer.BeginStep();
+  writer.Put(varGreeting, greeting);
+  writer.EndStep();
+  writer.Close();
+
+.. note::
+
+  The ``BeginStep`` and ``EndStep`` calls are optional when **writing** one step, but they are required
+  for multiple steps, so it is a good practice to always use them.
+
+5. Now we create a reader function in which we pass the adios instance, and get the greeting message back as follows:
+
+.. code-block:: c++
+
+  std::string reader(adios2::ADIOS& adios)
+  {
+    ...
+  }
+
+6. In this reader function, we define an IO object and inquire a string variable for the message as follows:
+
+.. code-block:: c++
+
+  adios2::IO io = adios.DeclareIO("hello-world-reader");
+  reader.BeginStep();
+  adios2::Variable<std::string> varGreeting = io.InquireVariable<std::string>("Greeting");
+
+7. Then we open the file with the name "hello-world-cpp.bp", read the greeting message from it and return it as follows:
+
+.. code-block:: c++
+
+  adios2::Engine reader = io.Open("hello-world-cpp.bp", adios2::Mode::Read);
+  std::string greeting;
+  reader.Get(varGreeting, greeting);
+  reader.EndStep();
+  reader.Close();
+  return greeting;
+
+.. note::
+
+  The ``BeginStep`` and ``EndStep`` calls are required when **reading** one step and multiple steps. We will see in
+  another tutorial how to read multiple steps. It's important to note that the ``BeginStep`` should be called **before**
+  all ``Inquire*`` / ``Available*`` function calls.
+
+8. Finally, we call the writer and reader functions in our main function as follows:
+
+.. code-block:: c++
+
+  int main()
+  {
+    adios2::ADIOS adios();
+    const std::string greeting("Hello World from ADIOS2");
+    writer(adios, greeting);
+    std::string message = reader(adios);
+    std::cout << message << std::endl;
+    return 0;
+  }
+
+9. The final code should look as follows (excluding try/catch and the optional usage of MPI), and it was derived from
+   the example `ADIOS2/examples/hello/helloWorld/hello-world.cpp <https://github.com/ornladios/ADIOS2/blob/master/examples/hello/helloWorld/hello-world.cpp>`_.
+
+.. literalinclude:: ../../../../examples/hello/helloWorld/hello-world.cpp
+   :language: cpp
+
+10. You can compile and run it as follows:
+
+.. code-block:: bash
+
+   cd Path-To-ADIOS2/examples/hello/helloWorld
+   mkdir build
+   cd build
+   cmake -DADIOS2_DIR=Path-To-ADIOS2/build/ ..
+   cmake --build .
+   ./adios2_hello_helloWorld
+
+11. You can check the content of the output file "hello-world-cpp.bp" using *bpls* as follows:
+
+.. code-block:: bash
+
+   Path-To-ADIOS2/build/bin/bpls ./hello-world-cpp.bp
+
+      string   Greeting  scalar
+
+12. The Python version of this tutorial can be found at `ADIOS2/examples/hello/helloWorld/hello-world.py <https://github.com/ornladios/ADIOS2/blob/master/examples/hello/helloWorld/hello-world.py>`_.
+    and it looks as follows:
+
+.. literalinclude:: ../../../../examples/hello/helloWorld/hello-world.py
+   :language: python
diff --git a/docs/user_guide/source/tutorials/operators.rst b/docs/user_guide/source/tutorials/operators.rst
new file mode 100644
index 0000000000..8fa400fb53
--- /dev/null
+++ b/docs/user_guide/source/tutorials/operators.rst
@@ -0,0 +1,156 @@
+Operators
+=========
+
+.. _sec:tutorials_basics_operators:
+
+In the previous tutorial we learned how to write and read attributes.
+
+For this example to work, you would need to have the SZ compression library installed, which ADIOS automatically detects.
+The easiest way to install SZ is with Spack, and you can do that as follows:
+
+.. code-block:: bash
+
+   git clone https://github.com/spack/spack.git ~/spack
+   cd ~/spack
+   . share/spack/setup-env.sh
+   spack install sz
+   spack load sz
+
+In this tutorial we will learn how to use operators. Operators are used for Data compression/decompression, lossy and
+lossless. They act upon the user application data, either from a variable or a set of variables in a IO object.
+
+Additionally, we will explore how to simply write variables across multiple steps.
+
+So, let's dig in!
+
+Start editing the skeleton file `ADIOS2/examples/hello/bpOperatorSZWriter/bpOperatorSZWriter_tutorialSkeleton.cpp <https://github.com/ornladios/ADIOS2/blob/master/examples/hello/bpOperatorSZWriter/bpOperatorSZWriter_tutorialSkeleton.cpp>`_.
+
+1. In an MPI application first we need to always initialize MPI. We do that with the following lines:
+
+.. code-block:: cpp
+
+
+   int rank, size;
+   int rank, size;
+   int provided;
+
+   // MPI_THREAD_MULTIPLE is only required if you enable the SST MPI_DP
+   MPI_Init_thread(&argc, &argv, MPI_THREAD_MULTIPLE, &provided);
+   MPI_Comm_rank(MPI_COMM_WORLD, &rank);
+   MPI_Comm_size(MPI_COMM_WORLD, &size);
+
+2. This application has command line arguments for the size of the data, and the compression accuracy,
+   which we can read as follows:
+
+.. code-block:: cpp
+
+   const std::size_t Nx = static_cast<std::size_t>(std::stoull(argv[1]));
+   const double accuracy = std::stod(argv[2]);
+
+3. Now we need to create some application variables which will be used to define ADIOS2 variables.
+
+.. code-block:: cpp
+
+   std::vector<float> myFloats(Nx);
+   std::vector<double> myDoubles(Nx);
+   std::iota(myFloats.begin(), myFloats.end(), 0.);
+   std::iota(myDoubles.begin(), myDoubles.end(), 0.);
+
+4. Now we need to create an ADIOS2 instance and IO object.
+
+.. code-block:: cpp
+
+   adios2::ADIOS adios(MPI_COMM_WORLD);
+   adios2::IO bpIO = adios.DeclareIO("BPFile_SZ");
+
+5. Now we need to define the variables we want to write.
+
+.. code-block:: cpp
+
+   adios2::Variable<float> bpFloats = bpIO.DefineVariable<float>(
+       "bpFloats", {size * Nx}, {rank * Nx}, {Nx}, adios2::ConstantDims);
+   adios2::Variable<double> bpDoubles = bpIO.DefineVariable<double>(
+       "bpDoubles", {size * Nx}, {rank * Nx}, {Nx}, adios2::ConstantDims);
+
+6. Now we need to define the compression operator we want to use. In this case we will use the SZ compressor.
+
+.. code-block:: cpp
+
+   adios2::Operator op = bpIO.DefineOperator("SZCompressor", "sz");
+   varFloats.AddOperation(op, {{"accuracy", std::to_string(accuracy)}});
+   varDoubles.AddOperation(op, {{"accuracy", std::to_string(accuracy)}});
+
+.. note::
+
+   ``DefineOperator()'`` s second parameter can be either zfp or sz. For more information regarding operators and their
+   properties you can look at :ref:`Basics: Interface Components: Operator <sec:basics_interface_components_operator>`.
+
+7. Let's also create an attribute to store the accuracy value.
+
+.. code-block:: cpp
+
+   adios2::Attribute<double> attribute = bpIO.DefineAttribute<double>("accuracy", accuracy);
+
+8. Now we need to open the file for writing.
+
+.. code-block:: cpp
+
+   adios2::Engine bpWriter = bpIO.Open("SZexample.bp", adios2::Mode::Write);
+
+9. Now we need to write the data. We will write the data for 3 steps, and edit them in between.
+
+.. code-block:: cpp
+
+   for (unsigned int step = 0; step < 3; ++step)
+   {
+       bpWriter.BeginStep();
+
+       bpWriter.Put<double>(bpDoubles, myDoubles.data());
+       bpWriter.Put<float>(bpFloats, myFloats.data());
+
+       bpWriter.EndStep();
+
+       // here you can modify myFloats, myDoubles per step
+       std::transform(myFloats.begin(), myFloats.end(), myFloats.begin(),
+                      [&](float v) -> float { return 2 * v; });
+       std::transform(myDoubles.begin(), myDoubles.end(), myDoubles.begin(),
+                      [&](double v) -> double { return 3 * v; });
+   }
+
+10. Now we need to close the file.
+
+.. code-block:: cpp
+
+   bpWriter.Close();
+
+11. Finally we need to finalize MPI.
+
+.. code-block:: cpp
+
+   MPI_Finalize();
+
+12. The final code should look as follows (excluding try/catch and optional usage of MPI), and it was derived from the
+    example `ADIOS2/examples/hello/bpOperatorSZWriter/bpOperatorSZWriter.cpp <https://github.com/ornladios/ADIOS2/blob/master/examples/hello/bpOperatorSZWriter/bpOperatorSZWriter.cpp>`_.
+
+.. literalinclude:: ../../../../examples/hello/bpOperatorSZWriter/bpOperatorSZWriter.cpp
+   :language: cpp
+
+13. You can compile and run it as follows:
+
+.. code-block:: bash
+
+   cd Path-To-ADIOS2/examples/hello/bpOperatorSZWriter
+   mkdir build
+   cd build
+   cmake -DADIOS2_DIR=Path-To-ADIOS2/build/ ..
+   cmake --build .
+   mpirun -np 2 ./adios2_hello_bpOperatorSZWriter_mpi 20 0.000001
+
+12. You can check the content of the output file "SZexample.bp" using *bpls* as follows:
+
+.. code-block:: bash
+
+   Path-To-ADIOS2/build/bin/bpls ./SZexample.bp
+
+     double   bpDoubles  3*{40}
+     float    bpFloats   3*{40}
diff --git a/docs/user_guide/source/tutorials/overview.rst b/docs/user_guide/source/tutorials/overview.rst
new file mode 100644
index 0000000000..b9cb47a573
--- /dev/null
+++ b/docs/user_guide/source/tutorials/overview.rst
@@ -0,0 +1,15 @@
+Overview
+========
+
+In this tutorial we will learn about how to build ADIOS2, and go through several tutorials explaining basic topics.
+
+More specifically, we will go through the following examples:
+
+1. :ref:`Download And Build <sec:tutorials_download_and_build>`
+2. Basic tutorials:
+
+   1. :ref:`Hello World <sec:tutorials_basics_hello_world>`
+   2. :ref:`Array Variables <sec:tutorials_basics_variables>`
+   3. :ref:`Attributes <sec:tutorials_basics_attributes>`
+   4. :ref:`Operators <sec:tutorials_basics_operators>`
+   5. :ref:`Steps <sec:tutorials_basics_steps>`
diff --git a/docs/user_guide/source/tutorials/steps.rst b/docs/user_guide/source/tutorials/steps.rst
new file mode 100644
index 0000000000..d7d6f8cac0
--- /dev/null
+++ b/docs/user_guide/source/tutorials/steps.rst
@@ -0,0 +1,212 @@
+Steps
+=====
+
+.. _sec:tutorials_basics_steps:
+
+In the previous tutorial, we introduced the concept of operators, and briefly touched upon the concept of steps.
+
+In this tutorial, we will explore how to write data for multiple steps, and how to read them back.
+
+So let's dig in!
+
+Start editing the skeleton file `ADIOS2/examples/hello/bpStepsWriteRead/bpStepsWriteRead_tutorialSkeleton.cpp <https://github.com/ornladios/ADIOS2/blob/master/examples/hello/bpStepsWriteRead/bpStepsWriteRead_tutorialSkeleton.cpp>`_.
+
+1. In an MPI application first we need to always initialize MPI. We do that with the following lines:
+
+.. code-block:: cpp
+
+   int rank, size;
+   int rank, size;
+   int provided;
+
+   // MPI_THREAD_MULTIPLE is only required if you enable the SST MPI_DP
+   MPI_Init_thread(&argc, &argv, MPI_THREAD_MULTIPLE, &provided);
+   MPI_Comm_rank(MPI_COMM_WORLD, &rank);
+   MPI_Comm_size(MPI_COMM_WORLD, &size);
+
+2. This application has an optional command line argument for engine being used. If
+   no argument is provided, the default engine is BPFile.
+
+.. code-block:: cpp
+
+    const std::string engine = argv[1] ? argv[1] : "BPFile";
+
+3. We will define the number of steps and the size of the data that we will create.
+
+.. code-block:: cpp
+
+    const std::string filename = engine + "StepsWriteRead.bp";
+    const unsigned int nSteps = 10;
+    const unsigned int Nx = 60000;
+
+4. Now we need to create an ADIOS2 instance.
+
+.. code-block:: cpp
+
+   adios2::ADIOS adios(MPI_COMM_WORLD);
+
+5. Now we will populate the writer function with the following signature:
+
+.. code-block::
+
+   void writer(adios2::ADIOS &adios, const std::string &engine, const std::string &fname,
+             const size_t Nx, unsigned int nSteps, int rank, int size)
+   {
+     ...
+   }
+
+6. Let's create some simulation data. We will create a 1D array of size Nx, and fill it with 0.block
+
+.. code-block:: cpp
+
+   std::vector<double> simData(Nx, 0.0);
+
+7. Now we will create an IO object and set the engine type.block
+
+.. code-block:: cpp
+
+   adios2::IO bpIO = adios.DeclareIO("SimulationOutput");
+   io.SetEngine(engine);
+
+.. note::
+
+   The beauty of ADIOS2 is that you write the same code for all engines. The only thing that changes is the engine name.
+   The underlying engine handles all the intricacies of the engine's format, and the user enjoys the API's simplicity.
+
+8. Now we will create a variable for the simulation data and the step.
+
+.. code-block:: cpp
+
+    const adios2::Dims shape{static_cast<size_t>(size * Nx)};
+    const adios2::Dims start{static_cast<size_t>(rank * Nx)};
+    const adios2::Dims count{Nx};
+    auto bpFloats = bpIO.DefineVariable<float>("bpFloats", shape, start, count);
+
+    auto bpStep = bpIO.DefineVariable<unsigned int>("bpStep");
+
+9. Now we will open the file for writing.
+
+.. code-block:: cpp
+
+    adios2::Engine bpWriter = bpIO.Open(fname, adios2::Mode::Write);
+
+10. Now we will write the data for each step.
+
+.. code-block:: cpp
+
+   for (unsigned int step = 0; step < nSteps; ++step)
+   {
+       const adios2::Box<adios2::Dims> sel({0}, {Nx});
+       bpFloats.SetSelection(sel);
+
+       bpWriter.BeginStep();
+       bpWriter.Put(bpFloats, simData.data());
+       bpWriter.Put(bpStep, step);
+       bpWriter.EndStep();
+
+       // Update values in the simulation data
+       update_array(simData, 10);
+   }
+
+11. Now we will close the file.
+
+.. code-block:: cpp
+
+   bpWriter.Close();
+
+12. Now we will populate the reader function with the following signature:
+
+.. code-block:: cpp
+
+   void reader(adios2::ADIOS &adios, const std::string &engine, const std::string &fname,
+               const size_t Nx, unsigned int /*nSteps*/, int rank, int /*size*/)
+   {
+     ...
+   }
+
+13. Now we will create an IO object and set the engine type.
+
+.. code-block:: cpp
+
+   adios2::IO bpIO = adios.DeclareIO("SimulationOutput");
+   io.SetEngine(engine);
+
+14. Now we will open the file for reading.
+
+.. code-block:: cpp
+
+   adios2::Engine bpReader = bpIO.Open(fname, adios2::Mode::Read);
+
+15. Now we will create a vector to store simData and a variable for the step.
+
+.. code-block:: cpp
+
+   std::vector<float> simData(Nx, 0);
+   unsigned int inStep = 0;
+
+16. Now we will read the data for each step.
+
+.. code-block:: cpp
+
+   for (unsigned int step = 0; bpReader.BeginStep() == adios2::StepStatus::OK; ++step)
+   {
+       auto bpFloats = bpIO.InquireVariable<float>("bpFloats");
+       if (bpFloats)
+       {
+           const adios2::Box<adios2::Dims> sel({{Nx * rank}, {Nx}});
+           bpFloats.SetSelection(sel);
+           bpReader.Get(bpFloats, simData.data());
+       }
+       auto bpStep = bpIO.InquireVariable<unsigned int>("bpStep");
+       if (bpStep)
+       {
+           bpReader.Get(bpStep, &inStep);
+       }
+
+       bpReader.EndStep();
+   }
+
+17. Now we will close the file.
+
+.. code-block:: cpp
+
+   bpReader.Close();
+
+18. Now we will call the writer and reader functions:
+
+.. code-block:: cpp
+
+   writer(adios, engine, filename, Nx, nSteps, rank, size);
+   reader(adios, engine, filename, Nx, nSteps, rank, size);
+
+19. Finally we need to finalize MPI.
+
+.. code-block:: cpp
+
+   MPI_Finalize();
+
+20. The final code should look as follows (excluding try/catch and optional usage of MPI), and it was derived from the
+    example `ADIOS2/examples/hello/bpStepsWriteRead/bpStepsWriteRead.cpp <https://github.com/ornladios/ADIOS2/blob/master/examples/hello/bpStepsWriteRead/bpStepsWriteRead.cpp>`_.
+
+.. literalinclude:: ../../../../examples/hello/bpStepsWriteRead/bpStepsWriteRead.cpp
+   :language: cpp
+
+21. You can compile and run it as follows:
+
+.. code-block:: bash
+
+   cd Path-To-ADIOS2/examples/hello/bpStepsWriteRead
+   mkdir build
+   cd build
+   cmake -DADIOS2_DIR=Path-To-ADIOS2/build/ ..
+   cmake --build .
+   mpirun -np 2 ./adios2_hello_bpStepsWriteRead_mpi
+
+22. You can check the content of the output file "BPFileStepsWriteRead.bp" using *bpls* as follows:
+
+.. code-block:: bash
+
+   Path-To-ADIOS2/build/bin/bpls ./BPFileStepsWriteRead.bp
+
+     float     bpFloats  10*{120000}
+     uint32_t  bpStep    10*scalar
diff --git a/docs/user_guide/source/tutorials/variables.rst b/docs/user_guide/source/tutorials/variables.rst
new file mode 100644
index 0000000000..1d05acf8b3
--- /dev/null
+++ b/docs/user_guide/source/tutorials/variables.rst
@@ -0,0 +1,246 @@
+Variables
+=========
+
+.. _sec:tutorials_basics_variables:
+
+In the previous tutorial we learned how to define a simple string variable, write it, and read it back.
+
+In this tutorial we will go two steps further:
+
+1. We will define variables which include arrays, and we will write them and read them back.
+2. We will use MPI to write and read the above variables in parallel.
+
+Let's start with the writing part.
+
+Start editing the skeleton file `ADIOS2/examples/hello/bpWriter/bpWriter_tutorialSkeleton.cpp <https://github.com/ornladios/ADIOS2/blob/master/examples/hello/bpWriter/bpWriter_tutorialSkeleton.cpp>`_.
+
+1. In an MPI application first we need to always initialize MPI. We do that with the following lines:
+
+.. code-block:: cpp
+
+   int rank, size;
+   int provided;
+
+   // MPI_THREAD_MULTIPLE is only required if you enable the SST MPI_DP
+   MPI_Init_thread(&argc, &argv, MPI_THREAD_MULTIPLE, &provided);
+   MPI_Comm_rank(MPI_COMM_WORLD, &rank);
+   MPI_Comm_size(MPI_COMM_WORLD, &size);
+
+2. Now we need to create some application variables which will be used to define ADIOS2 variables.
+
+.. code-block:: cpp
+
+   // Application variable
+   std::vector<float> myFloats = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9};
+   std::vector<int> myInts = {0, -1, -2, -3, -4, -5, -6, -7, -8, -9};
+   const std::size_t Nx = myFloats.size();
+   const std::string myString("Hello Variable String from rank " + std::to_string(rank));
+
+3. Now we need to define an ADIOS2 instance and the ADIOS2 variables.
+
+.. code-block:: cpp
+
+   adios2::ADIOS adios(MPI_COMM_WORLD);
+   adios2::IO bpIO = adios.DeclareIO("BPFile_N2N");
+
+   adios2::Variable<float> bpFloats = bpIO.DefineVariable<float>(
+       "bpFloats", {size * Nx}, {rank * Nx}, {Nx}, adios2::ConstantDims);
+
+   adios2::Variable<int> bpInts = bpIO.DefineVariable<int>("bpInts", {size * Nx}, {rank * Nx},
+                                                           {Nx}, adios2::ConstantDims);
+
+   // For the sake of the tutorial we create an unused variable
+   adios2::Variable<std::string> bpString = bpIO.DefineVariable<std::string>("bpString");
+
+.. note::
+
+   The above int/float variables are global arrays. The 1st argument of the ``DefineVariable`` function is the variable
+   name, the 2nd are the global dimensions, the 3rd is the start index for a rank, the 4th are the rank/local
+   dimensions, and the 5th is a boolean variable to indicate if the dimensions are constant or not over multiple steps,
+   where ``adios2::ConstantDims == true`` We will explore other tutorials that don't use constant dimensions.
+
+4. Now we need to open the ADIOS2 engine and write the variables.
+
+.. code-block:: cpp
+
+   adios2::Engine bpWriter = bpIO.Open("myVector_cpp.bp", adios2::Mode::Write);
+
+   bpWriter.BeginStep();
+   bpWriter.Put(bpFloats, myFloats.data());
+   bpWriter.Put(bpInts, myInts.data());
+   // bpWriter.Put(bpString, myString);
+   bpWriter.EndStep();
+
+   bpWriter.Close();
+
+5. Finally we need to finalize MPI.
+
+.. code-block:: cpp
+
+   MPI_Finalize();
+
+6. The final code should look as follows (excluding try/catch and the optional usage of MPI), and it was derived
+   from the example `ADIOS2/examples/hello/bpWriter/bpWriter.cpp <https://github.com/ornladios/ADIOS2/blob/master/examples/hello/bpWriter/bpWriter.cpp>`_.
+
+.. literalinclude:: ../../../../examples/hello/bpWriter/bpWriter.cpp
+   :language: cpp
+
+7. You can compile and run it as follows:
+
+.. code-block:: bash
+
+   cd Path-To-ADIOS2/examples/hello/bpWriter
+   mkdir build
+   cd build
+   cmake -DADIOS2_DIR=Path-To-ADIOS2/build/ ..
+   cmake --build .
+   mpirun -np 2 ./adios2_hello_bpWriter_mpi
+
+8. You can check the content of the output file "myVector_cpp.bp" using *bpls* as follows:
+
+.. code-block:: bash
+
+   Path-To-ADIOS2/build/bin/bpls ./myVector_cpp.bp
+
+     float    bpFloats  {10}
+     int32_t  bpInts    {10}
+
+Now let's move to the reading part.
+
+Start editing the skeleton file `ADIOS2/examples/hello/bpReader/bpReader_tutorialSkeleton.cpp <https://github.com/ornladios/ADIOS2/blob/master/examples/hello/bpReader/bpReader_tutorialSkeleton.cpp>`_.
+
+9. In an MPI application first we need to always initialize MPI. We do that with the following line:
+
+.. code-block:: cpp
+
+   int rank, size;
+   int provided;
+
+   // MPI_THREAD_MULTIPLE is only required if you enable the SST MPI_DP
+   MPI_Init_thread(&argc, &argv, MPI_THREAD_MULTIPLE, &provided);
+   MPI_Comm_rank(MPI_COMM_WORLD, &rank);
+   MPI_Comm_size(MPI_COMM_WORLD, &size);
+
+10. Now we need to define an ADIOS2 instance and open the ADIOS2 engine.
+
+.. code-block:: cpp
+
+   adios2::ADIOS adios(MPI_COMM_WORLD);
+   
+   adios2::IO bpIO = adios.DeclareIO("BPFile_N2N");
+   
+   adios2::Engine bpReader = bpIO.Open("myVector_cpp.bp", adios2::Mode::Read);
+
+11. Now we need to read the variables. In this case we know the variables that we need to inquire, so we can use the
+    ``InquireVariable`` function immediately. But let's explore how to check the available variables in a file first,
+    and then we will use the ``InquireVariable`` function.
+
+.. code-block:: cpp
+
+   bpReader.BeginStep();
+   const std::map<std::string, adios2::Params> variables = bpIO.AvailableVariables();
+
+   for (const auto &variablePair : variables)
+   {
+       std::cout << "Name: " << variablePair.first;
+       for (const auto &parameter : variablePair.second)
+       {
+           std::cout << "\t" << parameter.first << ": " << parameter.second << "\n";
+       }
+   }
+
+   adios2::Variable<float> bpFloats = bpIO.InquireVariable<float>("bpFloats");
+   adios2::Variable<int> bpInts = bpIO.InquireVariable<int>("bpInts");
+
+12. Now we need to read the variables from each rank. We will use the ``SetSelection`` to set the start index and rank
+    dimensions, then ``Get`` function to read the variables, and print the contents from rank 0.
+
+.. code-block:: cpp
+
+   const std::size_t Nx = 10;
+   if (bpFloats) // means found
+   {
+      std::vector<float> myFloats;
+
+      // read only the chunk corresponding to our rank
+      bpFloats.SetSelection({{Nx * rank}, {Nx}});
+      bpReader.Get(bpFloats, myFloats, adios2::Mode::Sync);
+
+      if (rank == 0)
+      {
+          std::cout << "MyFloats: \n";
+          for (const auto number : myFloats)
+          {
+              std::cout << number << " ";
+          }
+          std::cout << "\n";
+      }
+   }
+
+   if (bpInts) // means not found
+   {
+      std::vector<int> myInts;
+      // read only the chunk corresponding to our rank
+      bpInts.SetSelection({{Nx * rank}, {Nx}});
+
+      bpReader.Get(bpInts, myInts, adios2::Mode::Sync);
+
+      if (rank == 0)
+      {
+          std::cout << "myInts: \n";
+          for (const auto number : myInts)
+          {
+              std::cout << number << " ";
+          }
+          std::cout << "\n";
+      }
+   }
+
+.. note::
+
+   While using the ``Get`` function, we used the third parameter named ``Mode``. The mode parameter can also be used
+   for the ``Put`` function.
+
+   For the ``Put`` function, there are three modes: ``Deferred`` (default), ``Sync``, and ``Span``. and for the ``Get``
+   there are two modes: ``Deferred`` (default) and ``Sync``.
+
+   1. The ``Deferred`` mode is the default mode, because it is the fastest mode, as it allows ``Put`` / ``Get`` to be
+      grouped before potential data transport at the first encounter of ``PerformPuts`` / ``PerformGets``, ``EndStep``
+      or ``Close``.
+
+   2. The ``Sync`` mode forces ``Put`` / ``Get`` to be performed immediately so that the data are available immediately.
+
+   3. The ``Span`` mode is special mode of ``Deferred`` that allows population from non-contiguous memory structures.
+
+   For more information about the ``Mode`` parameter for both ``Put`` and ``Get`` functions, and when you should use
+   each option see :ref:`Basics: Interface Components: Engine <sec:basics_interface_components_engine>`.
+
+13. Now we need close the ADIOS2 engine.
+
+.. code-block:: cpp
+
+   bpReader.EndStep();
+   bpReader.Close();
+
+14. Finally we need to finalize MPI.
+
+.. code-block:: cpp
+
+   MPI_Finalize();
+
+15. The final code should look as follows (excluding try/catch), and it was derived from the example
+    `ADIOS2/examples/hello/bpWriter/bpWriter.cpp <https://github.com/ornladios/ADIOS2/blob/master/examples/hello/bpWriter/bpWriter.cpp>`_.
+
+.. literalinclude:: ../../../../examples/hello/bpReader/bpReader.cpp
+   :language: cpp
+
+16. You can compile and run it as follows:
+
+.. code-block:: bash
+
+   cd Path-To-ADIOS2/examples/hello/bpReader
+   mkdir build
+   cd build
+   cmake -DADIOS2_DIR=Path-To-ADIOS2/build/ ..
+   cmake --build .
+   mpirun -np 2 ./adios2_hello_bpReader_mpi
diff --git a/examples/hello/CMakeLists.txt b/examples/hello/CMakeLists.txt
index 297ed51882..e89ad4c90e 100644
--- a/examples/hello/CMakeLists.txt
+++ b/examples/hello/CMakeLists.txt
@@ -3,31 +3,39 @@
 # accompanying file Copyright.txt for details.
 #------------------------------------------------------------------------------#
 
-add_subdirectory(bpAttributeWriter)
+add_subdirectory(bpAttributeWriteRead)
 add_subdirectory(bpFlushWriter)
 
 if(ADIOS2_HAVE_MPI)
   add_subdirectory(bpFWriteCRead)
 endif()
 
+if(ADIOS2_HAVE_SZ)
+  add_subdirectory(bpOperatorSZWriter)
+endif()
+
 add_subdirectory(bpReader)
-add_subdirectory(bpThreadWrite)
-add_subdirectory(bpTimeWriter)
-add_subdirectory(bpWriter)
 
+add_subdirectory(bpStepsWriteRead)
 if(ADIOS2_HAVE_CUDA OR ADIOS2_HAVE_Kokkos_CUDA)
-  add_subdirectory(bpWriteReadCuda)
+  add_subdirectory(bpStepsWriteReadCuda)
 endif()
-
-find_package(hip QUIET)
-if(ADIOS2_HAVE_Kokkos_HIP OR hip_FOUND)
-  add_subdirectory(bpWriteReadHip)
+if(ADIOS2_HAVE_Kokkos_HIP)
+  add_subdirectory(bpStepsWriteReadHip)
 endif()
-
 if(ADIOS2_HAVE_Kokkos)
-  add_subdirectory(bpWriteReadKokkos)
+  add_subdirectory(bpStepsWriteReadKokkos)
+  if(ADIOS2_HAVE_SST)
+    add_subdirectory(sstKokkos)
+  endif()
+  if(ADIOS2_HAVE_DataMan)
+    add_subdirectory(datamanKokkos)
+  endif()
 endif()
 
+add_subdirectory(bpThreadWrite)
+add_subdirectory(bpWriter)
+
 if(ADIOS2_HAVE_DataMan)
   add_subdirectory(datamanReader)
   add_subdirectory(datamanWriter)
diff --git a/examples/hello/ReadMe.md b/examples/hello/ReadMe.md
index 90341a3957..ed45bec50c 100644
--- a/examples/hello/ReadMe.md
+++ b/examples/hello/ReadMe.md
@@ -11,59 +11,62 @@ They can be found in the following subdirectories, and they should be explored i
    * Languages: C++, C, Fortran, Python
 3. [bpReader](bpReader): The _bpReader_ examples demonstrate how to read a 1D/2D/3D variable using ADIOS2's BP engine.
    * Languages: C++, Fortran, Python
-4. [bpFWriteCRead](bpFWriteCRead): The _bpFWriteCRead_ example demonstrates how to write a 2D variable with Fortran and
-   read a subset of it with C++, and vice versa using ADIOS2's BP engine.
-   * Languages: C++, Fortran
-5. [bpTimeWriter](bpTimeWriter): The _bpTimeWriter_ example demonstrates how to write two Variables (one is timestep)
-   using time aggregation using ADIOS2's BP engine.
-   * Languages: C++, Python
-6. [bpAttributeWriter](bpAttributeWriter): The _bpAttributeWriter_ example demonstrates how to write attributes using
-   ADIOS2's BP engine.
+4. [bpAttributeWriterReader](bpAttributeWriterReader): The _bpAttributeWriterReader_ example demonstrates how to
+   write/read attributes using ADIOS2's BP engine.
+   * Languages: C++
+5. [bpOperatorSZWriter](bpOperatorSZWriter): The _bpOperatorSZWriter_ example demonstrates how to write variables with
+   the SZ operator using ADIOS2's BP engine.
    * Languages: C++
-7. [bpFlushWriter](bpFlushWriter): The _bpFlushWriter_ example demonstrates how to flush a variable using ADIOS2's BP
-   engine.
+6. [bpStepsWriteRead](bpStepsWriteRead): The _bpStepsWriteRead_ example demonstrates how to write and read
+   multiple steps using ADIOS2's BP engine.
+      * Languages: C++
+7. [bpStepsWriteReadCuda](bpStepsWriteReadCuda): The _bpStepsWriteReadCuda_ example demonstrates how to write and read a
+   variable with multiple steps using ADIOS2's BP engine and leveraging CUDA.
    * Languages: C++
-8. [bpWriteReadCuda](bpWriteReadCuda): The _bpWriteReadCuda_ example demonstrates how to write and read a variable with
-   multiple time steps using ADIOS2's BP engine and leveraging CUDA.
+8. [bpStepsWriteReadHip](bpStepsWriteReadHip): The _bpStepsWriteReadHip_ example demonstrates how to write and read a
+   variable with multiple steps using ADIOS2's BP engine and leveraging HIP.
    * Languages: C++
-9. [bpWriteReadHip](bpWriteReadHip): The _bpWriteReadHip_ example demonstrates how to write and read a variable with
-   multiple time steps using ADIOS2's BP engine and leveraging HIP.
+9. [bpStepsWriteReadKokkos](bpStepsWriteReadKokkos): The _bpStepsWriteReadKokkos_ example demonstrates how to write and
+   read a variable with multiple steps using ADIOS2's BP engine and leveraging Kokkos.
    * Languages: C++
-10. [bpWriteReadKokkos](bpWriteReadKokkos): The _bpWriteReadOmp_ example demonstrates how to write and read a variable
-    with multiple time steps using ADIOS2's BP engine and leveraging Kokkos.
+10. [bpFlushWriter](bpFlushWriter): The _bpFlushWriter_ example demonstrates how to flush a variable using ADIOS2's BP
+    engine.
     * Languages: C++
-11. [datamanReader](datamanReader): The _datamanReader_ example demonstrates how to read variables in real-time WAN
+11. [bpFWriteCRead](bpFWriteCRead): The _bpFWriteCRead_ example demonstrates how to write a 2D variable with
+    Fortran and read a subset of it with C++, and vice versa using ADIOS2's BP engine.
+    * Languages: C++, Fortran
+12. [datamanReader](datamanReader): The _datamanReader_ example demonstrates how to read variables in real-time WAN
     streams using ADIOS's DataMan engine.
     * Languages: C++, Python
-12. [datamanWriter](datamanWriter): The _datamanWriter_ example demonstrates how to write variables in real-time WAN
+13. [datamanWriter](datamanWriter): The _datamanWriter_ example demonstrates how to write variables in real-time WAN
     streams using ADIOS's DataMan engine.
     * Languages: C++, Python
-13. [dataspacesReader](dataspacesReader): The _dataspacesReader_ example demonstrates how to read a variable using
+14. [dataspacesReader](dataspacesReader): The _dataspacesReader_ example demonstrates how to read a variable using
     ADIOS2's DATASPACES engine.
     * Languages: C++
-14. [dataspacesWriter](dataspacesWriter): The _dataspacesWriter_ example demonstrates how to write a variable using
+15. [dataspacesWriter](dataspacesWriter): The _dataspacesWriter_ example demonstrates how to write a variable using
     ADIOS2's DATASPACES engine.
     * Languages: C++
-15. [hdf5Reader](hdf5Reader): The _hdf5Reader_ example demonstrates how to read variables using ADIOS2's HDF5 engine.
+16. [hdf5Reader](hdf5Reader): The _hdf5Reader_ example demonstrates how to read variables using ADIOS2's HDF5 engine.
     * Languages: C++
-16. [hdf5Writer](hdf5Writer): The _hdf5Writer_ example demonstrates how to write variables using ADIOS2's HDF5 engine.
+17. [hdf5Writer](hdf5Writer): The _hdf5Writer_ example demonstrates how to write variables using ADIOS2's HDF5 engine.
     * Languages: C++
-17. [hdf5SubFile](hdf5SubFile): The _hdf5SubFile_ example demonstrates how to write variables using ADIOS2's parallel
+18. [hdf5SubFile](hdf5SubFile): The _hdf5SubFile_ example demonstrates how to write variables using ADIOS2's parallel
     HDF5 engine leveraging the subfile feature.
     * Languages: C++
-18. [inlineMWE](inlineMWE): The _inlineMWE_ example demonstrates how to write and read a variable using ADIOS2's inline
+19. [inlineMWE](inlineMWE): The _inlineMWE_ example demonstrates how to write and read a variable using ADIOS2's inline
     engine.
     * Languages: C++
-19. [inlineFWriteCppRead](inlineFWriteCppRead): The _inlineFWriteCppRead_ example demonstrates how to write a 2D
+20. [inlineFWriteCppRead](inlineFWriteCppRead): The _inlineFWriteCppRead_ example demonstrates how to write a 2D
     variable with Fortran and read it back a subset of it with C++ using ADIOS2's inline engine.
     * Languages: C++, Fortran
-20. [inlineReaderWriter](inlineReaderWriter): The _inlineReaderWriter_ example demonstrates how to write two Variables
+21. [inlineReaderWriter](inlineReaderWriter): The _inlineReaderWriter_ example demonstrates how to write two Variables
     (one is timestep) using time aggregation and ADIOS2's inline engine.
     * Languages: C++
-21. [sstReader](sstReader): The _sstReader_ example demonstrates how to read a variable using ADIOS2's SST engine.
+22. [sstReader](sstReader): The _sstReader_ example demonstrates how to read a variable using ADIOS2's SST engine.
     * Languages: C++
-22. [sstWriter](sstWriter): The _sstWriter_ example demonstrates how to write a variable using ADIOS2's SST engine.
+23. [sstWriter](sstWriter): The _sstWriter_ example demonstrates how to write a variable using ADIOS2's SST engine.
     * Languages: C++
-23. [skeleton](skeleton): The _skeleton_ example demonstrates how to write and read a variable using an ADIOS2 skeleton
+24. [skeleton](skeleton): The _skeleton_ example demonstrates how to write and read a variable using an ADIOS2 skeleton
     engine.
     * Languages: C++
diff --git a/examples/hello/bpAttributeWriter/CMakeLists.txt b/examples/hello/bpAttributeWriteRead/CMakeLists.txt
similarity index 55%
rename from examples/hello/bpAttributeWriter/CMakeLists.txt
rename to examples/hello/bpAttributeWriteRead/CMakeLists.txt
index c5eaf598e3..cfc777fef2 100644
--- a/examples/hello/bpAttributeWriter/CMakeLists.txt
+++ b/examples/hello/bpAttributeWriteRead/CMakeLists.txt
@@ -4,7 +4,7 @@
 #------------------------------------------------------------------------------#
 
 cmake_minimum_required(VERSION 3.12)
-project(ADIOS2HelloBPAttributeWriterExample)
+project(ADIOS2HelloBPAttributeWriteReadExample)
 
 if(NOT TARGET adios2_core)
   set(_components CXX)
@@ -20,13 +20,12 @@ if(NOT TARGET adios2_core)
   find_package(ADIOS2 REQUIRED COMPONENTS ${_components})
 endif()
 
-add_executable(adios2_hello_bpAttributeWriter bpAttributeWriter_nompi.cpp)
-target_link_libraries(adios2_hello_bpAttributeWriter adios2::cxx11)
-install(TARGETS adios2_hello_bpAttributeWriter RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR})
+add_executable(adios2_hello_bpAttributeWriteRead bpAttributeWriteRead.cpp)
+target_link_libraries(adios2_hello_bpAttributeWriteRead adios2::cxx11)
+install(TARGETS adios2_hello_bpAttributeWriteRead RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR})
 
 if(ADIOS2_HAVE_MPI)
-  add_executable(adios2_hello_bpAttributeWriter_mpi bpAttributeWriter.cpp)
-  target_link_libraries(adios2_hello_bpAttributeWriter_mpi adios2::cxx11_mpi MPI::MPI_C)
-  install(TARGETS adios2_hello_bpAttributeWriter_mpi RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR})
+  add_executable(adios2_hello_bpAttributeWriteRead_mpi bpAttributeWriteRead.cpp)
+  target_link_libraries(adios2_hello_bpAttributeWriteRead_mpi adios2::cxx11_mpi MPI::MPI_C)
+  install(TARGETS adios2_hello_bpAttributeWriteRead_mpi RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR})
 endif()
-
diff --git a/examples/hello/bpAttributeWriteRead/bpAttributeWriteRead.cpp b/examples/hello/bpAttributeWriteRead/bpAttributeWriteRead.cpp
new file mode 100644
index 0000000000..0d55e703e8
--- /dev/null
+++ b/examples/hello/bpAttributeWriteRead/bpAttributeWriteRead.cpp
@@ -0,0 +1,188 @@
+/*
+ * Distributed under the OSI-approved Apache License, Version 2.0.  See
+ * accompanying file Copyright.txt for details.
+ *
+ * bpAttributeWriteRead.cpp: Simple self-descriptive example of how to write/read attributes and
+ * a variable to a BP File that lives in several MPI processes.
+ *
+ *  Created on: Feb 16, 2017
+ *      Author: William F Godoy godoywf@ornl.gov
+ */
+
+#include <ios>      //std::ios_base::failure
+#include <iostream> //std::cout
+#if ADIOS2_USE_MPI
+#include <mpi.h>
+#endif
+#include <stdexcept> //std::invalid_argument std::exception
+#include <string>
+#include <vector>
+
+#include <adios2.h>
+
+void writer(adios2::ADIOS &adios, int rank, int size, std::vector<float> &myFloats)
+{
+    /*** IO class object: settings and factory of Settings: Variables,
+     * Parameters, Transports, and Execution: Engines */
+    adios2::IO bpIO = adios.DeclareIO("BPFile_N2N");
+
+    const std::size_t Nx = myFloats.size();
+
+    /** global array : name, { shape (total) }, { start (local) }, { count
+     * (local) }, all are constant dimensions */
+    adios2::Variable<float> bpFloats = bpIO.DefineVariable<float>(
+        "bpFloats", {size * Nx}, {rank * Nx}, {Nx}, adios2::ConstantDims);
+
+    bpIO.DefineAttribute<std::string>("Single_String", "File generated with ADIOS2");
+
+    std::vector<std::string> myStrings = {"one", "two", "three"};
+    bpIO.DefineAttribute<std::string>("Array_of_Strings", myStrings.data(), myStrings.size());
+
+    bpIO.DefineAttribute<double>("Attr_Double", 0.f);
+    std::vector<double> myDoubles = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10};
+    bpIO.DefineAttribute<double>("Array_of_Doubles", myDoubles.data(), myDoubles.size());
+
+    /** Engine derived class, spawned to start IO operations */
+    adios2::Engine bpWriter = bpIO.Open("fileAttributes.bp", adios2::Mode::Write);
+
+    bpWriter.BeginStep();
+
+    /** Write variable for buffering */
+    bpWriter.Put<float>(bpFloats, myFloats.data());
+
+    bpWriter.EndStep();
+
+    /** Create bp file, engine becomes unreachable after this*/
+    bpWriter.Close();
+}
+
+void reader(adios2::ADIOS &adios, int rank, int /*size*/)
+{
+    adios2::IO bpIO = adios.DeclareIO("BPReader");
+
+    adios2::Engine bpReader = bpIO.Open("fileAttributes.bp", adios2::Mode::Read);
+
+    bpReader.BeginStep();
+    const auto attributesInfo = bpIO.AvailableAttributes();
+
+    for (const auto &attributeInfoPair : attributesInfo)
+    {
+        std::cout << "Attribute: " << attributeInfoPair.first;
+        for (const auto &attributePair : attributeInfoPair.second)
+        {
+            std::cout << "\tKey: " << attributePair.first << "\tValue: " << attributePair.second
+                      << "\n";
+        }
+        std::cout << "\n";
+    }
+
+    adios2::Attribute<float> singleString = bpIO.InquireAttribute<float>("Single_String");
+    if (singleString)
+    {
+        std::cout << singleString.Name() << ": " << singleString.Data()[0] << "\n";
+    }
+    adios2::Attribute<std::string> arrayOfStrings =
+        bpIO.InquireAttribute<std::string>("Array_of_Strings");
+    if (arrayOfStrings)
+    {
+        std::cout << arrayOfStrings.Name() << ": ";
+        for (const auto &value : arrayOfStrings.Data())
+        {
+            std::cout << value << " ";
+        }
+        std::cout << "\n";
+    }
+    adios2::Attribute<double> attrDouble = bpIO.InquireAttribute<double>("Attr_Double");
+    if (attrDouble)
+    {
+        std::cout << attrDouble.Name() << ": " << attrDouble.Data()[0] << "\n";
+    }
+    adios2::Attribute<double> arrayOfDoubles = bpIO.InquireAttribute<double>("Array_of_Doubles");
+    if (arrayOfDoubles)
+    {
+        std::cout << arrayOfDoubles.Name() << ": ";
+        for (const auto &value : arrayOfDoubles.Data())
+        {
+            std::cout << value << " ";
+        }
+        std::cout << "\n";
+    }
+
+    adios2::Variable<float> bpFloats = bpIO.InquireVariable<float>("bpFloats");
+    const std::size_t Nx = 10;
+    std::vector<float> myFloats(Nx);
+    if (bpFloats)
+    {
+        bpFloats.SetSelection({{Nx * rank}, {Nx}});
+        bpReader.Get(bpFloats, myFloats.data());
+    }
+
+    bpReader.EndStep();
+
+    bpReader.Close();
+}
+
+int main(int argc, char *argv[])
+{
+    int rank, size;
+#if ADIOS2_USE_MPI
+    int provided;
+
+    // MPI_THREAD_MULTIPLE is only required if you enable the SST MPI_DP
+    MPI_Init_thread(&argc, &argv, MPI_THREAD_MULTIPLE, &provided);
+    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
+    MPI_Comm_size(MPI_COMM_WORLD, &size);
+#else
+    rank = 0;
+    size = 1;
+#endif
+
+    /** Application variable */
+    std::vector<float> myFloats = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9};
+    try
+    {
+        /** ADIOS class factory of IO class objects */
+#if ADIOS2_USE_MPI
+        adios2::ADIOS adios(MPI_COMM_WORLD);
+#else
+        adios2::ADIOS adios;
+#endif
+
+        writer(adios, rank, size, myFloats);
+        reader(adios, rank, size);
+    }
+    catch (std::invalid_argument &e)
+    {
+        std::cout << "Invalid argument exception, STOPPING PROGRAM from rank " << rank << "\n";
+        std::cout << e.what() << "\n";
+#if ADIOS2_USE_MPI
+        std::cerr << "STOPPING PROGRAM from rank " << rank << "\n";
+        MPI_Abort(MPI_COMM_WORLD, 1);
+#endif
+    }
+    catch (std::ios_base::failure &e)
+    {
+        std::cout << "IO System base failure exception, STOPPING PROGRAM from rank " << rank
+                  << "\n";
+        std::cout << e.what() << "\n";
+#if ADIOS2_USE_MPI
+        std::cerr << "STOPPING PROGRAM from rank " << rank << "\n";
+        MPI_Abort(MPI_COMM_WORLD, 1);
+#endif
+    }
+    catch (std::exception &e)
+    {
+        std::cout << "Exception, STOPPING PROGRAM from rank " << rank << "\n";
+        std::cout << e.what() << "\n";
+#if ADIOS2_USE_MPI
+        std::cerr << "STOPPING PROGRAM from rank " << rank << "\n";
+        MPI_Abort(MPI_COMM_WORLD, 1);
+#endif
+    }
+
+#if ADIOS2_USE_MPI
+    MPI_Finalize();
+#endif
+
+    return 0;
+}
diff --git a/examples/hello/bpAttributeWriteRead/bpAttributeWriteRead_tutorialSkeleton.cpp b/examples/hello/bpAttributeWriteRead/bpAttributeWriteRead_tutorialSkeleton.cpp
new file mode 100644
index 0000000000..ef78c5efe0
--- /dev/null
+++ b/examples/hello/bpAttributeWriteRead/bpAttributeWriteRead_tutorialSkeleton.cpp
@@ -0,0 +1,91 @@
+/*
+ * Distributed under the OSI-approved Apache License, Version 2.0.  See
+ * accompanying file Copyright.txt for details.
+ *
+ * bpAttributeWriteRead.cpp: Simple self-descriptive example of how to write/read attributes and
+ * a variable to a BP File that lives in several MPI processes.
+ *
+ *  Created on: Feb 16, 2017
+ *      Author: William F Godoy godoywf@ornl.gov
+ */
+
+#include <ios>      //std::ios_base::failure
+#include <iostream> //std::cout
+#include <mpi.h>
+#include <stdexcept> //std::invalid_argument std::exception
+#include <string>
+#include <vector>
+
+#include <adios2.h>
+
+void writer(adios2::ADIOS &adios, int rank, int size, std::vector<float> &myFloats)
+{
+    // Add code to create IO object
+
+    // Add code to create variable
+
+    // Add code to create attributes
+
+    // Add code to open file
+
+    // Add code to write variables
+
+    // Add code to close file
+}
+
+void reader(adios2::ADIOS &adios, int rank, int /*size*/)
+{
+    // Add code to create IO object
+
+    // Add code to open file
+
+    // add code to check available attributes
+
+    // Add code to read attributes
+
+    // Add code to read variables
+
+    // Add code to close file
+}
+
+int main(int argc, char *argv[])
+{
+    int rank, size;
+    int provided;
+
+    // Add code to init MPI
+
+    // Add code to create array
+    try
+    {
+        // Add code to create ADIOS object
+
+        // Call writer and reader functions
+    }
+    catch (std::invalid_argument &e)
+    {
+        std::cout << "Invalid argument exception, STOPPING PROGRAM from rank " << rank << "\n";
+        std::cout << e.what() << "\n";
+        std::cerr << "STOPPING PROGRAM from rank " << rank << "\n";
+        MPI_Abort(MPI_COMM_WORLD, 1);
+    }
+    catch (std::ios_base::failure &e)
+    {
+        std::cout << "IO System base failure exception, STOPPING PROGRAM from rank " << rank
+                  << "\n";
+        std::cout << e.what() << "\n";
+        std::cerr << "STOPPING PROGRAM from rank " << rank << "\n";
+        MPI_Abort(MPI_COMM_WORLD, 1);
+    }
+    catch (std::exception &e)
+    {
+        std::cout << "Exception, STOPPING PROGRAM from rank " << rank << "\n";
+        std::cout << e.what() << "\n";
+        std::cerr << "STOPPING PROGRAM from rank " << rank << "\n";
+        MPI_Abort(MPI_COMM_WORLD, 1);
+    }
+
+    // Add code to finalize MPI
+
+    return 0;
+}
diff --git a/examples/hello/bpAttributeWriter/bpAttributeWriter.cpp b/examples/hello/bpAttributeWriter/bpAttributeWriter.cpp
deleted file mode 100644
index 1ef46f2e87..0000000000
--- a/examples/hello/bpAttributeWriter/bpAttributeWriter.cpp
+++ /dev/null
@@ -1,112 +0,0 @@
-/*
- * Distributed under the OSI-approved Apache License, Version 2.0.  See
- * accompanying file Copyright.txt for details.
- *
- * bpAttributeWriter.cpp: Simple self-descriptive example of how to write a variable
- * to a BP File that lives in several MPI processes.
- *
- *  Created on: Feb 16, 2017
- *      Author: William F Godoy godoywf@ornl.gov
- */
-
-#include <ios>      //std::ios_base::failure
-#include <iostream> //std::cout
-#include <mpi.h>
-#include <stdexcept> //std::invalid_argument std::exception
-#include <string>
-#include <vector>
-
-#include <adios2.h>
-
-int main(int argc, char *argv[])
-{
-    int provided;
-
-    // MPI_THREAD_MULTIPLE is only required if you enable the SST MPI_DP
-    MPI_Init_thread(&argc, &argv, MPI_THREAD_MULTIPLE, &provided);
-    int rank, size;
-    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
-    MPI_Comm_size(MPI_COMM_WORLD, &size);
-
-    /** Application variable */
-    std::vector<float> myFloats = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9};
-    const std::size_t Nx = myFloats.size();
-
-    try
-    {
-        /** ADIOS class factory of IO class objects */
-        adios2::ADIOS adios(MPI_COMM_WORLD);
-
-        /*** IO class object: settings and factory of Settings: Variables,
-         * Parameters, Transports, and Execution: Engines */
-        adios2::IO bpIO = adios.DeclareIO("BPFile_N2N");
-
-        /** global array : name, { shape (total) }, { start (local) }, { count
-         * (local) }, all are constant dimensions */
-        adios2::Variable<float> bpFloats = bpIO.DefineVariable<float>(
-            "bpFloats", {size * Nx}, {rank * Nx}, {Nx}, adios2::ConstantDims);
-
-        bpIO.DefineAttribute<std::string>("Single_String", "File generated with ADIOS2");
-
-        std::vector<std::string> myStrings = {"one", "two", "three"};
-        bpIO.DefineAttribute<std::string>("Array_of_Strings", myStrings.data(), myStrings.size());
-
-        bpIO.DefineAttribute<double>("Attr_Double", 0.f);
-        std::vector<double> myDoubles = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10};
-        bpIO.DefineAttribute<double>("Array_of_Doubles", myDoubles.data(), myDoubles.size());
-
-        /** Engine derived class, spawned to start IO operations */
-        adios2::Engine bpWriter = bpIO.Open("fileAttributes.bp", adios2::Mode::Write);
-
-        bpWriter.BeginStep();
-
-        /** Write variable for buffering */
-        bpWriter.Put<float>(bpFloats, myFloats.data());
-
-        bpWriter.EndStep();
-
-        /** Create bp file, engine becomes unreachable after this*/
-        bpWriter.Close();
-
-        adios2::IO bpReader = adios.DeclareIO("BPReader");
-
-        adios2::Engine bpReaderEngine = bpReader.Open("fileAttributes.bp", adios2::Mode::Read);
-
-        bpReaderEngine.BeginStep();
-        const auto attributesInfo = bpReader.AvailableAttributes();
-
-        for (const auto &attributeInfoPair : attributesInfo)
-        {
-            std::cout << "Attribute: " << attributeInfoPair.first;
-            for (const auto &attributePair : attributeInfoPair.second)
-            {
-                std::cout << "\tKey: " << attributePair.first << "\tValue: " << attributePair.second
-                          << "\n";
-            }
-            std::cout << "\n";
-        }
-        bpReaderEngine.EndStep();
-
-        bpReaderEngine.Close();
-    }
-    catch (std::invalid_argument &e)
-    {
-        std::cout << "Invalid argument exception, STOPPING PROGRAM from rank " << rank << "\n";
-        std::cout << e.what() << "\n";
-    }
-    catch (std::ios_base::failure &e)
-    {
-        std::cout << "IO System base failure exception, STOPPING PROGRAM from rank " << rank
-                  << "\n";
-        std::cout << e.what() << "\n";
-    }
-    catch (std::exception &e)
-    {
-        std::cout << "Exception, STOPPING PROGRAM from rank " << rank << "\n";
-        std::cout << e.what() << "\n";
-    }
-
-    MPI_Finalize();
-
-    return 0;
-}
diff --git a/examples/hello/bpAttributeWriter/bpAttributeWriter_nompi.cpp b/examples/hello/bpAttributeWriter/bpAttributeWriter_nompi.cpp
deleted file mode 100644
index bb8aeab6fd..0000000000
--- a/examples/hello/bpAttributeWriter/bpAttributeWriter_nompi.cpp
+++ /dev/null
@@ -1,78 +0,0 @@
-/*
- * Distributed under the OSI-approved Apache License, Version 2.0.  See
- * accompanying file Copyright.txt for details.
- *
- * bpAttributeWriter_nompi.cpp sequential non-mpi version of bpAttributeWriter
- *
- *  Created on: Jan 9, 2017
- *      Author: William F Godoy godoywf@ornl.gov
- */
-
-#include <ios>       //std::ios_base::failure
-#include <iostream>  //std::cout
-#include <stdexcept> //std::invalid_argument std::exception
-#include <string>
-#include <vector>
-
-#include <adios2.h>
-
-int main(int argc, char *argv[])
-{
-    /** Application variable */
-    std::vector<float> myFloats = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9};
-    const std::size_t Nx = myFloats.size();
-
-    try
-    {
-        /** ADIOS class factory of IO class objects */
-        adios2::ADIOS adios;
-
-        /*** IO class object: settings and factory of Settings: Variables,
-         * Parameters, Transports, and Execution: Engines */
-        adios2::IO bpIO = adios.DeclareIO("BPFile_N2N");
-
-        /** global array: name, { shape (total dimensions) }, { start (local) },
-         * { count (local) }, all are constant dimensions */
-        adios2::Variable<float> bpFloats =
-            bpIO.DefineVariable<float>("bpFloats", {}, {}, {Nx}, adios2::ConstantDims);
-
-        bpIO.DefineAttribute<std::string>("Single_String", "File generated with ADIOS2");
-
-        std::vector<std::string> myStrings = {"one", "two", "three"};
-        bpIO.DefineAttribute<std::string>("Array_of_Strings", myStrings.data(), myStrings.size());
-
-        bpIO.DefineAttribute<double>("Attr_Double", 0.f);
-        std::vector<double> myDoubles = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10};
-        bpIO.DefineAttribute<double>("Array_of_Doubles", myDoubles.data(), myDoubles.size());
-
-        /** Engine derived class, spawned to start IO operations */
-        adios2::Engine bpWriter = bpIO.Open("myVector.bp", adios2::Mode::Write);
-
-        bpWriter.BeginStep();
-
-        /** Write variable for buffering */
-        bpWriter.Put<float>(bpFloats, myFloats.data());
-
-        bpWriter.EndStep();
-
-        /** Create bp file, engine becomes unreachable after this*/
-        bpWriter.Close();
-    }
-    catch (std::invalid_argument &e)
-    {
-        std::cout << "Invalid argument exception, STOPPING PROGRAM\n";
-        std::cout << e.what() << "\n";
-    }
-    catch (std::ios_base::failure &e)
-    {
-        std::cout << "IO System base failure exception, STOPPING PROGRAM\n";
-        std::cout << e.what() << "\n";
-    }
-    catch (std::exception &e)
-    {
-        std::cout << "Exception, STOPPING PROGRAM from rank\n";
-        std::cout << e.what() << "\n";
-    }
-
-    return 0;
-}
diff --git a/examples/hello/bpOperatorSZWriter/CMakeLists.txt b/examples/hello/bpOperatorSZWriter/CMakeLists.txt
new file mode 100644
index 0000000000..c54aadf81e
--- /dev/null
+++ b/examples/hello/bpOperatorSZWriter/CMakeLists.txt
@@ -0,0 +1,36 @@
+#------------------------------------------------------------------------------#
+# Distributed under the OSI-approved Apache License, Version 2.0.  See
+# accompanying file Copyright.txt for details.
+#------------------------------------------------------------------------------#
+
+cmake_minimum_required(VERSION 3.12)
+project(ADIOS2HelloBPOperatorSZWriterExample)
+
+if(NOT TARGET adios2_core)
+  set(_components C)
+
+  find_package(MPI COMPONENTS ${_components})
+  if(MPI_FOUND)
+    # Workaround for various MPI implementations forcing the link of C++ bindings
+    add_definitions(-DOMPI_SKIP_MPICXX -DMPICH_SKIP_MPICXX)
+
+    list(APPEND _components MPI)
+  endif()
+  list(APPEND _components CXX)
+
+  find_package(SZ QUIET)
+
+  find_package(ADIOS2 REQUIRED COMPONENTS ${_components})
+endif()
+
+if(ADIOS2_HAVE_SZ)
+  add_executable(adios2_hello_bpOperatorSZWriter bpOperatorSZWriter.cpp)
+  target_link_libraries(adios2_hello_bpOperatorSZWriter adios2::cxx11)
+  install(TARGETS adios2_hello_bpOperatorSZWriter RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR})
+
+  if (ADIOS2_HAVE_MPI)
+    add_executable(adios2_hello_bpOperatorSZWriter_mpi bpOperatorSZWriter.cpp)
+    target_link_libraries(adios2_hello_bpOperatorSZWriter_mpi adios2::cxx11_mpi MPI::MPI_C)
+    install(TARGETS adios2_hello_bpOperatorSZWriter_mpi RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR})
+  endif ()
+endif()
diff --git a/examples/hello/bpWriter/bpSZ.cpp b/examples/hello/bpOperatorSZWriter/bpOperatorSZWriter.cpp
similarity index 79%
rename from examples/hello/bpWriter/bpSZ.cpp
rename to examples/hello/bpOperatorSZWriter/bpOperatorSZWriter.cpp
index 357f42f829..2fb96876a1 100644
--- a/examples/hello/bpWriter/bpSZ.cpp
+++ b/examples/hello/bpOperatorSZWriter/bpOperatorSZWriter.cpp
@@ -2,19 +2,20 @@
  * Distributed under the OSI-approved Apache License, Version 2.0.  See
  * accompanying file Copyright.txt for details.
  *
- * bpSZ.cpp : example passing runtime compression arguments
+ * bpOperatorSZWriter.cpp : example using operator by passing compression arguments
  *
  *  Created on: Aug 3, 2018
  *      Author: William F Godoy godoywf@ornl.gov
  */
 
+#include <algorithm> //std::transform
 #include <ios>       //std::ios_base::failure
 #include <iostream>  //std::cout
 #include <numeric>   //std::iota
 #include <stdexcept> //std::invalid_argument std::exception
 #include <vector>
 
-#include <adios2.h>
+#include "adios2.h"
 #if ADIOS2_USE_MPI
 #include <mpi.h>
 #endif
@@ -23,7 +24,7 @@ void Usage()
 {
     std::cout << "\n";
     std::cout << "USAGE:\n";
-    std::cout << "./helloBPSZ Nx sz_accuracy\n";
+    std::cout << "./adios2_hello_bpOperatorSZWriter Nx sz_accuracy\n";
     std::cout << "\t Nx: size of float and double arrays to be compressed\n";
     std::cout << "\t sz_accuracy: absolute accuracy e.g. 0.1, 0.001, to skip "
                  "compression: -1\n\n";
@@ -31,8 +32,13 @@ void Usage()
 
 int main(int argc, char *argv[])
 {
-    int rank, size;
+    if (argc != 3)
+    {
+        Usage();
+        return EXIT_SUCCESS;
+    }
 
+    int rank, size;
 #if ADIOS2_USE_MPI
     int provided;
 
@@ -47,12 +53,6 @@ int main(int argc, char *argv[])
 
     try
     {
-        if (argc != 3)
-        {
-            throw std::invalid_argument("ERROR: need sz accuracy e.g. 0.01, 0.1 as "
-                                        "2nd parameter in argv\n");
-        }
-
         const std::size_t Nx = static_cast<std::size_t>(std::stoull(argv[1]));
         const double accuracy = std::stod(argv[2]);
 
@@ -92,21 +92,26 @@ int main(int argc, char *argv[])
         (void)attribute;
 
         /** Engine derived class, spawned to start IO operations */
-        adios2::Engine bpFileWriter = bpIO.Open("SZexample.bp", adios2::Mode::Write);
+        adios2::Engine bpWriter = bpIO.Open("SZexample.bp", adios2::Mode::Write);
 
-        for (unsigned int t = 0; t < 3; ++t)
+        for (unsigned int step = 0; step < 3; ++step)
         {
-            bpFileWriter.BeginStep();
+            bpWriter.BeginStep();
 
-            // here you can modify myFloats, myDoubles per step
+            bpWriter.Put(varFloats, myFloats.data());
+            bpWriter.Put(varDoubles, myDoubles.data());
 
-            bpFileWriter.Put(varFloats, myFloats.data());
-            bpFileWriter.Put(varDoubles, myDoubles.data());
-            bpFileWriter.EndStep();
+            bpWriter.EndStep();
+
+            // here you can modify myFloats, myDoubles per step
+            std::transform(myFloats.begin(), myFloats.end(), myFloats.begin(),
+                           [&](float v) -> float { return 2 * v; });
+            std::transform(myDoubles.begin(), myDoubles.end(), myDoubles.begin(),
+                           [&](double v) -> double { return 3 * v; });
         }
 
         /** Create bp file, engine becomes unreachable after this*/
-        bpFileWriter.Close();
+        bpWriter.Close();
     }
     catch (std::invalid_argument &e)
     {
diff --git a/examples/hello/bpOperatorSZWriter/bpOperatorSZWriter_tutorialSkeleton.cxx b/examples/hello/bpOperatorSZWriter/bpOperatorSZWriter_tutorialSkeleton.cxx
new file mode 100644
index 0000000000..618ea7f50b
--- /dev/null
+++ b/examples/hello/bpOperatorSZWriter/bpOperatorSZWriter_tutorialSkeleton.cxx
@@ -0,0 +1,94 @@
+/*
+ * Distributed under the OSI-approved Apache License, Version 2.0.  See
+ * accompanying file Copyright.txt for details.
+ *
+ * bpSZ.cpp : example passing runtime compression arguments
+ *
+ *  Created on: Aug 3, 2018
+ *      Author: William F Godoy godoywf@ornl.gov
+ */
+
+#include <algorithm> //std::transform
+#include <ios>       //std::ios_base::failure
+#include <iostream>  //std::cout
+#include <numeric>   //std::iota
+#include <stdexcept> //std::invalid_argument std::exception
+#include <vector>
+
+#include <adios2.h>
+#include <mpi.h>
+
+void Usage()
+{
+    std::cout << "\n";
+    std::cout << "USAGE:\n";
+    std::cout << "./adios2_hello_bpSZ Nx sz_accuracy\n";
+    std::cout << "\t Nx: size of float and double arrays to be compressed\n";
+    std::cout << "\t sz_accuracy: absolute accuracy e.g. 0.1, 0.001, to skip "
+                 "compression: -1\n\n";
+}
+
+int main(int argc, char *argv[])
+{
+    if (argc != 3)
+    {
+        Usage();
+        return EXIT_SUCCESS;
+    }
+
+    int rank, size;
+    int provided;
+
+    // Add code to init MPI
+
+    try
+    {
+        // Add code to get command line arguments
+
+        // Add code to create arrays
+
+        // Add code to create ADIOS object
+
+        // Add code to create IO object
+
+        // Add code to create variables
+
+        // Add code to add SZ compressor operation
+
+        // Add code to add attribute
+
+        // Add code to open file
+
+        // Add code to write variables for 3 time steps and edit them
+
+        // Add code to close file
+    }
+    catch (std::invalid_argument &e)
+    {
+        std::cerr << "Invalid argument exception: " << e.what() << "\n";
+#if ADIOS2_USE_MPI
+        std::cerr << "STOPPING PROGRAM from rank " << rank << "\n";
+        MPI_Abort(MPI_COMM_WORLD, 1);
+#endif
+    }
+    catch (std::ios_base::failure &e)
+    {
+        std::cerr << "IO System base failure exception: " << e.what() << "\n";
+#if ADIOS2_USE_MPI
+        std::cerr << "STOPPING PROGRAM from rank " << rank << "\n";
+        MPI_Abort(MPI_COMM_WORLD, 1);
+#endif
+    }
+    catch (std::exception &e)
+    {
+        std::cerr << "Exception: " << e.what() << "\n";
+#if ADIOS2_USE_MPI
+        std::cerr << "STOPPING PROGRAM from rank " << rank << "\n";
+        MPI_Abort(MPI_COMM_WORLD, 1);
+#endif
+    }
+
+    // Add code to finalize MPI
+
+    return 0;
+}
diff --git a/examples/hello/bpReader/CMakeLists.txt b/examples/hello/bpReader/CMakeLists.txt
index 95be573537..47a5140944 100644
--- a/examples/hello/bpReader/CMakeLists.txt
+++ b/examples/hello/bpReader/CMakeLists.txt
@@ -30,7 +30,7 @@ if(NOT TARGET adios2::cxx11)
   find_package(ADIOS2 REQUIRED COMPONENTS ${_components})
 endif()
 
-add_executable(adios2_hello_bpReader bpReader_nompi.cpp)
+add_executable(adios2_hello_bpReader bpReader.cpp)
 target_link_libraries(adios2_hello_bpReader adios2::cxx11)
 install(TARGETS adios2_hello_bpReader RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR})
 
diff --git a/examples/hello/bpReader/bpReader.cpp b/examples/hello/bpReader/bpReader.cpp
index 1b57561fd0..a987faeba8 100644
--- a/examples/hello/bpReader/bpReader.cpp
+++ b/examples/hello/bpReader/bpReader.cpp
@@ -5,15 +5,14 @@
  * bpReader.cpp: Simple self-descriptive example of how to read a variable
  * from a BP File.
  *
- * Try running like this from the build directory:
- *   mpirun -np 3 ./bin/hello_bpReader
- *
  *  Created on: Feb 16, 2017
  *      Author: William F Godoy godoywf@ornl.gov
  */
 #include <ios>      //std::ios_base::failure
 #include <iostream> //std::cout
+#if ADIOS2_USE_MPI
 #include <mpi.h>
+#endif
 #include <stdexcept> //std::invalid_argument std::exception
 #include <vector>
 
@@ -21,25 +20,34 @@
 
 int main(int argc, char *argv[])
 {
-    int provided;
+    int rank, size;
 
+#if ADIOS2_USE_MPI
+    int provided;
     // MPI_THREAD_MULTIPLE is only required if you enable the SST MPI_DP
     MPI_Init_thread(&argc, &argv, MPI_THREAD_MULTIPLE, &provided);
-    int rank, size;
     MPI_Comm_rank(MPI_COMM_WORLD, &rank);
     MPI_Comm_size(MPI_COMM_WORLD, &size);
-    std::string filename = "myVector_cpp.bp";
+#else
+    rank = 0;
+    size = 1;
+#endif
+    std::cout << "rank " << rank << " size " << size << "\n";
     try
     {
+#if ADIOS2_USE_MPI
         /** ADIOS class factory of IO class objects */
         adios2::ADIOS adios(MPI_COMM_WORLD);
+#else
+        adios2::ADIOS adios;
+#endif
 
         /*** IO class object: settings and factory of Settings: Variables,
          * Parameters, Transports, and Execution: Engines */
-        adios2::IO bpIO = adios.DeclareIO("ReadBP");
+        adios2::IO bpIO = adios.DeclareIO("BPFile_N2N");
 
         /** Engine derived class, spawned to start IO operations */
-        adios2::Engine bpReader = bpIO.Open(filename, adios2::Mode::Read);
+        adios2::Engine bpReader = bpIO.Open("myVector_cpp.bp", adios2::Mode::Read);
 
         bpReader.BeginStep();
         const std::map<std::string, adios2::Params> variables = bpIO.AvailableVariables();
@@ -66,7 +74,7 @@ int main(int argc, char *argv[])
             // read only the chunk corresponding to our rank
             bpFloats.SetSelection({{Nx * rank}, {Nx}});
             // myFloats.data is pre-allocated
-            bpReader.Get<float>(bpFloats, myFloats, adios2::Mode::Sync);
+            bpReader.Get(bpFloats, myFloats, adios2::Mode::Sync);
 
             if (rank == 0)
             {
@@ -85,7 +93,7 @@ int main(int argc, char *argv[])
             // read only the chunk corresponding to our rank
             bpInts.SetSelection({{Nx * rank}, {Nx}});
 
-            bpReader.Get<int>(bpInts, myInts, adios2::Mode::Sync);
+            bpReader.Get(bpInts, myInts, adios2::Mode::Sync);
 
             if (rank == 0)
             {
@@ -109,7 +117,9 @@ int main(int argc, char *argv[])
             std::cerr << "Invalid argument exception, STOPPING PROGRAM from rank " << rank << "\n";
             std::cerr << e.what() << "\n";
         }
+#if ADIOS2_USE_MPI
         MPI_Abort(MPI_COMM_WORLD, 1);
+#endif
     }
     catch (std::ios_base::failure &e)
     {
@@ -119,12 +129,14 @@ int main(int argc, char *argv[])
                          "from rank "
                       << rank << "\n";
             std::cerr << e.what() << "\n";
-            std::cerr << "The file " << filename << " does not exist."
-                      << " Presumably this is because hello_bpWriter has not "
+            std::cerr << "The file myVector_cpp.bp does not exist."
+                      << " Presumably this is because adios2_hello_bpWriter has not "
                          "been run."
-                      << " Run ./hello_bpWriter before running this program.\n";
+                      << " Run ./adios2_hello_bpWriter before running this program.\n";
         }
+#if ADIOS2_USE_MPI
         MPI_Abort(MPI_COMM_WORLD, 1);
+#endif
     }
     catch (std::exception &e)
     {
@@ -133,10 +145,14 @@ int main(int argc, char *argv[])
             std::cerr << "Exception, STOPPING PROGRAM from rank " << rank << "\n";
             std::cerr << e.what() << "\n";
         }
+#if ADIOS2_USE_MPI
         MPI_Abort(MPI_COMM_WORLD, 1);
+#endif
     }
 
+#if ADIOS2_USE_MPI
     MPI_Finalize();
+#endif
 
     return 0;
 }
diff --git a/examples/hello/bpReader/bpReader_nompi.cpp b/examples/hello/bpReader/bpReader_nompi.cpp
deleted file mode 100644
index 6f36aa3c63..0000000000
--- a/examples/hello/bpReader/bpReader_nompi.cpp
+++ /dev/null
@@ -1,96 +0,0 @@
-/*
- * Distributed under the OSI-approved Apache License, Version 2.0.  See
- * accompanying file Copyright.txt for details.
- *
- * bpReader_nompi.cpp: Simple self-descriptive example of how to read a variable
- * from a BP File.
- *
- *  Created on: Feb 16, 2017
- *      Author: William F Godoy godoywf@ornl.gov
- */
-
-#include <ios>       //std::ios_base::failure
-#include <iostream>  //std::cout
-#include <stdexcept> //std::invalid_argument std::exception
-#include <vector>
-
-#include <adios2.h>
-
-int main(int argc, char *argv[])
-{
-    std::string filename = "myVector_cpp.bp";
-
-    try
-    {
-        /** ADIOS class factory of IO class objects */
-        adios2::ADIOS adios;
-
-        /*** IO class object: settings and factory of Settings: Variables,
-         * Parameters, Transports, and Execution: Engines */
-        adios2::IO bpIO = adios.DeclareIO("ReadBP");
-
-        /** Engine derived class, spawned to start IO operations */
-        adios2::Engine bpReader = bpIO.Open(filename, adios2::Mode::Read);
-        bpReader.BeginStep();
-
-        const std::map<std::string, adios2::Params> variables = bpIO.AvailableVariables(true);
-
-        std::cout << "List of variables:";
-        for (const auto &variablePair : variables)
-        {
-            std::cout << "  " << variablePair.first;
-        }
-        std::cout << std::endl;
-
-        /** Write variable for buffering */
-        adios2::Variable<float> bpFloats = bpIO.InquireVariable<float>("bpFloats");
-
-        adios2::Variable<int> bpInts = bpIO.InquireVariable<int>("bpInts");
-
-        if (bpFloats)
-        {
-            std::vector<float> myFloats;
-            bpReader.Get<float>(bpFloats, myFloats, adios2::Mode::Sync);
-            std::cout << "Float vector inside " << filename << ": {";
-            for (auto &x : myFloats)
-            {
-                std::cout << x << ", ";
-            }
-            std::cout << "}\n";
-        }
-
-        if (bpInts)
-        {
-            std::vector<int> myInts;
-            bpReader.Get<int>(bpInts, myInts, adios2::Mode::Sync);
-        }
-        else
-        {
-            std::cout << "There are no integer datasets in " << filename << ".\n";
-        }
-        bpReader.EndStep();
-
-        /** Close bp file, engine becomes unreachable after this*/
-        bpReader.Close();
-    }
-    catch (std::invalid_argument &e)
-    {
-        std::cerr << "Invalid argument exception, STOPPING PROGRAM\n";
-        std::cerr << e.what() << "\n";
-    }
-    catch (std::ios_base::failure &e)
-    {
-        std::cerr << "IO System base failure exception, STOPPING PROGRAM\n";
-        std::cerr << e.what() << "\n";
-        std::cerr << "The file " << filename << " does not exist."
-                  << " Presumably this is because hello_bpWriter has not been "
-                     "run. Run ./hello_bpWriter before running this program.\n";
-    }
-    catch (std::exception &e)
-    {
-        std::cerr << "Exception, STOPPING PROGRAM\n";
-        std::cerr << e.what() << "\n";
-    }
-
-    return 0;
-}
diff --git a/examples/hello/bpReader/bpReader_tutorialSkeleton.cpp b/examples/hello/bpReader/bpReader_tutorialSkeleton.cpp
new file mode 100644
index 0000000000..d0f4713ec3
--- /dev/null
+++ b/examples/hello/bpReader/bpReader_tutorialSkeleton.cpp
@@ -0,0 +1,76 @@
+/*
+ * Distributed under the OSI-approved Apache License, Version 2.0.  See
+ * accompanying file Copyright.txt for details.
+ *
+ * bpReader.cpp: Simple self-descriptive example of how to read a variable
+ * from a BP File.
+ *
+ *  Created on: Feb 16, 2017
+ *      Author: William F Godoy godoywf@ornl.gov
+ */
+#include <ios>      //std::ios_base::failure
+#include <iostream> //std::cout
+#include <mpi.h>
+#include <stdexcept> //std::invalid_argument std::exception
+#include <vector>
+
+#include <adios2.h>
+
+int main(int argc, char *argv[])
+{
+    int rank, size;
+    int provided;
+
+    // Add code to init MPI
+    try
+    {
+        // Add code to create ADIOS object
+
+        // Add code to create IO object
+
+        // Add code to open file
+
+        // Add code to inquire variables and optionally check all available variables
+
+        // Add code to read variables
+
+        // Add code to close file
+    }
+    catch (std::invalid_argument &e)
+    {
+        if (rank == 0)
+        {
+            std::cerr << "Invalid argument exception, STOPPING PROGRAM from rank " << rank << "\n";
+            std::cerr << e.what() << "\n";
+        }
+        MPI_Abort(MPI_COMM_WORLD, 1);
+    }
+    catch (std::ios_base::failure &e)
+    {
+        if (rank == 0)
+        {
+            std::cerr << "IO System base failure exception, STOPPING PROGRAM "
+                         "from rank "
+                      << rank << "\n";
+            std::cerr << e.what() << "\n";
+            std::cerr << "The file myVector_cpp.bp does not exist."
+                      << " Presumably this is because adios2_hello_bpWriter has not "
+                         "been run."
+                      << " Run ./adios2_hello_bpWriter before running this program.\n";
+        }
+        MPI_Abort(MPI_COMM_WORLD, 1);
+    }
+    catch (std::exception &e)
+    {
+        if (rank == 0)
+        {
+            std::cerr << "Exception, STOPPING PROGRAM from rank " << rank << "\n";
+            std::cerr << e.what() << "\n";
+        }
+        MPI_Abort(MPI_COMM_WORLD, 1);
+    }
+
+    MPI_Finalize();
+
+    return 0;
+}
diff --git a/examples/hello/bpTimeWriter/CMakeLists.txt b/examples/hello/bpStepsWriteRead/CMakeLists.txt
similarity index 57%
rename from examples/hello/bpTimeWriter/CMakeLists.txt
rename to examples/hello/bpStepsWriteRead/CMakeLists.txt
index 30a9f1d361..6acf9698dd 100644
--- a/examples/hello/bpTimeWriter/CMakeLists.txt
+++ b/examples/hello/bpStepsWriteRead/CMakeLists.txt
@@ -4,7 +4,7 @@
 #------------------------------------------------------------------------------#
 
 cmake_minimum_required(VERSION 3.12)
-project(ADIOS2HelloBPTimeWriterExample)
+project(ADIOS2HelloBPStepsWriteReadExample)
 
 if(NOT TARGET adios2_core)
   set(_components CXX)
@@ -20,12 +20,12 @@ if(NOT TARGET adios2_core)
   find_package(ADIOS2 REQUIRED COMPONENTS ${_components})
 endif()
 
-add_executable(adios2_hello_bpTimeWriter bpTimeWriter_nompi.cpp)
-target_link_libraries(adios2_hello_bpTimeWriter adios2::cxx11)
-install(TARGETS adios2_hello_bpTimeWriter RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR})
+add_executable(adios2_hello_bpStepsWriteRead bpStepsWriteRead.cpp)
+target_link_libraries(adios2_hello_bpStepsWriteRead adios2::cxx11)
+install(TARGETS adios2_hello_bpStepsWriteRead RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR})
 
 if(ADIOS2_HAVE_MPI)
-  add_executable(adios2_hello_bpTimeWriter_mpi bpTimeWriter.cpp)
-  target_link_libraries(adios2_hello_bpTimeWriter_mpi adios2::cxx11_mpi MPI::MPI_C)
-  install(TARGETS adios2_hello_bpTimeWriter_mpi RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR})
+  add_executable(adios2_hello_bpStepsWriteRead_mpi bpStepsWriteRead.cpp)
+  target_link_libraries(adios2_hello_bpStepsWriteRead_mpi adios2::cxx11_mpi MPI::MPI_C)
+  install(TARGETS adios2_hello_bpStepsWriteRead_mpi RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR})
 endif()
diff --git a/examples/hello/bpStepsWriteRead/bpStepsWriteRead.cpp b/examples/hello/bpStepsWriteRead/bpStepsWriteRead.cpp
new file mode 100644
index 0000000000..e06a2e80da
--- /dev/null
+++ b/examples/hello/bpStepsWriteRead/bpStepsWriteRead.cpp
@@ -0,0 +1,154 @@
+/*
+ * Distributed under the OSI-approved Apache License, Version 2.0.  See
+ * accompanying file Copyright.txt for details.
+ *
+ * bpStepsWriteRead.cpp  Simple example of writing and reading data through ADIOS2 BP engine with
+ * multiple simulations steps for every IO step.
+ *
+ *  Created on: Feb 16, 2017
+ *      Author: William F Godoy godoywf@ornl.gov
+ */
+
+#include <algorithm> // std::for_each
+#include <ios>       // std::ios_base::failure
+#include <iostream>  // std::cout
+#if ADIOS2_USE_MPI
+#include <mpi.h>
+#endif
+#include <stdexcept> //std::invalid_argument std::exception
+#include <vector>
+
+#include <adios2.h>
+
+void update_array(std::vector<float> &array, int val)
+{
+    std::transform(array.begin(), array.end(), array.begin(),
+                   [val](float v) -> float { return v + static_cast<float>(val); });
+}
+
+void writer(adios2::ADIOS &adios, const std::string &engine, const std::string &fname,
+            const size_t Nx, unsigned int nSteps, int rank, int size)
+{
+    std::vector<float> simData(Nx, 0);
+
+    adios2::IO bpIO = adios.DeclareIO("WriteIO");
+    bpIO.SetEngine(engine);
+
+    const adios2::Dims shape{static_cast<size_t>(size * Nx)};
+    const adios2::Dims start{static_cast<size_t>(rank * Nx)};
+    const adios2::Dims count{Nx};
+    auto bpFloats = bpIO.DefineVariable<float>("bpFloats", shape, start, count);
+
+    auto bpStep = bpIO.DefineVariable<unsigned int>("bpStep");
+
+    adios2::Engine bpWriter = bpIO.Open(fname, adios2::Mode::Write);
+
+    for (unsigned int step = 0; step < nSteps; ++step)
+    {
+        const adios2::Box<adios2::Dims> sel({0}, {Nx});
+        bpFloats.SetSelection(sel);
+
+        bpWriter.BeginStep();
+        bpWriter.Put(bpFloats, simData.data());
+        bpWriter.Put(bpStep, step);
+        bpWriter.EndStep();
+
+        // Update values in the simulation data
+        update_array(simData, 10);
+    }
+
+    bpWriter.Close();
+}
+
+void reader(adios2::ADIOS &adios, const std::string &engine, const std::string &fname,
+            const size_t Nx, unsigned int /*nSteps*/, int rank, int /*size*/)
+{
+    adios2::IO bpIO = adios.DeclareIO("ReadIO");
+    bpIO.SetEngine(engine);
+
+    adios2::Engine bpReader = bpIO.Open(fname, adios2::Mode::Read);
+
+    std::vector<float> simData(Nx, 0);
+    unsigned int inStep = 0;
+    for (unsigned int step = 0; bpReader.BeginStep() == adios2::StepStatus::OK; ++step)
+    {
+        auto bpFloats = bpIO.InquireVariable<float>("bpFloats");
+        if (bpFloats)
+        {
+            const adios2::Box<adios2::Dims> sel({{Nx * rank}, {Nx}});
+            bpFloats.SetSelection(sel);
+            bpReader.Get(bpFloats, simData.data());
+        }
+        auto bpStep = bpIO.InquireVariable<unsigned int>("bpStep");
+        if (bpStep)
+        {
+            bpReader.Get(bpStep, &inStep);
+        }
+
+        bpReader.EndStep();
+        if (inStep != step)
+        {
+            std::cout << "ERROR: step mismatch\n";
+            return;
+        }
+    }
+    bpReader.Close();
+}
+
+int main(int argc, char *argv[])
+{
+    int rank, size;
+
+#if ADIOS2_USE_MPI
+    int provided;
+    // MPI_THREAD_MULTIPLE is only required if you enable the SST MPI_DP
+    MPI_Init_thread(&argc, &argv, MPI_THREAD_MULTIPLE, &provided);
+    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
+    MPI_Comm_size(MPI_COMM_WORLD, &size);
+#else
+    rank = 0;
+    size = 1;
+#endif
+
+    const std::string engine = argv[1] ? argv[1] : "BPFile";
+    std::cout << "Using engine " << engine << std::endl;
+
+    const std::string filename = engine + "StepsWriteRead.bp";
+    const unsigned int nSteps = 10;
+    const unsigned int Nx = 60000;
+    try
+    {
+        /** ADIOS class factory of IO class objects */
+#if ADIOS2_USE_MPI
+        adios2::ADIOS adios(MPI_COMM_WORLD);
+#else
+        adios2::ADIOS adios;
+#endif
+
+        writer(adios, engine, filename, Nx, nSteps, rank, size);
+        reader(adios, engine, filename, Nx, nSteps, rank, size);
+    }
+    catch (std::invalid_argument &e)
+    {
+        std::cout << "Invalid argument exception, STOPPING PROGRAM from rank " << rank << "\n";
+        std::cout << e.what() << "\n";
+    }
+    catch (std::ios_base::failure &e)
+    {
+        std::cout << "IO System base failure exception, STOPPING PROGRAM "
+                     "from rank "
+                  << rank << "\n";
+        std::cout << e.what() << "\n";
+    }
+    catch (std::exception &e)
+    {
+        std::cout << "Exception, STOPPING PROGRAM from rank " << rank << "\n";
+        std::cout << e.what() << "\n";
+    }
+
+#if ADIOS2_USE_MPI
+    MPI_Finalize();
+#endif
+
+    return 0;
+}
diff --git a/examples/hello/bpStepsWriteRead/bpStepsWriteRead_tutorialSkeleton.cxx b/examples/hello/bpStepsWriteRead/bpStepsWriteRead_tutorialSkeleton.cxx
new file mode 100644
index 0000000000..faceff681d
--- /dev/null
+++ b/examples/hello/bpStepsWriteRead/bpStepsWriteRead_tutorialSkeleton.cxx
@@ -0,0 +1,98 @@
+/*
+* Distributed under the OSI-approved Apache License, Version 2.0.  See
+* accompanying file Copyright.txt for details.
+*
+* bpStepsWriteRead.cpp  Simple example of writing and reading data through ADIOS2 BP engine with
+* multiple simulations steps for every IO step.
+*
+*  Created on: Feb 16, 2017
+*      Author: William F Godoy godoywf@ornl.gov
+ */
+
+#include <algorithm> //std::for_each
+#include <ios>       //std::ios_base::failure
+#include <iostream>  //std::cout
+#include <mpi.h>
+#include <stdexcept> //std::invalid_argument std::exception
+#include <vector>
+
+#include <adios2.h>
+
+void update_array(std::vector<float> &array, int val)
+{
+    std::transform(array.begin(), array.end(), array.begin(),
+                   [val](float v) -> float { return v + static_cast<float>(val); });
+}
+
+void writer(adios2::ADIOS &adios, const std::string &engine, const std::string &fname,
+            const size_t Nx, unsigned int nSteps, int rank, int size)
+{
+    // Add code to create the simulation data
+
+    // Add code to create ADIOS io and set engine type
+
+    // Add code to define sim data variable
+
+    // Add code to define step variable
+
+    // Add code to open file
+
+    // Add code to write data across multiple steps, and update the simulation data
+
+    // Add code to close file
+}
+
+void reader(adios2::ADIOS &adios, const std::string &engine, const std::string &fname,
+            const size_t Nx, unsigned int /*nSteps*/, int rank, int /*size*/)
+{
+    // Add code to create ADIOS io and set engine type
+
+    // Add code to open file
+
+    // Add code to create variable for sim data and step
+
+    // Add code to read data across multiple steps
+
+    // Add code to close file
+}
+
+int main(int argc, char *argv[])
+{
+    int rank, size;
+    int provided;
+
+    // Add code to initialize MPI
+
+    const std::string engine = argv[1] ? argv[1] : "BPFile";
+    std::cout << "Using engine " << engine << std::endl;
+
+    // Add Code to set filename, nSteps, Nx
+    try
+    {
+        // Add code to create ADIOS object
+
+        // Add code to call writer
+        // Add code to call reader
+    }
+    catch (std::invalid_argument &e)
+    {
+        std::cout << "Invalid argument exception, STOPPING PROGRAM from rank " << rank << "\n";
+        std::cout << e.what() << "\n";
+    }
+    catch (std::ios_base::failure &e)
+    {
+        std::cout << "IO System base failure exception, STOPPING PROGRAM "
+                     "from rank "
+                  << rank << "\n";
+        std::cout << e.what() << "\n";
+    }
+    catch (std::exception &e)
+    {
+        std::cout << "Exception, STOPPING PROGRAM from rank " << rank << "\n";
+        std::cout << e.what() << "\n";
+    }
+
+    // Add code to finalize MPI
+
+    return 0;
+}
diff --git a/examples/hello/bpWriteReadCuda/CMakeLists.txt b/examples/hello/bpStepsWriteReadCuda/CMakeLists.txt
similarity index 58%
rename from examples/hello/bpWriteReadCuda/CMakeLists.txt
rename to examples/hello/bpStepsWriteReadCuda/CMakeLists.txt
index 453867f0b4..590adb671a 100644
--- a/examples/hello/bpWriteReadCuda/CMakeLists.txt
+++ b/examples/hello/bpStepsWriteReadCuda/CMakeLists.txt
@@ -4,7 +4,7 @@
 #------------------------------------------------------------------------------#
 
 cmake_minimum_required(VERSION 3.12)
-project(ADIOS2HelloBPWriteReadCudaExample)
+project(ADIOS2HelloBPStepsWriteReadCudaExample)
 
 if(NOT TARGET adios2_core)
   set(_components CXX)
@@ -18,8 +18,8 @@ if(NOT TARGET adios2_core)
 endif()
 
 if(ADIOS2_HAVE_CUDA OR ADIOS2_HAVE_Kokkos_CUDA)
-  add_executable(adios2_hello_bpWriteReadCuda bpWriteReadCuda.cu)
-  target_link_libraries(adios2_hello_bpWriteReadCuda PUBLIC adios2::cxx11 CUDA::cudart)
-  set_target_properties(adios2_hello_bpWriteReadCuda PROPERTIES CUDA_SEPARABLE_COMPILATION ON)
-  install(TARGETS adios2_hello_bpWriteReadCuda RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR})
+  add_executable(adios2_hello_bpStepsWriteReadCuda bpStepsWriteReadCuda.cu)
+  target_link_libraries(adios2_hello_bpStepsWriteReadCuda PUBLIC adios2::cxx11 CUDA::cudart)
+  set_target_properties(adios2_hello_bpStepsWriteReadCuda PROPERTIES CUDA_SEPARABLE_COMPILATION ON)
+  install(TARGETS adios2_hello_bpStepsWriteReadCuda RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR})
 endif()
diff --git a/examples/hello/bpStepsWriteReadCuda/bpStepsWriteReadCuda.cu b/examples/hello/bpStepsWriteReadCuda/bpStepsWriteReadCuda.cu
new file mode 100644
index 0000000000..5d911e64cf
--- /dev/null
+++ b/examples/hello/bpStepsWriteReadCuda/bpStepsWriteReadCuda.cu
@@ -0,0 +1,142 @@
+/*
+ * Distributed under the OSI-approved Apache License, Version 2.0.  See
+ * accompanying file Copyright.txt for details.
+ *
+ * bpStepsWriteReadCuda.cu  Simple example of writing and reading data through ADIOS2 BP engine with
+ * multiple simulations steps for every IO step using CUDA
+ */
+
+#include <ios>
+#include <iostream>
+#include <stdexcept> //std::invalid_argument std::exception
+#include <string>
+#include <vector>
+
+#include <adios2.h>
+
+#include <cuda_runtime.h>
+
+__global__ void update_array(float *vect, int val) { vect[blockIdx.x] += val; }
+
+void writer(adios2::ADIOS &adios, const std::string &engine, const std::string &fname,
+            const size_t Nx, unsigned int nSteps)
+{
+    // Initialize the simulation data
+    float *gpuSimData;
+    cudaMalloc(&gpuSimData, Nx * sizeof(float));
+    cudaMemset(gpuSimData, 0, Nx);
+
+    // Set up the ADIOS structures
+    adios2::IO bpIO = adios.DeclareIO("WriteIO");
+    bpIO.SetEngine(engine);
+
+    // Declare an array for the ADIOS data of size (NumOfProcesses * Nx)
+    const adios2::Dims shape{static_cast<size_t>(Nx)};
+    const adios2::Dims start{static_cast<size_t>(0)};
+    const adios2::Dims count{Nx};
+    auto bpFloats = bpIO.DefineVariable<float>("bpFloats", shape, start, count);
+    auto bpStep = bpIO.DefineVariable<unsigned int>("bpStep");
+
+    adios2::Engine bpWriter = bpIO.Open(fname, adios2::Mode::Write);
+
+    // Simulation steps
+    for (unsigned int step = 0; step < nSteps; ++step)
+    {
+        // Make a 1D selection to describe the local dimensions of the
+        // variable we write and its offsets in the global spaces
+        const adios2::Box<adios2::Dims> sel({0}, {Nx});
+        bpFloats.SetSelection(sel);
+
+        // Start IO step every write step
+        bpWriter.BeginStep();
+        bpFloats.SetMemorySpace(adios2::MemorySpace::GPU);
+        bpWriter.Put(bpFloats, gpuSimData);
+        bpWriter.Put(bpStep, step);
+        bpWriter.EndStep();
+
+        // Update values in the simulation data
+        update_array<<<Nx, 1>>>(gpuSimData, 10);
+    }
+
+    bpWriter.Close();
+}
+
+void reader(adios2::ADIOS &adios, const std::string &engine, const std::string &fname,
+            const size_t Nx, unsigned int /*nSteps*/)
+{
+    // Create ADIOS structures
+    adios2::IO bpIO = adios.DeclareIO("ReadIO");
+    bpIO.SetEngine(engine);
+
+    adios2::Engine bpReader = bpIO.Open(fname, adios2::Mode::Read);
+
+    unsigned int inStep = 0;
+    float *gpuSimData;
+    cudaMalloc(&gpuSimData, Nx * sizeof(float));
+    cudaMemset(gpuSimData, 0, Nx);
+    for (unsigned int step = 0; bpReader.BeginStep() == adios2::StepStatus::OK; ++step)
+    {
+        auto bpFloats = bpIO.InquireVariable<float>("bpFloats");
+        if (bpFloats)
+        {
+            const adios2::Dims start{0};
+            const adios2::Dims count{Nx};
+            const adios2::Box<adios2::Dims> sel(start, count);
+            bpFloats.SetSelection(sel);
+
+            bpFloats.SetMemorySpace(adios2::MemorySpace::GPU);
+            bpReader.Get(bpFloats, gpuSimData); //, adios2::Mode::Deferred);
+        }
+        auto bpStep = bpIO.InquireVariable<unsigned int>("bpStep");
+        if (bpStep)
+        {
+            bpReader.Get(bpStep, &inStep);
+        }
+
+        bpReader.EndStep();
+        if (inStep != step)
+        {
+            std::cout << "ERROR: step mismatch\n";
+            return;
+        }
+    }
+    bpReader.Close();
+}
+
+int main(int argc, char **argv)
+{
+    const int device_id = 1;
+    cudaSetDevice(device_id);
+
+    const std::string engine = argv[1] ? argv[1] : "BPFile";
+    std::cout << "Using engine " << engine << std::endl;
+
+    const std::string filename = engine + "StepsWriteReadCuda.bp";
+    const unsigned int nSteps = 10;
+    const unsigned int Nx = 6000;
+    try
+    {
+        /** ADIOS class factory of IO class objects */
+        adios2::ADIOS adios;
+
+        writer(adios, engine, filename, Nx, nSteps);
+        reader(adios, engine, filename, Nx, nSteps);
+    }
+    catch (std::invalid_argument &e)
+    {
+        std::cout << "Invalid argument exception, STOPPING PROGRAM\n";
+        std::cout << e.what() << "\n";
+    }
+    catch (std::ios_base::failure &e)
+    {
+        std::cout << "IO System base failure exception, STOPPING PROGRAM\n";
+        std::cout << e.what() << "\n";
+    }
+    catch (std::exception &e)
+    {
+        std::cout << "Exception, STOPPING PROGRAM\n";
+        std::cout << e.what() << "\n";
+    }
+
+    return 0;
+}
diff --git a/examples/hello/bpStepsWriteReadHip/CMakeLists.txt b/examples/hello/bpStepsWriteReadHip/CMakeLists.txt
new file mode 100644
index 0000000000..3762c25b94
--- /dev/null
+++ b/examples/hello/bpStepsWriteReadHip/CMakeLists.txt
@@ -0,0 +1,21 @@
+#------------------------------------------------------------------------------#
+# Distributed under the OSI-approved Apache License, Version 2.0.  See
+# accompanying file Copyright.txt for details.
+#------------------------------------------------------------------------------#
+
+cmake_minimum_required(VERSION 3.12)
+project(ADIOS2HelloBPStepsWriteReadHipExample)
+
+if(NOT TARGET adios2_core)
+  find_package(ADIOS2 REQUIRED COMPONENTS CXX)
+endif()
+
+enable_language(HIP)
+
+# Needed for the hip cmake targets
+find_package(hip REQUIRED)
+
+add_executable(adios2_hello_bpStepsWriteReadHip bpStepsWriteReadHip.cpp)
+target_link_libraries(adios2_hello_bpStepsWriteReadHip adios2::cxx11 hip::device)
+set_source_files_properties(bpStepsWriteReadHip.cpp PROPERTIES LANGUAGE HIP)
+install(TARGETS adios2_hello_bpStepsWriteReadHip RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR})
diff --git a/examples/hello/bpStepsWriteReadHip/bpStepsWriteReadHip.cpp b/examples/hello/bpStepsWriteReadHip/bpStepsWriteReadHip.cpp
new file mode 100644
index 0000000000..12fe720d51
--- /dev/null
+++ b/examples/hello/bpStepsWriteReadHip/bpStepsWriteReadHip.cpp
@@ -0,0 +1,161 @@
+/*
+ * Distributed under the OSI-approved Apache License, Version 2.0.  See
+ * accompanying file Copyright.txt for details.
+ *
+ * bpStepsWriteReadHip.cpp  Simple example of writing and reading bpFloats through ADIOS2 BP engine
+ * with multiple simulations steps for every IO step using HIP
+ */
+#include <algorithm>
+#include <ios>
+#include <iostream>
+#include <stdexcept> //std::invalid_argument std::exception
+#include <vector>
+
+#include <adios2.h>
+
+#include <hip/hip_runtime.h>
+
+__global__ void hip_initialize(float *vec) { vec[hipBlockIdx_x] = hipBlockIdx_x; }
+
+__global__ void hip_increment(float *vec, float val) { vec[hipBlockIdx_x] += val; }
+
+void writer(adios2::ADIOS &adios, const std::string &engine, const std::string &fname,
+            const size_t Nx, unsigned int nSteps)
+{
+    hipError_t hipExit;
+    float *gpuSimData;
+    hipExit = hipMalloc((void **)&gpuSimData, Nx * sizeof(float));
+    if (hipExit != hipSuccess)
+    {
+        std::cout << "[BPWrite] error: " << hipGetErrorString(hipExit) << std::endl;
+        return;
+    }
+    hipLaunchKernelGGL(hip_initialize, dim3(Nx), dim3(1), 0, 0, gpuSimData);
+    hipExit = hipDeviceSynchronize();
+    if (hipExit != hipSuccess)
+    {
+        std::cout << "[BPWrite] error: " << hipGetErrorString(hipExit) << std::endl;
+        return;
+    }
+
+    adios2::IO bpIO = adios.DeclareIO("WriteIO");
+    bpIO.SetEngine(engine);
+
+    const adios2::Dims shape{static_cast<size_t>(Nx)};
+    const adios2::Dims start{static_cast<size_t>(0)};
+    const adios2::Dims count{Nx};
+    auto bpFloats = bpIO.DefineVariable<float>("bpFloats", shape, start, count);
+    auto bpStep = bpIO.DefineVariable<unsigned int>("bpStep");
+
+    adios2::Engine bpWriter = bpIO.Open(fname, adios2::Mode::Write);
+
+    for (unsigned int step = 0; step < nSteps; ++step)
+    {
+        const adios2::Box<adios2::Dims> sel({0}, {Nx});
+        bpFloats.SetSelection(sel);
+
+        bpWriter.BeginStep();
+        bpWriter.Put(bpFloats, gpuSimData);
+        bpWriter.Put(bpStep, step);
+        bpWriter.EndStep();
+
+        hipLaunchKernelGGL(hip_increment, dim3(Nx), dim3(1), 0, 0, gpuSimData, 10);
+        hipExit = hipDeviceSynchronize();
+        if (hipExit != hipSuccess)
+        {
+            std::cout << "[BPWrite] error: " << hipGetErrorString(hipExit) << std::endl;
+            return;
+        }
+    }
+
+    bpWriter.Close();
+}
+
+void reader(adios2::ADIOS &adios, const std::string &engine, const std::string &fname,
+            const size_t Nx, unsigned int /*nSteps*/)
+{
+    hipError_t hipExit;
+    adios2::IO bpIO = adios.DeclareIO("ReadIO");
+    bpIO.SetEngine(engine);
+
+    adios2::Engine bpReader = bpIO.Open(fname, adios2::Mode::Read);
+
+    unsigned int inStep = 0;
+    float *gpuSimData;
+    hipExit = hipMalloc((void **)&gpuSimData, Nx * sizeof(float));
+    if (hipExit != hipSuccess)
+    {
+        std::cout << "[BPWrite] error: " << hipGetErrorString(hipExit) << std::endl;
+        return;
+    }
+    for (unsigned int step = 0; bpReader.BeginStep() == adios2::StepStatus::OK; ++step)
+    {
+        auto bpFloats = bpIO.InquireVariable<float>("bpFloats");
+        if (bpFloats)
+        {
+            const adios2::Dims start{0};
+            const adios2::Dims count{Nx};
+            const adios2::Box<adios2::Dims> sel(start, count);
+            bpFloats.SetSelection(sel);
+            bpFloats.SetMemorySpace(adios2::MemorySpace::GPU);
+            bpReader.Get(bpFloats, gpuSimData);
+        }
+        auto bpStep = bpIO.InquireVariable<unsigned int>("bpStep");
+        if (bpStep)
+        {
+            bpReader.Get(bpStep, &inStep);
+        }
+
+        bpReader.EndStep();
+        if (inStep != step)
+        {
+            std::cout << "ERROR: step mismatch\n";
+            return;
+        }
+    }
+    bpReader.Close();
+}
+
+int main(int argc, char **argv)
+{
+    hipError_t hipExit;
+    const int device_id = 0;
+    hipExit = hipSetDevice(device_id);
+    if (hipExit != hipSuccess)
+    {
+        std::cout << "[BPWrite] error: " << hipGetErrorString(hipExit) << std::endl;
+        return 1;
+    }
+
+    const std::string engine = argv[1] ? argv[1] : "BPFile";
+    std::cout << "Using engine " << engine << std::endl;
+
+    const std::string filename = engine + "StepsWriteReadHip.bp";
+    const unsigned int nSteps = 10;
+    const unsigned int Nx = 6000;
+    try
+    {
+        /** ADIOS class factory of IO class objects */
+        adios2::ADIOS adios;
+
+        writer(adios, engine, filename, Nx, nSteps);
+        reader(adios, engine, filename, Nx, nSteps);
+    }
+    catch (std::invalid_argument &e)
+    {
+        std::cout << "Invalid argument exception, STOPPING PROGRAM\n";
+        std::cout << e.what() << "\n";
+    }
+    catch (std::ios_base::failure &e)
+    {
+        std::cout << "IO System base failure exception, STOPPING PROGRAM\n";
+        std::cout << e.what() << "\n";
+    }
+    catch (std::exception &e)
+    {
+        std::cout << "Exception, STOPPING PROGRAM\n";
+        std::cout << e.what() << "\n";
+    }
+
+    return 0;
+}
diff --git a/examples/hello/bpWriteReadKokkos/CMakeLists.txt b/examples/hello/bpStepsWriteReadKokkos/CMakeLists.txt
similarity index 68%
rename from examples/hello/bpWriteReadKokkos/CMakeLists.txt
rename to examples/hello/bpStepsWriteReadKokkos/CMakeLists.txt
index a8e810b734..c420e6e762 100644
--- a/examples/hello/bpWriteReadKokkos/CMakeLists.txt
+++ b/examples/hello/bpStepsWriteReadKokkos/CMakeLists.txt
@@ -4,7 +4,7 @@
 #------------------------------------------------------------------------------#
 
 cmake_minimum_required(VERSION 3.12)
-project(ADIOS2HelloBPWriteReadKokkosExample)
+project(ADIOS2HelloBPStepsWriteReadKokkosExample)
 
 # CXX Compiler settings only in for this example
 set(CMAKE_CXX_STANDARD 17)
@@ -26,8 +26,8 @@ else()
 endif()
 
 if(ADIOS2_HAVE_Kokkos)
-  add_executable(adios2_hello_bpWriteReadKokkos bpWriteReadKokkos.cpp)
-  kokkos_compilation(SOURCE bpWriteReadKokkos.cpp)
-  target_link_libraries(adios2_hello_bpWriteReadKokkos adios2::cxx11 Kokkos::kokkos)
-  install(TARGETS adios2_hello_bpWriteReadKokkos RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR})
+  add_executable(adios2_hello_bpStepsWriteReadKokkos bpStepsWriteReadKokkos.cpp)
+  kokkos_compilation(SOURCE bpStepsWriteReadKokkos.cpp)
+  target_link_libraries(adios2_hello_bpStepsWriteReadKokkos adios2::cxx11 Kokkos::kokkos)
+  install(TARGETS adios2_hello_bpStepsWriteReadKokkos RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR})
 endif()
diff --git a/examples/hello/bpStepsWriteReadKokkos/bpStepsWriteReadKokkos.cpp b/examples/hello/bpStepsWriteReadKokkos/bpStepsWriteReadKokkos.cpp
new file mode 100644
index 0000000000..268d75adf3
--- /dev/null
+++ b/examples/hello/bpStepsWriteReadKokkos/bpStepsWriteReadKokkos.cpp
@@ -0,0 +1,146 @@
+/*
+ * Distributed under the OSI-approved Apache License, Version 2.0.  See
+ * accompanying file Copyright.txt for details.
+ *
+ * bpStepsWriteReadKokkos.cpp  Simple example of writing and reading bpFloats through ADIOS2 BP
+ * engine with multiple simulations steps for every IO step using Kokkos
+ */
+#include <ios>
+#include <iostream>
+#include <stdexcept> //std::invalid_argument std::exception
+#include <vector>
+
+#include <adios2.h>
+
+#include <Kokkos_Core.hpp>
+
+void writer(adios2::ADIOS &adios, const std::string &engine, const std::string &fname,
+            const size_t Nx, unsigned int nSteps)
+{
+    // Initialize the simulation bpFloats with the default memory space
+    using mem_space = Kokkos::DefaultExecutionSpace::memory_space;
+    Kokkos::View<float *, mem_space> gpuSimData("simBuffer", Nx);
+    Kokkos::parallel_for(
+        "initBuffer", Kokkos::RangePolicy<Kokkos::DefaultExecutionSpace>(0, Nx),
+        KOKKOS_LAMBDA(int i) { gpuSimData(i) = static_cast<float>(i); });
+    Kokkos::fence();
+
+    // Set up the ADIOS structures
+    adios2::IO bpIO = adios.DeclareIO("WriteIO");
+    bpIO.SetEngine(engine);
+
+    const adios2::Dims shape{static_cast<size_t>(Nx)};
+    const adios2::Dims start{static_cast<size_t>(0)};
+    const adios2::Dims count{Nx};
+    auto bpFloats = bpIO.DefineVariable<float>("bpFloats", shape, start, count);
+    auto bpStep = bpIO.DefineVariable<unsigned int>("bpStep");
+
+    adios2::Engine bpWriter = bpIO.Open(fname, adios2::Mode::Write);
+
+    // Simulation steps
+    for (unsigned int step = 0; step < nSteps; ++step)
+    {
+        // Make a 1D selection to describe the local dimensions of the
+        // variable we write and its offsets in the global spaces
+        adios2::Box<adios2::Dims> sel({0}, {Nx});
+        bpFloats.SetSelection(sel);
+
+        // Start IO step every write step
+        bpWriter.BeginStep();
+        bpWriter.Put(bpFloats, gpuSimData.data());
+        bpWriter.Put(bpStep, step);
+        bpWriter.EndStep();
+
+        // Update values in the simulation bpFloats using the default
+        // execution space
+        Kokkos::parallel_for(
+            "updateBuffer", Kokkos::RangePolicy<Kokkos::DefaultExecutionSpace>(0, Nx),
+            KOKKOS_LAMBDA(int i) { gpuSimData(i) += 10; });
+        Kokkos::fence();
+    }
+
+    bpWriter.Close();
+    Kokkos::DefaultExecutionSpace exe_space;
+    std::cout << "Done writing on memory space: " << exe_space.name() << std::endl;
+}
+
+void reader(adios2::ADIOS &adios, const std::string &engine, const std::string &fname,
+            const size_t Nx, unsigned int /*nSteps*/)
+{
+    // Create ADIOS structures
+    adios2::IO bpIO = adios.DeclareIO("ReadIO");
+    bpIO.SetEngine(engine);
+
+    Kokkos::DefaultExecutionSpace exe_space;
+    std::cout << "Read on memory space: " << exe_space.name() << std::endl;
+
+    adios2::Engine bpReader = bpIO.Open(fname, adios2::Mode::Read);
+
+    using mem_space = Kokkos::DefaultExecutionSpace::memory_space;
+    Kokkos::View<float *, mem_space> gpuSimData("simBuffer", Nx);
+    unsigned int inStep = 0;
+    for (unsigned int step = 0; bpReader.BeginStep() == adios2::StepStatus::OK; ++step)
+    {
+        auto bpFloats = bpIO.InquireVariable<float>("bpFloats");
+        if (bpFloats)
+        {
+            const adios2::Dims start{0};
+            const adios2::Dims count{Nx};
+            const adios2::Box<adios2::Dims> sel(start, count);
+            bpFloats.SetSelection(sel);
+
+            bpReader.Get(bpFloats, gpuSimData.data());
+        }
+        auto bpStep = bpIO.InquireVariable<unsigned int>("bpStep");
+        if (bpStep)
+        {
+            bpReader.Get(bpStep, &inStep);
+        }
+        bpReader.EndStep();
+        if (inStep != step)
+        {
+            std::cout << "ERROR: step mismatch\n";
+            return;
+        }
+    }
+
+    bpReader.Close();
+}
+
+int main(int argc, char **argv)
+{
+    Kokkos::initialize(argc, argv);
+
+    const std::string engine = argv[1] ? argv[1] : "BPFile";
+    std::cout << "Using engine " << engine << std::endl;
+
+    const std::string filename = engine + "StepsWriteReadCuda.bp";
+    const unsigned int nSteps = 10;
+    const unsigned int Nx = 6000;
+    try
+    {
+        /** ADIOS class factory of IO class objects */
+        adios2::ADIOS adios;
+
+        writer(adios, engine, filename, Nx, nSteps);
+        reader(adios, engine, filename, Nx, nSteps);
+    }
+    catch (std::invalid_argument &e)
+    {
+        std::cout << "Invalid argument exception, STOPPING PROGRAM\n";
+        std::cout << e.what() << "\n";
+    }
+    catch (std::ios_base::failure &e)
+    {
+        std::cout << "IO System base failure exception, STOPPING PROGRAM\n";
+        std::cout << e.what() << "\n";
+    }
+    catch (std::exception &e)
+    {
+        std::cout << "Exception, STOPPING PROGRAM\n";
+        std::cout << e.what() << "\n";
+    }
+    Kokkos::finalize();
+
+    return 0;
+}
diff --git a/examples/hello/bpTimeWriter/bpTimeWriter.cpp b/examples/hello/bpTimeWriter/bpTimeWriter.cpp
deleted file mode 100644
index 77e37389cc..0000000000
--- a/examples/hello/bpTimeWriter/bpTimeWriter.cpp
+++ /dev/null
@@ -1,178 +0,0 @@
-/*
- * Distributed under the OSI-approved Apache License, Version 2.0.  See
- * accompanying file Copyright.txt for details.
- *
- * bpTimeWriter.cpp  example for writing a variable using the Advance
- * function for time aggregation. Time step is saved as an additional (global)
- * single value variable, just for tracking purposes.
- *
- *  Created on: Feb 16, 2017
- *      Author: William F Godoy godoywf@ornl.gov
- */
-
-#include <algorithm> //std::for_each
-#include <ios>       //std::ios_base::failure
-#include <iostream>  //std::cout
-#include <mpi.h>
-#include <stdexcept> //std::invalid_argument std::exception
-#include <vector>
-
-#include <adios2.h>
-
-int main(int argc, char *argv[])
-{
-    int provided;
-
-    // MPI_THREAD_MULTIPLE is only required if you enable the SST MPI_DP
-    MPI_Init_thread(&argc, &argv, MPI_THREAD_MULTIPLE, &provided);
-    int rank, size;
-    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
-    MPI_Comm_size(MPI_COMM_WORLD, &size);
-
-    // Application variable
-    std::vector<float> myFloats = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9};
-    const std::size_t Nx = myFloats.size();
-
-    try
-    {
-        /** ADIOS class factory of IO class objects */
-        adios2::ADIOS adios(MPI_COMM_WORLD);
-
-        /// WRITE
-        {
-            /*** IO class object: settings and factory of Settings: Variables,
-             * Parameters, Transports, and Execution: Engines */
-            adios2::IO bpIO = adios.DeclareIO("BPFile_N2N");
-            bpIO.SetParameters({{"Threads", "2"}});
-
-            /** global array: name, { shape (total dimensions) }, { start
-             * (local) },
-             * { count (local) }, all are constant dimensions */
-            const unsigned int variablesSize = 10;
-            std::vector<adios2::Variable<float>> bpFloats(variablesSize);
-
-            adios2::Variable<std::string> bpString = bpIO.DefineVariable<std::string>("bpString");
-
-            for (unsigned int v = 0; v < variablesSize; ++v)
-            {
-                std::string namev("bpFloats");
-                if (v < 10)
-                {
-                    namev += "00";
-                }
-                else if (v < 100)
-                {
-                    namev += "0";
-                }
-                namev += std::to_string(v);
-
-                bpFloats[v] = bpIO.DefineVariable<float>(namev, {size * Nx}, {rank * Nx}, {Nx},
-                                                         adios2::ConstantDims);
-            }
-
-            /** global single value variable: name */
-            adios2::Variable<unsigned int> bpTimeStep =
-                bpIO.DefineVariable<unsigned int>("timeStep");
-
-            /** Engine derived class, spawned to start IO operations */
-            adios2::Engine bpWriter = bpIO.Open("myVector.bp", adios2::Mode::Write);
-
-            for (unsigned int timeStep = 0; timeStep < 3; ++timeStep)
-            {
-                bpWriter.BeginStep();
-                if (rank == 0) // global single value, only saved by rank 0
-                {
-                    bpWriter.Put<unsigned int>(bpTimeStep, timeStep);
-                }
-
-                // template type is optional, but recommended
-                for (unsigned int v = 0; v < variablesSize; ++v)
-                {
-                    myFloats[0] = static_cast<float>(v + timeStep);
-                    // Note: Put is deferred, so all variables will see v == 9
-                    // and myFloats[0] == 9, 10, or 11
-                    bpWriter.Put<float>(bpFloats[v], myFloats.data());
-                }
-                const std::string myString("Hello from rank: " + std::to_string(rank) +
-                                           " and timestep: " + std::to_string(timeStep));
-
-                if (rank == 0)
-                {
-                    bpWriter.Put(bpString, myString);
-                }
-
-                bpWriter.EndStep();
-            }
-
-            bpWriter.Close();
-        }
-        // MPI_Barrier(MPI_COMM_WORLD);
-
-        if (false)
-        { /////////////////////READ
-            //            if (rank == 0)
-            //            {
-            adios2::IO ioReader = adios.DeclareIO("bpReader");
-
-            adios2::Engine bpReader = ioReader.Open("myVector.bp", adios2::Mode::Read);
-
-            adios2::Variable<float> bpFloats000 = ioReader.InquireVariable<float>("bpFloats000");
-
-            adios2::Variable<std::string> bpString =
-                ioReader.InquireVariable<std::string>("bpString");
-
-            if (bpFloats000)
-            {
-                bpFloats000.SetSelection({{rank * Nx}, {Nx}});
-                bpFloats000.SetStepSelection({2, 1});
-
-                std::vector<float> data(bpFloats000.SelectionSize());
-                bpReader.Get(bpFloats000, data.data(), adios2::Mode::Sync);
-
-                std::cout << "Data timestep " << bpFloats000.StepsStart() << " from rank " << rank
-                          << ": ";
-                for (const auto datum : data)
-                {
-                    std::cout << datum << " ";
-                }
-                std::cout << "\n";
-            }
-            else
-            {
-                std::cout << "Variable bpFloats000 not found\n";
-            }
-
-            if (bpString)
-            {
-                bpString.SetStepSelection({3, 1});
-
-                std::string myString;
-                bpReader.Get(bpString, myString, adios2::Mode::Sync);
-                std::cout << myString << "\n";
-            }
-
-            bpReader.Close();
-        }
-    }
-    catch (std::invalid_argument &e)
-    {
-        std::cout << "Invalid argument exception, STOPPING PROGRAM from rank " << rank << "\n";
-        std::cout << e.what() << "\n";
-    }
-    catch (std::ios_base::failure &e)
-    {
-        std::cout << "IO System base failure exception, STOPPING PROGRAM "
-                     "from rank "
-                  << rank << "\n";
-        std::cout << e.what() << "\n";
-    }
-    catch (std::exception &e)
-    {
-        std::cout << "Exception, STOPPING PROGRAM from rank " << rank << "\n";
-        std::cout << e.what() << "\n";
-    }
-
-    MPI_Finalize();
-
-    return 0;
-}
diff --git a/examples/hello/bpTimeWriter/bpTimeWriter.py b/examples/hello/bpTimeWriter/bpTimeWriter.py
deleted file mode 100644
index 87afc7f1c1..0000000000
--- a/examples/hello/bpTimeWriter/bpTimeWriter.py
+++ /dev/null
@@ -1,51 +0,0 @@
-#
-# Distributed under the OSI-approved Apache License, Version 2.0.  See
-# accompanying file Copyright.txt for details.
-#
-# bpTimeWriter.py
-#  Created on: Feb 2, 2017
-#      Author: William F Godoy godoywf@ornl.gov
-
-from mpi4py import MPI
-import adios2
-import numpy as np
-
-
-# MPI
-comm = MPI.COMM_WORLD
-rank = comm.Get_rank()
-size = comm.Get_size()
-
-# User data
-myArray = np.array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
-nx = myArray.size
-time = np.array([0.0])
-
-# ADIOS
-adios = adios2.ADIOS(comm)
-
-# IO
-bpIO = adios.DeclareIO("BPN2N")
-
-# Variables
-bpArray = bpIO.DefineVariable(
-    "bpArray", myArray, [size * nx], [rank * nx], [nx], adios2.ConstantDims
-)
-bpTimeStep = bpIO.DefineVariable("bpTimeStep", time)
-
-# Engine
-bpFileWriter = bpIO.Open("myArray.bp", adios2.Mode.Write)
-# Doesn't work: bpFileWriter = bpIO.Open("myArray.bp", adios2.OpenModeWrite)
-# Doesn't work: bpFileWriter = bpIO.Open("myArray.bp", adiosOpenModeWrite,
-#                                                      MPI.COMM_WORLD)
-
-
-for t in range(0, 10):
-    bpFileWriter.BeginStep()
-    if rank == 0:
-        time[0] = t
-        bpFileWriter.Put(bpTimeStep, time)
-    bpFileWriter.Put(bpArray, myArray)
-    bpFileWriter.EndStep()
-
-bpFileWriter.Close()
diff --git a/examples/hello/bpTimeWriter/bpTimeWriter_nompi.cpp b/examples/hello/bpTimeWriter/bpTimeWriter_nompi.cpp
deleted file mode 100644
index ddfd113fa4..0000000000
--- a/examples/hello/bpTimeWriter/bpTimeWriter_nompi.cpp
+++ /dev/null
@@ -1,76 +0,0 @@
-/*
- * Distributed under the OSI-approved Apache License, Version 2.0.  See
- * accompanying file Copyright.txt for details.
- *
- * bpTimeWriter_nompi.cpp  no mpi version of bpTimeWriter.cpp
- *
- *  Created on: Feb 16, 2017
- *      Author: William F Godoy godoywf@ornl.gov
- */
-
-#include <ios>       //std::ios_base::failure
-#include <iostream>  //std::cout
-#include <stdexcept> //std::invalid_argument std::exception
-#include <vector>
-
-#include <adios2.h>
-
-int main(int argc, char *argv[])
-{
-    // Application variable
-    std::vector<float> myFloats = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9};
-    const std::size_t Nx = myFloats.size();
-
-    try
-    {
-        /** ADIOS class factory of IO class objects */
-        adios2::ADIOS adios;
-
-        /*** IO class object: settings and factory of Settings: Variables,
-         * Parameters, Transports, and Execution: Engines */
-        adios2::IO bpIO = adios.DeclareIO("BPFile_N2N");
-
-        /** name, { shape (total dimensions) }, { start (local) }, { count
-         * {local} } */
-        adios2::Variable<float> bpFloats =
-            bpIO.DefineVariable<float>("bpFloats", {}, {}, {Nx}, adios2::ConstantDims);
-
-        adios2::Variable<unsigned int> bpTimeStep = bpIO.DefineVariable<unsigned int>("timeStep");
-
-        /** Engine derived class, spawned to start IO operations */
-        adios2::Engine bpWriter = bpIO.Open("myVector.bp", adios2::Mode::Write);
-
-        for (unsigned int timeStep = 0; timeStep < 10; ++timeStep)
-        {
-            bpWriter.BeginStep();
-
-            // template type is optional but recommended
-            bpWriter.Put<unsigned int>(bpTimeStep, timeStep);
-
-            // modifying data
-            myFloats[0] = static_cast<float>(timeStep);
-            bpWriter.Put<float>(bpFloats, myFloats.data(), adios2::Mode::Sync);
-
-            bpWriter.EndStep();
-        }
-
-        bpWriter.Close();
-    }
-    catch (std::invalid_argument &e)
-    {
-        std::cout << "Invalid argument exception, STOPPING PROGRAM\n";
-        std::cout << e.what() << "\n";
-    }
-    catch (std::ios_base::failure &e)
-    {
-        std::cout << "IO System base failure exception, STOPPING PROGRAM\n";
-        std::cout << e.what() << "\n";
-    }
-    catch (std::exception &e)
-    {
-        std::cout << "Exception, STOPPING PROGRAM from rank\n";
-        std::cout << e.what() << "\n";
-    }
-
-    return 0;
-}
diff --git a/examples/hello/bpWriteReadCuda/bpWriteReadCuda.cu b/examples/hello/bpWriteReadCuda/bpWriteReadCuda.cu
deleted file mode 100644
index 5db837c79d..0000000000
--- a/examples/hello/bpWriteReadCuda/bpWriteReadCuda.cu
+++ /dev/null
@@ -1,113 +0,0 @@
-/*
- * Distributed under the OSI-approved Apache License, Version 2.0.  See
- * accompanying file Copyright.txt for details.
- *
- * Simple example of writing and reading data
- * through ADIOS2 BP engine with multiple simulations steps
- * for every IO step.
- */
-
-#include <ios>
-#include <iostream>
-#include <string>
-#include <vector>
-
-#include <adios2.h>
-
-#include <cuda_runtime.h>
-
-__global__ void update_array(float *vect, int val) { vect[blockIdx.x] += val; }
-
-std::string engine("BPFile");
-
-int BPWrite(const std::string fname, const size_t N, int nSteps, const std::string engine)
-{
-    // Initialize the simulation data
-    float *gpuSimData;
-    cudaMalloc(&gpuSimData, N * sizeof(float));
-    cudaMemset(gpuSimData, 0, N);
-
-    // Set up the ADIOS structures
-    adios2::ADIOS adios;
-    adios2::IO io = adios.DeclareIO("WriteIO");
-    io.SetEngine(engine);
-
-    // Declare an array for the ADIOS data of size (NumOfProcesses * N)
-    const adios2::Dims shape{static_cast<size_t>(N)};
-    const adios2::Dims start{static_cast<size_t>(0)};
-    const adios2::Dims count{N};
-    auto data = io.DefineVariable<float>("data", shape, start, count);
-
-    adios2::Engine bpWriter = io.Open(fname, adios2::Mode::Write);
-
-    // Simulation steps
-    for (size_t step = 0; step < nSteps; ++step)
-    {
-        // Make a 1D selection to describe the local dimensions of the
-        // variable we write and its offsets in the global spaces
-        adios2::Box<adios2::Dims> sel({0}, {N});
-        data.SetSelection(sel);
-
-        // Start IO step every write step
-        bpWriter.BeginStep();
-        data.SetMemorySpace(adios2::MemorySpace::GPU);
-        bpWriter.Put(data, gpuSimData);
-        bpWriter.EndStep();
-
-        // Update values in the simulation data
-        update_array<<<N, 1>>>(gpuSimData, 10);
-    }
-
-    bpWriter.Close();
-    return 0;
-}
-
-int BPRead(const std::string fname, const size_t N, int nSteps, const std::string engine)
-{
-    // Create ADIOS structures
-    adios2::ADIOS adios;
-    adios2::IO io = adios.DeclareIO("ReadIO");
-    io.SetEngine(engine);
-
-    adios2::Engine bpReader = io.Open(fname, adios2::Mode::Read);
-
-    unsigned int step = 0;
-    float *gpuSimData;
-    cudaMalloc(&gpuSimData, N * sizeof(float));
-    cudaMemset(gpuSimData, 0, N);
-    for (; bpReader.BeginStep() == adios2::StepStatus::OK; ++step)
-    {
-        auto data = io.InquireVariable<float>("data");
-        std::vector<float> simData(N);
-        const adios2::Dims start{0};
-        const adios2::Dims count{N};
-        const adios2::Box<adios2::Dims> sel(start, count);
-        data.SetSelection(sel);
-
-        data.SetMemorySpace(adios2::MemorySpace::GPU);
-        bpReader.Get(data, gpuSimData); //, adios2::Mode::Deferred);
-        bpReader.EndStep();
-        cudaMemcpy(simData.data(), gpuSimData, N * sizeof(float), cudaMemcpyDeviceToHost);
-        std::cout << "Simualation step " << step << " : ";
-        std::cout << simData.size() << " elements: " << simData[1] << std::endl;
-    }
-    bpReader.Close();
-    return 0;
-}
-
-int main(int argc, char **argv)
-{
-    if (argv[1])
-        engine = argv[1];
-    std::cout << "Using engine " << engine << std::endl;
-
-    const std::string fname("Cuda" + engine + "wr.bp");
-    const int device_id = 1;
-    cudaSetDevice(device_id);
-    const size_t N = 6000;
-    int nSteps = 10, ret = 0;
-
-    ret += BPWrite(fname, N, nSteps, engine);
-    ret += BPRead(fname, N, nSteps, engine);
-    return ret;
-}
diff --git a/examples/hello/bpWriteReadHip/CMakeLists.txt b/examples/hello/bpWriteReadHip/CMakeLists.txt
deleted file mode 100644
index bb1b6d3acf..0000000000
--- a/examples/hello/bpWriteReadHip/CMakeLists.txt
+++ /dev/null
@@ -1,25 +0,0 @@
-#------------------------------------------------------------------------------#
-# Distributed under the OSI-approved Apache License, Version 2.0.  See
-# accompanying file Copyright.txt for details.
-#------------------------------------------------------------------------------#
-
-cmake_minimum_required(VERSION 3.12)
-project(ADIOS2HelloBPWriteReadHipExample)
-
-if(NOT TARGET adios2_core)
-  set(_components CXX)
-
-  find_package(hip QUIET)
-  if(hip_FOUND)
-    enable_language(HIP)
-  endif()
-
-  find_package(ADIOS2 REQUIRED COMPONENTS ${_components})
-endif()
-
-if(ADIOS2_HAVE_Kokkos_HIP OR hip_FOUND)
-  add_executable(adios2_hello_bpWriteReadHip bpWriteReadHip.cpp)
-  target_link_libraries(adios2_hello_bpWriteReadHip adios2::cxx11 hip::device)
-  set_source_files_properties(bpWriteReadHip.cpp PROPERTIES LANGUAGE HIP)
-  install(TARGETS adios2_hello_bpWriteReadHip RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR})
-endif()
diff --git a/examples/hello/bpWriteReadHip/bpWriteReadHip.cpp b/examples/hello/bpWriteReadHip/bpWriteReadHip.cpp
deleted file mode 100644
index 755bb3f735..0000000000
--- a/examples/hello/bpWriteReadHip/bpWriteReadHip.cpp
+++ /dev/null
@@ -1,135 +0,0 @@
-/*
- * Distributed under the OSI-approved Apache License, Version 2.0.  See
- * accompanying file Copyright.txt for details.
- */
-#include <algorithm>
-#include <ios>
-#include <iostream>
-#include <vector>
-
-#include <adios2.h>
-
-#include <hip/hip_runtime.h>
-
-__global__ void hip_initialize(float *vec) { vec[hipBlockIdx_x] = hipBlockIdx_x; }
-
-__global__ void hip_increment(float *vec, float val) { vec[hipBlockIdx_x] += val; }
-
-int BPWrite(const std::string fname, const size_t N, int nSteps, const std::string engine)
-{
-    hipError_t hipExit;
-    float *gpuSimData;
-    hipExit = hipMalloc((void **)&gpuSimData, N * sizeof(float));
-    if (hipExit != hipSuccess)
-    {
-        std::cout << "[BPWrite] error: " << hipGetErrorString(hipExit) << std::endl;
-        return 1;
-    }
-    hipLaunchKernelGGL(hip_initialize, dim3(N), dim3(1), 0, 0, gpuSimData);
-    hipExit = hipDeviceSynchronize();
-    if (hipExit != hipSuccess)
-    {
-        std::cout << "[BPWrite] error: " << hipGetErrorString(hipExit) << std::endl;
-        return 1;
-    }
-
-    adios2::ADIOS adios;
-    adios2::IO io = adios.DeclareIO("WriteIO");
-    io.SetEngine(engine);
-
-    const adios2::Dims shape{static_cast<size_t>(N)};
-    const adios2::Dims start{static_cast<size_t>(0)};
-    const adios2::Dims count{N};
-    auto data = io.DefineVariable<float>("data", shape, start, count);
-
-    adios2::Engine bpWriter = io.Open(fname, adios2::Mode::Write);
-
-    for (size_t step = 0; step < nSteps; ++step)
-    {
-        adios2::Box<adios2::Dims> sel({0}, {N});
-        data.SetSelection(sel);
-
-        bpWriter.BeginStep();
-        bpWriter.Put(data, gpuSimData);
-        bpWriter.EndStep();
-
-        hipLaunchKernelGGL(hip_increment, dim3(N), dim3(1), 0, 0, gpuSimData, 10);
-        hipExit = hipDeviceSynchronize();
-        if (hipExit != hipSuccess)
-        {
-            std::cout << "[BPWrite] error: " << hipGetErrorString(hipExit) << std::endl;
-            return 1;
-        }
-    }
-
-    bpWriter.Close();
-    return 0;
-}
-
-int BPRead(const std::string fname, const size_t N, int nSteps, const std::string engine)
-{
-    hipError_t hipExit;
-    adios2::ADIOS adios;
-    adios2::IO io = adios.DeclareIO("ReadIO");
-    io.SetEngine(engine);
-
-    adios2::Engine bpReader = io.Open(fname, adios2::Mode::Read);
-
-    unsigned int step = 0;
-    float *gpuSimData;
-    hipExit = hipMalloc((void **)&gpuSimData, N * sizeof(float));
-    if (hipExit != hipSuccess)
-    {
-        std::cout << "[BPWrite] error: " << hipGetErrorString(hipExit) << std::endl;
-        return 1;
-    }
-    for (; bpReader.BeginStep() == adios2::StepStatus::OK; ++step)
-    {
-        auto data = io.InquireVariable<float>("data");
-        const adios2::Dims start{0};
-        const adios2::Dims count{N};
-        const adios2::Box<adios2::Dims> sel(start, count);
-        data.SetSelection(sel);
-
-        bpReader.Get(data, gpuSimData);
-        bpReader.EndStep();
-
-        std::vector<float> cpuData(N);
-        hipExit = hipMemcpy(cpuData.data(), gpuSimData, N * sizeof(float), hipMemcpyDeviceToHost);
-        if (hipExit != hipSuccess)
-        {
-            std::cout << "[BPWrite] error: " << hipGetErrorString(hipExit) << std::endl;
-            return 1;
-        }
-        std::cout << "Simualation step " << step << " : ";
-        std::cout << cpuData.size() << " elements: " << cpuData[0];
-        std::cout << " " << cpuData[1] << " ... ";
-        std::cout << cpuData[cpuData.size() - 1] << std::endl;
-    }
-    bpReader.Close();
-    return 0;
-}
-
-int main(int argc, char **argv)
-{
-    hipError_t hipExit;
-    const int device_id = 0;
-    hipExit = hipSetDevice(device_id);
-    if (hipExit != hipSuccess)
-    {
-        std::cout << "[BPWrite] error: " << hipGetErrorString(hipExit) << std::endl;
-        return 1;
-    }
-    const std::vector<std::string> list_of_engines = {"BPFile"};
-    const size_t N = 6000;
-    int nSteps = 2, ret = 0;
-
-    for (auto engine : list_of_engines)
-    {
-        std::cout << "Using engine " << engine << std::endl;
-        const std::string fname(engine + "_HIP_WR.bp");
-        ret += BPWrite(fname, N, nSteps, engine);
-        ret += BPRead(fname, N, nSteps, engine);
-    }
-    return ret;
-}
diff --git a/examples/hello/bpWriteReadKokkos/bpWriteReadKokkos.cpp b/examples/hello/bpWriteReadKokkos/bpWriteReadKokkos.cpp
deleted file mode 100644
index fd972ed9b0..0000000000
--- a/examples/hello/bpWriteReadKokkos/bpWriteReadKokkos.cpp
+++ /dev/null
@@ -1,117 +0,0 @@
-/*
- * Distributed under the OSI-approved Apache License, Version 2.0.  See
- * accompanying file Copyright.txt for details.
- */
-#include <ios>
-#include <iostream>
-#include <vector>
-
-#include <adios2.h>
-
-#include <Kokkos_Core.hpp>
-
-int BPWrite(const std::string fname, const size_t N, int nSteps, const std::string engine)
-{
-    // Initialize the simulation data with the default memory space
-    using mem_space = Kokkos::DefaultExecutionSpace::memory_space;
-    Kokkos::View<float *, mem_space> gpuSimData("simBuffer", N);
-    Kokkos::parallel_for(
-        "initBuffer", Kokkos::RangePolicy<Kokkos::DefaultExecutionSpace>(0, N),
-        KOKKOS_LAMBDA(int i) { gpuSimData(i) = static_cast<float>(i); });
-    Kokkos::fence();
-
-    // Set up the ADIOS structures
-    adios2::ADIOS adios;
-    adios2::IO io = adios.DeclareIO("WriteIO");
-    io.SetEngine(engine);
-
-    const adios2::Dims shape{static_cast<size_t>(N)};
-    const adios2::Dims start{static_cast<size_t>(0)};
-    const adios2::Dims count{N};
-    auto data = io.DefineVariable<float>("data", shape, start, count);
-
-    adios2::Engine bpWriter = io.Open(fname, adios2::Mode::Write);
-
-    // Simulation steps
-    for (int step = 0; step < nSteps; ++step)
-    {
-        // Make a 1D selection to describe the local dimensions of the
-        // variable we write and its offsets in the global spaces
-        adios2::Box<adios2::Dims> sel({0}, {N});
-        data.SetSelection(sel);
-
-        // Start IO step every write step
-        bpWriter.BeginStep();
-        bpWriter.Put(data, gpuSimData.data());
-        bpWriter.EndStep();
-
-        // Update values in the simulation data using the default
-        // execution space
-        Kokkos::parallel_for(
-            "updateBuffer", Kokkos::RangePolicy<Kokkos::DefaultExecutionSpace>(0, N),
-            KOKKOS_LAMBDA(int i) { gpuSimData(i) += 10; });
-        Kokkos::fence();
-    }
-
-    bpWriter.Close();
-    Kokkos::DefaultExecutionSpace exe_space;
-    std::cout << "Done writing on memory space: " << exe_space.name() << std::endl;
-    return 0;
-}
-
-int BPRead(const std::string fname, const size_t N, int nSteps, const std::string engine)
-{
-    // Create ADIOS structures
-    adios2::ADIOS adios;
-    adios2::IO io = adios.DeclareIO("ReadIO");
-    io.SetEngine(engine);
-
-    Kokkos::DefaultExecutionSpace exe_space;
-    std::cout << "Read on memory space: " << exe_space.name() << std::endl;
-
-    adios2::Engine bpReader = io.Open(fname, adios2::Mode::Read);
-
-    unsigned int step = 0;
-    using mem_space = Kokkos::DefaultExecutionSpace::memory_space;
-    Kokkos::View<float *, mem_space> gpuSimData("simBuffer", N);
-    for (; bpReader.BeginStep() == adios2::StepStatus::OK; ++step)
-    {
-        auto data = io.InquireVariable<float>("data");
-        const adios2::Dims start{0};
-        const adios2::Dims count{N};
-        const adios2::Box<adios2::Dims> sel(start, count);
-        data.SetSelection(sel);
-
-        bpReader.Get(data, gpuSimData.data());
-        bpReader.EndStep();
-
-        auto cpuData = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace{}, gpuSimData);
-        std::cout << "Simualation step " << step << " : ";
-        std::cout << cpuData.size() << " elements: " << cpuData[0];
-        std::cout << " " << cpuData[1] << " ... ";
-        std::cout << cpuData[cpuData.size() - 1] << std::endl;
-    }
-
-    bpReader.Close();
-    return 0;
-}
-
-int main(int argc, char **argv)
-{
-    const std::vector<std::string> list_of_engines = {"BPFile"};
-    const size_t N = 6000;
-    int nSteps = 2, ret = 0;
-
-    Kokkos::initialize(argc, argv);
-    {
-        for (auto engine : list_of_engines)
-        {
-            std::cout << "Using engine " << engine << std::endl;
-            const std::string fname(engine + "_Kokkos_WR.bp");
-            ret += BPWrite(fname, N, nSteps, engine);
-            ret += BPRead(fname, N, nSteps, engine);
-        }
-    }
-    Kokkos::finalize();
-    return ret;
-}
diff --git a/examples/hello/bpWriter/CMakeLists.txt b/examples/hello/bpWriter/CMakeLists.txt
index 0e48312f8c..9eb0fbf91a 100644
--- a/examples/hello/bpWriter/CMakeLists.txt
+++ b/examples/hello/bpWriter/CMakeLists.txt
@@ -27,8 +27,6 @@ if(NOT TARGET adios2_core)
   endif()
   list(APPEND _components CXX)
 
-  find_package(SZ QUIET)
-
   find_package(ADIOS2 REQUIRED COMPONENTS ${_components})
 endif()
 
@@ -48,12 +46,6 @@ add_executable(adios2_hello_bpSubStreams bpSubStreams.cpp)
 target_link_libraries(adios2_hello_bpSubStreams adios2::cxx11)
 install(TARGETS adios2_hello_bpSubStreams RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR})
 
-if(ADIOS2_HAVE_SZ)
-  add_executable(adios2_hello_bpSZ bpSZ.cpp)
-  target_link_libraries(adios2_hello_bpSZ adios2::cxx11)
-  install(TARGETS adios2_hello_bpSZ RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR})
-endif()
-
 if(ADIOS2_HAVE_Fortran)
   add_executable(adios2_hello_bpWriter_f bpWriter.F90)
   target_link_libraries(adios2_hello_bpWriter_f adios2::fortran)
@@ -77,12 +69,6 @@ if(ADIOS2_HAVE_MPI)
   target_link_libraries(adios2_hello_bpSubStreams_mpi adios2::cxx11_mpi MPI::MPI_C)
   install(TARGETS adios2_hello_bpSubStreams_mpi RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR})
 
-  if(ADIOS2_HAVE_SZ)
-    add_executable(adios2_hello_bpSZ_mpi bpSZ.cpp)
-    target_link_libraries(adios2_hello_bpSZ_mpi adios2::cxx11_mpi MPI::MPI_C)
-    install(TARGETS adios2_hello_bpSZ_mpi RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR})
-  endif()
-
   if(ADIOS2_HAVE_Fortran)
     add_executable(adios2_hello_bpWriter_f_mpi bpWriter.F90)
     target_link_libraries(adios2_hello_bpWriter_f_mpi adios2::fortran_mpi MPI::MPI_Fortran)
diff --git a/examples/hello/bpWriter/bpWriter.cpp b/examples/hello/bpWriter/bpWriter.cpp
index 6afd440791..112feb21f5 100644
--- a/examples/hello/bpWriter/bpWriter.cpp
+++ b/examples/hello/bpWriter/bpWriter.cpp
@@ -69,22 +69,22 @@ int main(int argc, char *argv[])
 
         std::string filename = "myVector_cpp.bp";
         /** Engine derived class, spawned to start IO operations */
-        adios2::Engine bpFileWriter = bpIO.Open(filename, adios2::Mode::Write);
+        adios2::Engine bpWriter = bpIO.Open(filename, adios2::Mode::Write);
 
-        bpFileWriter.BeginStep();
+        bpWriter.BeginStep();
         /** Put variables for buffering, template type is optional */
-        bpFileWriter.Put<float>(bpFloats, myFloats.data());
-        bpFileWriter.Put(bpInts, myInts.data());
-        // bpFileWriter.Put(bpString, myString);
-        bpFileWriter.EndStep();
+        bpWriter.Put(bpFloats, myFloats.data());
+        bpWriter.Put(bpInts, myInts.data());
+        // bpWriter.Put(bpString, myString);
+        bpWriter.EndStep();
 
         /** Create bp file, engine becomes unreachable after this*/
-        bpFileWriter.Close();
+        bpWriter.Close();
         if (rank == 0)
         {
             std::cout << "Wrote file " << filename
                       << " to disk. It can now be read by running "
-                         "./bin/hello_bpReader.\n";
+                         "./bin/adios2_hello_bpReader.\n";
         }
     }
     catch (std::invalid_argument &e)
diff --git a/examples/hello/bpWriter/bpWriter_nompi.py b/examples/hello/bpWriter/bpWriter_nompi.py
deleted file mode 100644
index 1de8ce4dd6..0000000000
--- a/examples/hello/bpWriter/bpWriter_nompi.py
+++ /dev/null
@@ -1,30 +0,0 @@
-#
-# Distributed under the OSI-approved Apache License, Version 2.0.  See
-# accompanying file Copyright.txt for details.
-#
-# bpWriter_nonmpi.py : only works with non MPI version
-#  Created on: Feb 2, 2017
-#      Author: William F Godoy godoywf@ornl.gov
-
-import numpy
-import adios2
-
-# User data
-myArray = numpy.array([0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0])
-Nx = myArray.size
-
-#  adios
-adios = adios2.ADIOS()
-
-# ADIOS IO
-bpIO = adios.DeclareIO("BPFile_N2N")
-
-# ADIOS Variable name, shape, start, offset, constant dims
-ioArray = bpIO.DefineVariable("bpArray", myArray, [], [], [Nx], adios2.ConstantDims)
-
-# ADIOS Engine
-bpFileWriter = bpIO.Open("npArray.bp", adios2.Mode.Write)
-bpFileWriter.BeginStep()
-bpFileWriter.Put(ioArray, myArray, adios2.Mode.Sync)
-bpFileWriter.EndStep()
-bpFileWriter.Close()
diff --git a/examples/hello/bpWriter/bpWriter_tutorialSkeleton.cpp b/examples/hello/bpWriter/bpWriter_tutorialSkeleton.cpp
new file mode 100644
index 0000000000..d24a7f9bfe
--- /dev/null
+++ b/examples/hello/bpWriter/bpWriter_tutorialSkeleton.cpp
@@ -0,0 +1,61 @@
+/*
+ * Distributed under the OSI-approved Apache License, Version 2.0.  See
+ * accompanying file Copyright.txt for details.
+ *
+ * bpWriter.cpp: Simple self-descriptive example of how to write a variable
+ * to a BP File that lives in several MPI processes.
+ *
+ *  Created on: Feb 16, 2017
+ *      Author: William F Godoy godoywf@ornl.gov
+ */
+
+#include <ios>       //std::ios_base::failure
+#include <iostream>  //std::cout
+#include <stdexcept> //std::invalid_argument std::exception
+#include <vector>
+
+#include <adios2.h>
+#include <mpi.h>
+
+int main(int argc, char *argv[])
+{
+    int rank, size;
+    int provided;
+    // Add code to init MPI
+
+    // Add code to create arrays
+    try
+    {
+        // Add code to create ADIOS object
+
+        // Add code to create IO object
+
+        // Add code to create variables
+
+        // Add code to open file
+
+        // Add code to write variables
+
+        // Add code to close file
+    }
+    catch (std::invalid_argument &e)
+    {
+        std::cerr << "Invalid argument exception: " << e.what() << "\n";
+        MPI_Abort(MPI_COMM_WORLD, 1);
+    }
+    catch (std::ios_base::failure &e)
+    {
+        std::cerr << "IO System base failure exception: " << e.what() << "\n";
+        std::cerr << "STOPPING PROGRAM from rank " << rank << "\n";
+    }
+    catch (std::exception &e)
+    {
+        std::cerr << "Exception: " << e.what() << "\n";
+        std::cerr << "STOPPING PROGRAM from rank " << rank << "\n";
+        MPI_Abort(MPI_COMM_WORLD, 1);
+    }
+
+    // Add code to finalize MPI
+
+    return 0;
+}
diff --git a/examples/hello/datamanKokkos/CMakeLists.txt b/examples/hello/datamanKokkos/CMakeLists.txt
new file mode 100644
index 0000000000..9bbe1a2a3e
--- /dev/null
+++ b/examples/hello/datamanKokkos/CMakeLists.txt
@@ -0,0 +1,42 @@
+#------------------------------------------------------------------------------#
+# Distributed under the OSI-approved Apache License, Version 2.0.  See
+# accompanying file Copyright.txt for details.
+#------------------------------------------------------------------------------#
+
+cmake_minimum_required(VERSION 3.12)
+project(ADIOS2HelloDataManKokkosExample)
+
+if(NOT TARGET adios2_core)
+  set(_components CXX)
+
+  find_package(MPI COMPONENTS C)
+  if(MPI_FOUND)
+    # Workaround for various MPI implementations forcing the link of C++ bindings
+    add_definitions(-DOMPI_SKIP_MPICXX -DMPICH_SKIP_MPICXX)
+
+    list(APPEND _components MPI)
+  endif()
+
+  find_package(ZeroMQ 4.1 QUIET)
+
+  find_package(Kokkos 3.7 QUIET)
+  if(Kokkos_FOUND AND DEFINED Kokkos_CXX_COMPILER)
+    set(CMAKE_CXX_COMPILER "${Kokkos_CXX_COMPILER}")
+  endif()
+
+  find_package(ADIOS2 REQUIRED COMPONENTS ${_components})
+else()
+  if(DEFINED Kokkos_CXX_COMPILER)
+    set(CMAKE_CXX_COMPILER "${Kokkos_CXX_COMPILER}")
+  endif()
+endif()
+
+if(ADIOS2_HAVE_MPI AND ADIOS2_HAVE_DataMan)
+  add_executable(adios2_hello_datamanWriterKokkos dataManWriterKokkos.cpp)
+  target_link_libraries(adios2_hello_datamanWriterKokkos adios2::cxx11_mpi MPI::MPI_C Kokkos::kokkos)
+  install(TARGETS adios2_hello_datamanWriterKokkos RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR})
+
+  add_executable(adios2_hello_datamanReaderKokkos dataManReaderKokkos.cpp)
+  target_link_libraries(adios2_hello_datamanReaderKokkos adios2::cxx11_mpi MPI::MPI_C Kokkos::kokkos)
+  install(TARGETS adios2_hello_datamanReaderKokkos RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR})
+endif()
diff --git a/examples/hello/datamanKokkos/dataManReaderKokkos.cpp b/examples/hello/datamanKokkos/dataManReaderKokkos.cpp
new file mode 100644
index 0000000000..7030b2fe85
--- /dev/null
+++ b/examples/hello/datamanKokkos/dataManReaderKokkos.cpp
@@ -0,0 +1,76 @@
+#include <adios2.h>
+#include <chrono>
+#include <iostream>
+#include <mpi.h>
+#include <numeric>
+#include <thread>
+#include <vector>
+
+#include <adios2/cxx11/KokkosView.h>
+
+#include <Kokkos_Core.hpp>
+
+int mpiRank, mpiSize;
+
+template <class T, class MemSpace>
+void PrintData(Kokkos::View<T *, MemSpace> &gpuData, const size_t step)
+{
+    auto data = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace{}, gpuData);
+    std::cout << "Rank: " << mpiRank << " Step: " << step << " [";
+    for (int i = 0; i < data.extent_int(0); ++i)
+    {
+        std::cout << data(i) << " ";
+    }
+    std::cout << "]" << std::endl;
+}
+
+int main(int argc, char *argv[])
+{
+    // initialize MPI
+    MPI_Init(&argc, &argv);
+    MPI_Comm_rank(MPI_COMM_WORLD, &mpiRank);
+    MPI_Comm_size(MPI_COMM_WORLD, &mpiSize);
+
+    // initialize adios2
+    adios2::ADIOS adios(MPI_COMM_WORLD);
+    adios2::IO dataManIO = adios.DeclareIO("whatever");
+    dataManIO.SetEngine("DataMan");
+    dataManIO.SetParameters({{"IPAddress", "127.0.0.1"}, {"Port", "12306"}, {"Timeout", "5"}});
+
+    // open stream
+    adios2::Engine dataManReader = dataManIO.Open("HelloDataMan", adios2::Mode::Read);
+
+    // define variable
+    adios2::Variable<float> floatArrayVar;
+
+    Kokkos::DefaultExecutionSpace exe_space;
+    std::cout << "Read on memory space: " << exe_space.name() << std::endl;
+    // read data
+    while (true)
+    {
+        auto status = dataManReader.BeginStep();
+        if (status == adios2::StepStatus::OK)
+        {
+            floatArrayVar = dataManIO.InquireVariable<float>("FloatArray");
+            auto shape = floatArrayVar.Shape();
+            size_t datasize =
+                std::accumulate(shape.begin(), shape.end(), 1, std::multiplies<size_t>());
+            Kokkos::View<float *, Kokkos::DefaultExecutionSpace::memory_space> floatVector(
+                "simBuffer", datasize);
+            dataManReader.Get<float>(floatArrayVar, floatVector, adios2::Mode::Sync);
+            dataManReader.EndStep();
+            PrintData(floatVector, dataManReader.CurrentStep());
+        }
+        else if (status == adios2::StepStatus::EndOfStream)
+        {
+            std::cout << "End of stream" << std::endl;
+            break;
+        }
+    }
+
+    // clean up
+    dataManReader.Close();
+    MPI_Finalize();
+
+    return 0;
+}
diff --git a/examples/hello/datamanKokkos/dataManWriterKokkos.cpp b/examples/hello/datamanKokkos/dataManWriterKokkos.cpp
new file mode 100644
index 0000000000..5c51ec987a
--- /dev/null
+++ b/examples/hello/datamanKokkos/dataManWriterKokkos.cpp
@@ -0,0 +1,97 @@
+/*
+ * Distributed under the OSI-approved Apache License, Version 2.0.  See
+ * accompanying file Copyright.txt for details.
+ *
+ * datamanWriterKokkos.cpp  Simple example of writing multiple steps of a 2D float Kokkos::View
+ * through ADIOS2 DataMan
+ */
+#include <adios2.h>
+#include <adios2/cxx11/KokkosView.h>
+#include <iostream>
+#include <mpi.h>
+#include <numeric>
+#include <thread>
+#include <vector>
+
+#include <Kokkos_Core.hpp>
+
+size_t Nx = 10;
+size_t Ny = 10;
+size_t steps = 2;
+adios2::Dims shape;
+adios2::Dims start;
+adios2::Dims count;
+
+int mpiRank, mpiSize;
+
+template <class T, class MemSpace>
+void PrintData(Kokkos::View<T **, MemSpace> &gpuData, const size_t step)
+{
+    auto data = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace{}, gpuData);
+    std::cout << "Rank: " << mpiRank << " Step: " << step << " [";
+    for (int i = 0; i < data.extent_int(0); ++i)
+        for (int j = 0; j < data.extent_int(1); ++j)
+            std::cout << data(i, j) << " ";
+    std::cout << "]" << std::endl;
+}
+
+template <class T, class MemSpace, class ExecSpace>
+Kokkos::View<T **, MemSpace> GenerateData(const size_t step, const size_t Ny, const size_t mpiRank)
+{
+    Kokkos::View<T **, MemSpace> gpuSimData("simBuffer", Nx, Ny);
+    static_assert(Kokkos::SpaceAccessibility<ExecSpace, MemSpace>::accessible, "");
+    Kokkos::parallel_for(
+        "initBuffer", Kokkos::RangePolicy<ExecSpace>(0, Nx), KOKKOS_LAMBDA(int i) {
+            for (int j = 0; j < Ny; j++)
+                gpuSimData(i, j) = static_cast<float>(i * Ny + j) + mpiRank * 10000 + step;
+        });
+    Kokkos::fence();
+    ExecSpace exe_space;
+    std::cout << "Create data for step " << step << " on memory space: " << exe_space.name()
+              << std::endl;
+    return gpuSimData;
+}
+
+int main(int argc, char *argv[])
+{
+    // initialize MPI
+    MPI_Init(&argc, &argv);
+    MPI_Comm_rank(MPI_COMM_WORLD, &mpiRank);
+    MPI_Comm_size(MPI_COMM_WORLD, &mpiSize);
+
+    // initialize data dimensions
+    count = {Nx, Ny};
+    start = {mpiRank * Nx, 0};
+    shape = {mpiSize * Nx, Ny};
+
+    // initialize adios2
+    adios2::ADIOS adios(MPI_COMM_WORLD);
+    adios2::IO dataManIO = adios.DeclareIO("whatever");
+    dataManIO.SetEngine("DataMan");
+    dataManIO.SetParameters({{"IPAddress", "127.0.0.1"},
+                             {"Port", "12306"},
+                             {"Timeout", "5"},
+                             {"RendezvousReaderCount", "1"}});
+
+    // open stream
+    adios2::Engine dataManWriter = dataManIO.Open("HelloDataMan", adios2::Mode::Write);
+
+    // define variable
+    auto floatArrayVar = dataManIO.DefineVariable<float>("FloatArray", shape, start, count);
+
+    // write data
+    for (size_t i = 0; i < steps; ++i)
+    {
+        auto floatVector = GenerateData<float, Kokkos::DefaultExecutionSpace::memory_space,
+                                        Kokkos::DefaultExecutionSpace>(i, Ny, mpiRank);
+        dataManWriter.BeginStep();
+        dataManWriter.Put(floatArrayVar, floatVector, adios2::Mode::Sync);
+        PrintData(floatVector, dataManWriter.CurrentStep());
+        dataManWriter.EndStep();
+    }
+
+    dataManWriter.Close();
+    MPI_Finalize();
+
+    return 0;
+}
diff --git a/examples/hello/helloWorld/hello-world.py b/examples/hello/helloWorld/hello-world.py
index e2b3bd7f8d..685d648597 100644
--- a/examples/hello/helloWorld/hello-world.py
+++ b/examples/hello/helloWorld/hello-world.py
@@ -39,6 +39,7 @@ def reader(ad):
     var_greeting = io.InquireVariable("Greeting")
     message = r.Get(var_greeting)
     r.EndStep()
+    r.Close()
     return message
 
 
diff --git a/examples/hello/helloWorld/hello-world_tutorialSkeleton.cpp b/examples/hello/helloWorld/hello-world_tutorialSkeleton.cpp
new file mode 100644
index 0000000000..df89e385e3
--- /dev/null
+++ b/examples/hello/helloWorld/hello-world_tutorialSkeleton.cpp
@@ -0,0 +1,39 @@
+/*
+ * Distributed under the OSI-approved Apache License, Version 2.0.  See
+ * accompanying file Copyright.txt for details.
+ *
+ * hello-world.cpp : adios2 low-level API example to write and read a
+ *                   std::string Variable with a greeting
+ *
+ *  Created on: Nov 14, 2019
+ *      Author: William F Godoy godoywf@ornl.gov
+ */
+
+#include <iostream>
+#include <stdexcept>
+
+#include <adios2.h>
+
+void writer(adios2::ADIOS &adios, const std::string &greeting)
+{
+    // Add code
+}
+
+std::string reader(adios2::ADIOS &adios)
+{
+    // Add code
+}
+
+int main(int argc, char *argv[])
+{
+    try
+    {
+        // Add code
+    }
+    catch (std::exception &e)
+    {
+        std::cout << "ERROR: ADIOS2 exception: " << e.what() << "\n";
+    }
+
+    return 0;
+}
diff --git a/examples/hello/sstKokkos/CMakeLists.txt b/examples/hello/sstKokkos/CMakeLists.txt
new file mode 100644
index 0000000000..4074583bb7
--- /dev/null
+++ b/examples/hello/sstKokkos/CMakeLists.txt
@@ -0,0 +1,37 @@
+#-- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- #
+#Distributed under the OSI - approved Apache License, Version 2.0. See
+#accompanying file Copyright.txt for details.
+#-- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- #
+
+cmake_minimum_required(VERSION 3.12)
+project(ADIOS2HelloSSTKokkosExample)
+
+#CXX Compiler settings only in for this example
+set(CMAKE_CXX_STANDARD 17)
+set(CMAKE_CXX_STANDARD_REQUIRED ON)
+
+if (NOT TARGET adios2_core)
+  set(_components CXX)
+
+  find_package(Kokkos 3.7 QUIET)
+  if (Kokkos_FOUND AND DEFINED Kokkos_CXX_COMPILER)
+      set(CMAKE_CXX_COMPILER "${Kokkos_CXX_COMPILER}")
+  endif()
+
+  find_package(ADIOS2 REQUIRED COMPONENTS ${_components})
+else()
+  if (DEFINED Kokkos_CXX_COMPILER)
+      set(CMAKE_CXX_COMPILER "${Kokkos_CXX_COMPILER}")
+  endif()
+endif()
+
+if (ADIOS2_HAVE_Kokkos)
+  add_executable(adios2_hello_sstWriterKokkos sstWriterKokkos.cpp)
+  add_executable(adios2_hello_sstReaderKokkos sstReaderKokkos.cpp)
+  kokkos_compilation(SOURCE sstWriterKokkos.cpp)
+  kokkos_compilation(SOURCE sstReaderKokkos.cpp)
+  target_link_libraries(adios2_hello_sstWriterKokkos adios2::cxx11 Kokkos::kokkos)
+  install(TARGETS adios2_hello_sstWriterKokkos RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR})
+  target_link_libraries(adios2_hello_sstReaderKokkos adios2::cxx11 Kokkos::kokkos)
+  install(TARGETS adios2_hello_sstReaderKokkos RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR})
+endif()
diff --git a/examples/hello/sstKokkos/sstReaderKokkos.cpp b/examples/hello/sstKokkos/sstReaderKokkos.cpp
new file mode 100644
index 0000000000..6dd6f9eed8
--- /dev/null
+++ b/examples/hello/sstKokkos/sstReaderKokkos.cpp
@@ -0,0 +1,88 @@
+/*
+ * Distributed under the OSI-approved Apache License, Version 2.0.  See
+ * accompanying file Copyright.txt for details.
+ *
+ * sstReaderKokkos.cpp  Simple example of reading bpFloats through ADIOS2 SST
+ * engine with multiple simulations steps for every IO step using Kokkos
+ */
+#include <ios>
+#include <iostream>
+#include <vector>
+
+#include <adios2.h>
+#include <adios2/cxx11/KokkosView.h>
+
+#include <Kokkos_Core.hpp>
+
+template <class MemSpace, class ExecSpace>
+int BPRead(adios2::ADIOS &adios, const std::string fname, const size_t Nx, const size_t Ny,
+           const size_t nSteps, const std::string engine)
+{
+    adios2::IO io = adios.DeclareIO("ReadIO");
+    io.SetEngine(engine);
+
+    ExecSpace exe_space;
+    std::cout << "Read on memory space: " << exe_space.name() << std::endl;
+
+    adios2::Engine bpReader = io.Open(fname, adios2::Mode::Read);
+
+    unsigned int step = 0;
+    bool correctValues = true;
+    Kokkos::View<float **, MemSpace> gpuSimData("simBuffer", Nx, Ny);
+    for (; bpReader.BeginStep() == adios2::StepStatus::OK; ++step)
+    {
+        auto data = io.InquireVariable<float>("bpFloats");
+        const adios2::Dims start{0, 0};
+        const adios2::Dims count{Nx, Ny};
+        const adios2::Box<adios2::Dims> sel(start, count);
+        data.SetSelection(sel);
+
+        // var.SetMemorySpace(adios2::MemorySpace::GPU);
+        bpReader.Get(data, gpuSimData);
+        bpReader.EndStep();
+
+        auto cpuData = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace{}, gpuSimData);
+        if (cpuData(0, 0) != step * 10)
+        {
+            std::cout << "Value mismatch at step " << step << std::endl;
+            correctValues = false;
+            break;
+        }
+    }
+    if (correctValues)
+        std::cout << "Read " << step << " steps successfully" << std::endl;
+
+    bpReader.Close();
+    return 0;
+}
+
+int main(int argc, char **argv)
+{
+    const std::string engine = argv[1] ? argv[1] : "SST";
+    std::cout << "Using engine " << engine << std::endl;
+    const size_t Nx = 600, Ny = 100, nSteps = 2;
+    const std::string memorySpace = "Device";
+
+    const std::string filename = engine + "StepsWriteReadKokkos";
+    Kokkos::initialize(argc, argv);
+    {
+        adios2::ADIOS adios;
+
+        std::cout << "Using engine " << engine << std::endl;
+        if (memorySpace == "Device")
+        {
+            using mem_space = Kokkos::DefaultExecutionSpace::memory_space;
+            std::cout << "Memory space: DefaultMemorySpace" << std::endl;
+            BPRead<mem_space, Kokkos::DefaultExecutionSpace>(adios, filename + "_DD.bp", Nx, Ny,
+                                                             nSteps, engine);
+        }
+        else
+        {
+            std::cout << "Memory space: HostSpace" << std::endl;
+            BPRead<Kokkos::HostSpace, Kokkos::Serial>(adios, filename + "_HH.bp", Nx, Ny, nSteps,
+                                                      engine);
+        }
+    }
+    Kokkos::finalize();
+    return 0;
+}
diff --git a/examples/hello/sstKokkos/sstWriterKokkos.cpp b/examples/hello/sstKokkos/sstWriterKokkos.cpp
new file mode 100644
index 0000000000..5354d7e228
--- /dev/null
+++ b/examples/hello/sstKokkos/sstWriterKokkos.cpp
@@ -0,0 +1,96 @@
+/*
+ * Distributed under the OSI-approved Apache License, Version 2.0.  See
+ * accompanying file Copyright.txt for details.
+ *
+ * sstWriterKokkos.cpp  Simple example of writing bpFloats through ADIOS2 SST
+ * engine with multiple simulations steps for every IO step using Kokkos
+ */
+#include <ios>
+#include <iostream>
+#include <vector>
+
+#include <adios2.h>
+#include <adios2/cxx11/KokkosView.h>
+
+#include <Kokkos_Core.hpp>
+
+template <class MemSpace, class ExecSpace>
+int BPWrite(adios2::ADIOS &adios, const std::string fname, const size_t Nx, const size_t Ny,
+            const size_t nSteps, const std::string engine)
+{
+    // Initialize the simulation data
+    Kokkos::View<float **, MemSpace> gpuSimData("simBuffer", Nx, Ny);
+    static_assert(Kokkos::SpaceAccessibility<ExecSpace, MemSpace>::accessible, "");
+    Kokkos::parallel_for(
+        "initBuffer", Kokkos::RangePolicy<ExecSpace>(0, Nx), KOKKOS_LAMBDA(int i) {
+            for (int j = 0; j < Ny; j++)
+                gpuSimData(i, j) = static_cast<float>(i);
+        });
+    Kokkos::fence();
+
+    adios2::IO io = adios.DeclareIO("WriteIO");
+    io.SetEngine(engine);
+
+    const adios2::Dims shape{Nx, Ny};
+    const adios2::Dims start{0, 0};
+    const adios2::Dims count{Nx, Ny};
+    auto data = io.DefineVariable<float>("bpFloats", shape, start, count);
+
+    adios2::Engine bpWriter = io.Open(fname, adios2::Mode::Write);
+
+    // Simulation steps
+    for (int step = 0; step < nSteps; ++step)
+    {
+        adios2::Box<adios2::Dims> sel({0, 0}, {Nx, Ny});
+        data.SetSelection(sel);
+
+        bpWriter.BeginStep();
+        // var.SetMemorySpace(adios2::MemorySpace::GPU);
+        bpWriter.Put(data, gpuSimData);
+        bpWriter.EndStep();
+
+        // Update values in the simulation data
+        Kokkos::parallel_for(
+            "updateBuffer", Kokkos::RangePolicy<ExecSpace>(0, Nx), KOKKOS_LAMBDA(int i) {
+                for (int j = 0; j < Ny; j++)
+                    gpuSimData(i, j) += 10;
+            });
+        Kokkos::fence();
+    }
+
+    bpWriter.Close();
+    ExecSpace exe_space;
+    std::cout << "Done writing on memory space: " << exe_space.name() << std::endl;
+    return 0;
+}
+
+int main(int argc, char **argv)
+{
+    const std::string engine = argv[1] ? argv[1] : "SST";
+    std::cout << "Using engine " << engine << std::endl;
+    const size_t Nx = 600, Ny = 100, nSteps = 2;
+    const std::string memorySpace = "Device";
+
+    const std::string filename = engine + "StepsWriteReadKokkos";
+    Kokkos::initialize(argc, argv);
+    {
+        adios2::ADIOS adios;
+
+        std::cout << "Using engine " << engine << std::endl;
+        if (memorySpace == "Device")
+        {
+            using mem_space = Kokkos::DefaultExecutionSpace::memory_space;
+            std::cout << "Memory space: DefaultMemorySpace" << std::endl;
+            BPWrite<mem_space, Kokkos::DefaultExecutionSpace>(adios, filename + "_DD.bp", Nx, Ny,
+                                                              nSteps, engine);
+        }
+        else
+        {
+            std::cout << "Memory space: HostSpace" << std::endl;
+            BPWrite<Kokkos::HostSpace, Kokkos::Serial>(adios, filename + "_HH.bp", Nx, Ny, nSteps,
+                                                       engine);
+        }
+    }
+    Kokkos::finalize();
+    return 0;
+}
diff --git a/examples/simulations/gray-scott-struct/CMakeLists.txt b/examples/simulations/gray-scott-struct/CMakeLists.txt
index a892b757dd..5d67c49447 100644
--- a/examples/simulations/gray-scott-struct/CMakeLists.txt
+++ b/examples/simulations/gray-scott-struct/CMakeLists.txt
@@ -43,28 +43,9 @@ if(ADIOS2_HAVE_MPI)
     "simulation/settings-inline.json"
     "plot/decomp.py" "plot/gsplot.py" "plot/pdfplot.py"
     "ReadMe.md"
-    DESTINATION ${CMAKE_INSTALL_PREFIX}/share/adios2/examples/simulations/gray-scott-struct)
+    DESTINATION ${CMAKE_INSTALL_PREFIX}/share/adios2/gray-scott-struct)
 
-  install(DIRECTORY "catalyst" DESTINATION ${CMAKE_INSTALL_PREFIX}/share/adios2/examples/simulations/gray-scott-struct)
+  install(DIRECTORY "catalyst" DESTINATION ${CMAKE_INSTALL_PREFIX}/share/adios2/gray-scott-struct)
 
-  install(PROGRAMS "cleanup.sh" DESTINATION ${CMAKE_INSTALL_PREFIX}/share/adios2/examples/simulations/gray-scott-struct)
-
-  find_package(VTK QUIET)
-  if(VTK_FOUND)
-    add_executable(adios2_simulations_gray-scott-struct_curvature analysis/curvature.cpp)
-    target_link_libraries(adios2_simulations_gray-scott-struct_curvature adios2::cxx11_mpi MPI::MPI_C ${VTK_LIBRARIES})
-    install(TARGETS adios2_simulations_gray-scott-struct_curvature RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR})
-
-    add_executable(adios2_simulations_gray-scott-struct_find_blobs analysis/find_blobs.cpp)
-    target_link_libraries(adios2_simulations_gray-scott-struct_find_blobs adios2::cxx11_mpi MPI::MPI_C ${VTK_LIBRARIES})
-    install(TARGETS adios2_simulations_gray-scott-struct_find_blobs RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR})
-
-    add_executable(adios2_simulations_gray-scott-struct_isosurface analysis/isosurface.cpp)
-    target_link_libraries(adios2_simulations_gray-scott-struct_isosurface adios2::cxx11_mpi MPI::MPI_C ${VTK_LIBRARIES})
-    install(TARGETS adios2_simulations_gray-scott-struct_isosurface RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR})
-
-    # add_executable(adios2_simulations_gray-scott-struct_render_isosurface plot/render_isosurface.cpp)
-    # target_link_libraries(adios2_simulations_gray-scott_struct_render-isosurface adios2::cxx11_mpi MPI::MPI_C ${VTK_LIBRARIES})
-    # install(TARGETS adios2_simulations_gray-scott-struct_render_isosurface RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR})
-  endif()
+  install(PROGRAMS "cleanup.sh" DESTINATION ${CMAKE_INSTALL_PREFIX}/share/adios2/gray-scott-struct)
 endif()
diff --git a/examples/simulations/gray-scott-struct/analysis/curvature.cpp b/examples/simulations/gray-scott-struct/analysis/curvature.cpp
deleted file mode 100644
index 586e2fe7d2..0000000000
--- a/examples/simulations/gray-scott-struct/analysis/curvature.cpp
+++ /dev/null
@@ -1,202 +0,0 @@
-/*
- * Distributed under the OSI-approved Apache License, Version 2.0.  See
- * accompanying file Copyright.txt for details.
- *
- * Analysis code for the Gray-Scott simulation.
- * Computes mean curvature at each point on an isosurface.
- *
- * Keichi Takahashi <keichi@is.naist.jp>
- *
- */
-
-#include <fstream>
-#include <iostream>
-
-#include <adios2.h>
-
-#include <vtkCellArray.h>
-#include <vtkCurvatures.h>
-#include <vtkDoubleArray.h>
-#include <vtkPointData.h>
-#include <vtkPoints.h>
-#include <vtkPolyData.h>
-#include <vtkSmartPointer.h>
-#include <vtkUnstructuredGrid.h>
-
-#include "../common/timer.hpp"
-
-vtkSmartPointer<vtkPolyData> read_mesh(const std::vector<double> &bufPoints,
-                                       const std::vector<int> &bufCells,
-                                       const std::vector<double> &bufNormals)
-{
-    int nPoints = static_cast<int>(bufPoints.size() / 3);
-    int nCells = static_cast<int>(bufCells.size() / 3);
-
-    auto points = vtkSmartPointer<vtkPoints>::New();
-    points->SetNumberOfPoints(nPoints);
-    for (vtkIdType i = 0; i < nPoints; i++)
-    {
-        points->SetPoint(i, &bufPoints[i * 3]);
-    }
-
-    auto polys = vtkSmartPointer<vtkCellArray>::New();
-    for (vtkIdType i = 0; i < nCells; i++)
-    {
-        vtkIdType a = bufCells[i * 3 + 0];
-        vtkIdType b = bufCells[i * 3 + 1];
-        vtkIdType c = bufCells[i * 3 + 2];
-
-        polys->InsertNextCell(3);
-        polys->InsertCellPoint(a);
-        polys->InsertCellPoint(b);
-        polys->InsertCellPoint(c);
-    }
-
-    auto normals = vtkSmartPointer<vtkDoubleArray>::New();
-    normals->SetNumberOfComponents(3);
-    for (vtkIdType i = 0; i < nPoints; i++)
-    {
-        normals->InsertNextTuple(&bufNormals[i * 3]);
-    }
-
-    auto polyData = vtkSmartPointer<vtkPolyData>::New();
-    polyData->SetPoints(points);
-    polyData->SetPolys(polys);
-    polyData->GetPointData()->SetNormals(normals);
-
-    return polyData;
-}
-
-void compute_curvature(const vtkSmartPointer<vtkPolyData> polyData)
-{
-    vtkSmartPointer<vtkCurvatures> curvaturesFilter = vtkSmartPointer<vtkCurvatures>::New();
-    curvaturesFilter->SetInputData(polyData);
-    // curvaturesFilter->SetCurvatureTypeToMinimum();
-    // curvaturesFilter->SetCurvatureTypeToMaximum();
-    // curvaturesFilter->SetCurvatureTypeToGaussian();
-    curvaturesFilter->SetCurvatureTypeToMean();
-    curvaturesFilter->Update();
-}
-
-int main(int argc, char *argv[])
-{
-    int provided;
-    MPI_Init_thread(&argc, &argv, MPI_THREAD_MULTIPLE, &provided);
-
-    int rank, procs, wrank;
-
-    MPI_Comm_rank(MPI_COMM_WORLD, &wrank);
-
-    const unsigned int color = 7;
-    MPI_Comm comm;
-    MPI_Comm_split(MPI_COMM_WORLD, color, wrank, &comm);
-
-    MPI_Comm_rank(comm, &rank);
-    MPI_Comm_size(comm, &procs);
-
-    if (argc < 3)
-    {
-        if (rank == 0)
-        {
-            std::cerr << "Too few arguments" << std::endl;
-            std::cout << "Usage: curvature input output" << std::endl;
-        }
-        MPI_Abort(MPI_COMM_WORLD, -1);
-    }
-
-    const std::string input_fname(argv[1]);
-    const std::string output_fname(argv[2]);
-
-    adios2::ADIOS adios("adios2.xml", comm);
-
-    adios2::IO inIO = adios.DeclareIO("IsosurfaceOutput");
-    adios2::Engine reader = inIO.Open(input_fname, adios2::Mode::Read);
-
-    adios2::IO outIO = adios.DeclareIO("CurvatureOutput");
-    adios2::Engine writer = outIO.Open(output_fname, adios2::Mode::Write);
-
-    std::vector<double> points;
-    std::vector<int> cells;
-    std::vector<double> normals;
-    int step;
-
-#ifdef ENABLE_TIMERS
-    Timer timer_total;
-    Timer timer_read;
-    Timer timer_compute;
-
-    std::ostringstream log_fname;
-    log_fname << "compute_curvature_pe_" << rank << ".log";
-
-    std::ofstream log(log_fname.str());
-    log << "step\ttotal_curv\tread_curv\tcompute_curv" << std::endl;
-#endif
-
-    while (true)
-    {
-#ifdef ENABLE_TIMERS
-        MPI_Barrier(comm);
-        timer_total.start();
-        timer_read.start();
-#endif
-
-        adios2::StepStatus status = reader.BeginStep();
-
-        if (status != adios2::StepStatus::OK)
-        {
-            break;
-        }
-
-        auto varPoint = inIO.InquireVariable<double>("point");
-        auto varCell = inIO.InquireVariable<int>("cell");
-        auto varNormal = inIO.InquireVariable<double>("normal");
-        auto varStep = inIO.InquireVariable<int>("step");
-
-        if (varPoint.Shape().size() > 0 || varCell.Shape().size() > 0)
-        {
-            varPoint.SetSelection({{0, 0}, {varPoint.Shape()[0], varPoint.Shape()[1]}});
-            varCell.SetSelection({{0, 0}, {varCell.Shape()[0], varCell.Shape()[1]}});
-            varNormal.SetSelection({{0, 0}, {varNormal.Shape()[0], varNormal.Shape()[1]}});
-
-            reader.Get<double>(varPoint, points);
-            reader.Get<int>(varCell, cells);
-            reader.Get<double>(varNormal, normals);
-        }
-
-        reader.Get<int>(varStep, &step);
-
-        reader.EndStep();
-
-#ifdef ENABLE_TIMERS
-        double time_read = timer_read.stop();
-        MPI_Barrier(comm);
-        timer_compute.start();
-#endif
-
-        vtkSmartPointer<vtkPolyData> polyData = read_mesh(points, cells, normals);
-        compute_curvature(polyData);
-
-        if (!rank)
-        {
-            std::cout << "compute_curvature at step " << step << std::endl;
-        }
-
-#ifdef ENABLE_TIMERS
-        double time_compute = timer_compute.stop();
-        double time_step = timer_total.stop();
-        MPI_Barrier(comm);
-
-        log << step << "\t" << time_step << "\t" << time_read << "\t" << time_compute << std::endl;
-#endif
-    }
-
-#ifdef ENABLE_TIMERS
-    log << "total\t" << timer_total.elapsed() << "\t" << timer_read.elapsed() << "\t"
-        << timer_compute.elapsed() << "\t" << std::endl;
-
-    log.close();
-#endif
-
-    writer.Close();
-    reader.Close();
-}
diff --git a/examples/simulations/gray-scott-struct/analysis/find_blobs.cpp b/examples/simulations/gray-scott-struct/analysis/find_blobs.cpp
deleted file mode 100644
index 8b5c8c54b2..0000000000
--- a/examples/simulations/gray-scott-struct/analysis/find_blobs.cpp
+++ /dev/null
@@ -1,245 +0,0 @@
-/*
- * Distributed under the OSI-approved Apache License, Version 2.0.  See
- * accompanying file Copyright.txt for details.
- *
- * Analysis code for the Gray-Scott simulation.
- * Reads iso-surface mesh data and detects connected components.
- * Counts the total number of connected components and measures the surface
- * area of each component.
- *
- * Keichi Takahashi <keichi@is.naist.jp>
- *
- */
-
-#include <iostream>
-
-#include <adios2.h>
-
-#include <vtkCellArray.h>
-#include <vtkConnectivityFilter.h>
-#include <vtkDataSetSurfaceFilter.h>
-#include <vtkDoubleArray.h>
-#include <vtkMassProperties.h>
-#include <vtkPointData.h>
-#include <vtkPoints.h>
-#include <vtkPolyData.h>
-#include <vtkSmartPointer.h>
-#include <vtkThreshold.h>
-#include <vtkUnstructuredGrid.h>
-
-#include "../common/timer.hpp"
-
-vtkSmartPointer<vtkPolyData> read_mesh(const std::vector<double> &bufPoints,
-                                       const std::vector<int> &bufCells,
-                                       const std::vector<double> &bufNormals)
-{
-    int nPoints = static_cast<int>(bufPoints.size() / 3);
-    int nCells = static_cast<int>(bufCells.size() / 3);
-
-    auto points = vtkSmartPointer<vtkPoints>::New();
-    points->SetNumberOfPoints(nPoints);
-    for (vtkIdType i = 0; i < nPoints; i++)
-    {
-        points->SetPoint(i, &bufPoints[i * 3]);
-    }
-
-    auto polys = vtkSmartPointer<vtkCellArray>::New();
-    for (vtkIdType i = 0; i < nCells; i++)
-    {
-        vtkIdType a = bufCells[i * 3 + 0];
-        vtkIdType b = bufCells[i * 3 + 1];
-        vtkIdType c = bufCells[i * 3 + 2];
-
-        polys->InsertNextCell(3);
-        polys->InsertCellPoint(a);
-        polys->InsertCellPoint(b);
-        polys->InsertCellPoint(c);
-    }
-
-    auto normals = vtkSmartPointer<vtkDoubleArray>::New();
-    normals->SetNumberOfComponents(3);
-    for (vtkIdType i = 0; i < nPoints; i++)
-    {
-        normals->InsertNextTuple(&bufNormals[i * 3]);
-    }
-
-    auto polyData = vtkSmartPointer<vtkPolyData>::New();
-    polyData->SetPoints(points);
-    polyData->SetPolys(polys);
-    polyData->GetPointData()->SetNormals(normals);
-
-    return polyData;
-}
-
-void find_blobs(const vtkSmartPointer<vtkPolyData> polyData)
-{
-    auto connectivityFilter = vtkSmartPointer<vtkConnectivityFilter>::New();
-    connectivityFilter->SetInputData(polyData);
-    connectivityFilter->SetExtractionModeToAllRegions();
-    connectivityFilter->ColorRegionsOn();
-    connectivityFilter->Update();
-
-    int nBlobs = connectivityFilter->GetNumberOfExtractedRegions();
-
-    std::cout << "Found " << nBlobs << " blobs" << std::endl;
-
-    auto threshold = vtkSmartPointer<vtkThreshold>::New();
-    auto massProperties = vtkSmartPointer<vtkMassProperties>::New();
-    auto surfaceFilter = vtkSmartPointer<vtkDataSetSurfaceFilter>::New();
-
-    threshold->SetInputConnection(connectivityFilter->GetOutputPort());
-    surfaceFilter->SetInputConnection(threshold->GetOutputPort());
-    massProperties->SetInputConnection(surfaceFilter->GetOutputPort());
-
-    for (int i = 0; i < nBlobs; i++)
-    {
-        threshold->SetThresholdFunction(vtkThreshold::THRESHOLD_BETWEEN);
-        threshold->SetLowerThreshold(i);
-        threshold->SetUpperThreshold(i);
-
-        std::cout << "Surface area of blob #" << i << " is " << massProperties->GetSurfaceArea()
-                  << std::endl;
-    }
-}
-
-void find_largest_blob(const vtkSmartPointer<vtkPolyData> polyData)
-{
-    auto connectivityFilter = vtkSmartPointer<vtkConnectivityFilter>::New();
-    connectivityFilter->SetInputData(polyData);
-    connectivityFilter->SetExtractionModeToLargestRegion();
-    connectivityFilter->Update();
-
-    auto massProperties = vtkSmartPointer<vtkMassProperties>::New();
-    auto surfaceFilter = vtkSmartPointer<vtkDataSetSurfaceFilter>::New();
-
-    surfaceFilter->SetInputConnection(connectivityFilter->GetOutputPort());
-    massProperties->SetInputConnection(surfaceFilter->GetOutputPort());
-
-    std::cout << "Surface area of largest blob is " << massProperties->GetSurfaceArea()
-              << std::endl;
-}
-
-int main(int argc, char *argv[])
-{
-    int provided;
-    MPI_Init_thread(&argc, &argv, MPI_THREAD_MULTIPLE, &provided);
-
-    int rank, procs, wrank;
-
-    MPI_Comm_rank(MPI_COMM_WORLD, &wrank);
-
-    const unsigned int color = 6;
-    MPI_Comm comm;
-    MPI_Comm_split(MPI_COMM_WORLD, color, wrank, &comm);
-
-    MPI_Comm_rank(comm, &rank);
-    MPI_Comm_size(comm, &procs);
-
-    if (argc < 2)
-    {
-        if (rank == 0)
-        {
-            std::cerr << "Too few arguments" << std::endl;
-            std::cout << "Usage: find_blobs input" << std::endl;
-        }
-        MPI_Abort(MPI_COMM_WORLD, -1);
-    }
-
-    if (procs != 1)
-    {
-        if (rank == 0)
-        {
-            std::cerr << "find_blobs only supports serial execution" << std::endl;
-        }
-        MPI_Abort(MPI_COMM_WORLD, -1);
-    }
-
-    const std::string input_fname(argv[1]);
-
-    adios2::ADIOS adios("adios2.xml", comm);
-
-    adios2::IO inIO = adios.DeclareIO("IsosurfaceOutput");
-    adios2::Engine reader = inIO.Open(input_fname, adios2::Mode::Read);
-
-    std::vector<double> points;
-    std::vector<int> cells;
-    std::vector<double> normals;
-    int step;
-
-#ifdef ENABLE_TIMERS
-    Timer timer_total;
-    Timer timer_compute;
-    Timer timer_read;
-
-    std::ostringstream log_fname;
-    log_fname << "find_blobs_pe_" << rank << ".log";
-
-    std::ofstream log(log_fname.str());
-    log << "step\ttotal_blobs\tread_blobs\tcompute_blobs" << std::endl;
-#endif
-
-    while (true)
-    {
-#ifdef ENABLE_TIMERS
-        MPI_Barrier(comm);
-        timer_total.start();
-        timer_read.start();
-#endif
-
-        adios2::StepStatus status = reader.BeginStep();
-
-        if (status != adios2::StepStatus::OK)
-        {
-            break;
-        }
-
-        auto varPoint = inIO.InquireVariable<double>("point");
-        auto varCell = inIO.InquireVariable<int>("cell");
-        auto varNormal = inIO.InquireVariable<double>("normal");
-        auto varStep = inIO.InquireVariable<int>("step");
-
-        if (varPoint.Shape().size() > 0 || varCell.Shape().size() > 0)
-        {
-            varPoint.SetSelection({{0, 0}, {varPoint.Shape()[0], varPoint.Shape()[1]}});
-            varCell.SetSelection({{0, 0}, {varCell.Shape()[0], varCell.Shape()[1]}});
-            varNormal.SetSelection({{0, 0}, {varNormal.Shape()[0], varNormal.Shape()[1]}});
-
-            reader.Get<double>(varPoint, points);
-            reader.Get<int>(varCell, cells);
-            reader.Get<double>(varNormal, normals);
-        }
-
-        reader.Get<int>(varStep, &step);
-
-        reader.EndStep();
-
-#ifdef ENABLE_TIMERS
-        double time_read = timer_read.stop();
-        MPI_Barrier(comm);
-        timer_compute.start();
-#endif
-
-        std::cout << "find_blobs at step " << step << std::endl;
-
-        auto polyData = read_mesh(points, cells, normals);
-        // find_blobs(polyData);
-        find_largest_blob(polyData);
-
-#ifdef ENABLE_TIMERS
-        double time_compute = timer_compute.stop();
-        double time_step = timer_total.stop();
-        MPI_Barrier(comm);
-
-        log << step << "\t" << time_step << "\t" << time_read << "\t" << time_compute << std::endl;
-#endif
-    }
-
-#ifdef ENABLE_TIMERS
-    log << "total\t" << timer_total.elapsed() << "\t" << timer_read.elapsed() << "\t"
-        << timer_compute.elapsed() << std::endl;
-
-    log.close();
-#endif
-
-    reader.Close();
-}
diff --git a/examples/simulations/gray-scott-struct/analysis/isosurface.cpp b/examples/simulations/gray-scott-struct/analysis/isosurface.cpp
deleted file mode 100644
index e02a507aaa..0000000000
--- a/examples/simulations/gray-scott-struct/analysis/isosurface.cpp
+++ /dev/null
@@ -1,346 +0,0 @@
-/*
- * Distributed under the OSI-approved Apache License, Version 2.0.  See
- * accompanying file Copyright.txt for details.
- *
- * Analysis code for the Gray-Scott simulation.
- * Reads variable U and and extracts the iso-surface using VTK.
- * Writes the extracted iso-surface using ADIOS.
- *
- * Keichi Takahashi <keichi@is.naist.jp>
- *
- */
-
-#include <iostream>
-#include <sstream>
-
-#include <adios2.h>
-
-#include <vtkAppendPolyData.h>
-#include <vtkImageData.h>
-#include <vtkImageImport.h>
-#include <vtkMarchingCubes.h>
-#include <vtkPointData.h>
-#include <vtkPolyData.h>
-#include <vtkSmartPointer.h>
-#include <vtkXMLPolyDataWriter.h>
-
-#include "../common/timer.hpp"
-
-vtkSmartPointer<vtkPolyData> compute_isosurface(const adios2::Variable<double> &varField,
-                                                const std::vector<double> &field, double isovalue)
-{
-    // Convert field values to vtkImageData
-    auto importer = vtkSmartPointer<vtkImageImport>::New();
-    importer->SetDataSpacing(1, 1, 1);
-    importer->SetDataOrigin(static_cast<double>(varField.Start()[2]),
-                            static_cast<double>(varField.Start()[1]),
-                            static_cast<double>(varField.Start()[0]));
-    importer->SetWholeExtent(0, static_cast<int>(varField.Count()[2] - 1), 0,
-                             static_cast<int>(varField.Count()[1] - 1), 0,
-                             static_cast<int>(varField.Count()[0] - 1));
-    importer->SetDataExtentToWholeExtent();
-    importer->SetDataScalarTypeToDouble();
-    importer->SetNumberOfScalarComponents(1);
-    importer->SetImportVoidPointer(const_cast<double *>(field.data()));
-
-    // Run the marching cubes algorithm
-    auto mcubes = vtkSmartPointer<vtkMarchingCubes>::New();
-    mcubes->SetInputConnection(importer->GetOutputPort());
-    mcubes->ComputeNormalsOn();
-    mcubes->SetValue(0, isovalue);
-    mcubes->Update();
-
-    // Return the isosurface as vtkPolyData
-    return mcubes->GetOutput();
-}
-
-void write_vtk(const std::string &fname, const vtkSmartPointer<vtkPolyData> polyData)
-{
-    auto writer = vtkSmartPointer<vtkXMLPolyDataWriter>::New();
-    writer->SetFileName(fname.c_str());
-    writer->SetInputData(polyData);
-    writer->Write();
-}
-
-void write_adios(adios2::Engine &writer, const vtkSmartPointer<vtkPolyData> polyData,
-                 adios2::Variable<double> &varPoint, adios2::Variable<int> &varCell,
-                 adios2::Variable<double> &varNormal, adios2::Variable<int> &varOutStep, int step,
-                 MPI_Comm comm)
-{
-    int numCells = static_cast<int>(polyData->GetNumberOfPolys());
-    int numPoints = static_cast<int>(polyData->GetNumberOfPoints());
-    int rank;
-
-    MPI_Comm_rank(comm, &rank);
-
-    std::vector<double> points(static_cast<size_t>(numPoints * 3));
-    std::vector<double> normals(static_cast<size_t>(numPoints * 3));
-    std::vector<int> cells(static_cast<size_t>(numCells * 3)); // Assumes that cells are triangles
-
-    double coords[3];
-
-    auto cellArray = polyData->GetPolys();
-
-    cellArray->InitTraversal();
-
-    // Iterate through cells
-    for (vtkIdType i = 0; i < polyData->GetNumberOfPolys(); i++)
-    {
-        auto idList = vtkSmartPointer<vtkIdList>::New();
-
-        cellArray->GetNextCell(idList);
-
-        // Iterate through points of a cell
-        for (vtkIdType j = 0; j < idList->GetNumberOfIds(); j++)
-        {
-            auto id = idList->GetId(j);
-
-            cells[i * 3 + j] = static_cast<int>(id);
-
-            polyData->GetPoint(id, coords);
-
-            points[id * 3 + 0] = coords[0];
-            points[id * 3 + 1] = coords[1];
-            points[id * 3 + 2] = coords[2];
-        }
-    }
-
-    auto normalArray = polyData->GetPointData()->GetNormals();
-
-    // Extract normals
-    for (int i = 0; i < normalArray->GetNumberOfTuples(); i++)
-    {
-        normalArray->GetTuple(i, coords);
-
-        normals[i * 3 + 0] = coords[0];
-        normals[i * 3 + 1] = coords[1];
-        normals[i * 3 + 2] = coords[2];
-    }
-
-    int totalPoints, offsetPoints;
-    MPI_Allreduce(&numPoints, &totalPoints, 1, MPI_INT, MPI_SUM, comm);
-    MPI_Scan(&numPoints, &offsetPoints, 1, MPI_INT, MPI_SUM, comm);
-
-    writer.BeginStep();
-
-    varPoint.SetShape(
-        {static_cast<size_t>(totalPoints), static_cast<size_t>(totalPoints > 0 ? 3 : 0)});
-    varPoint.SetSelection(
-        {{static_cast<size_t>(offsetPoints - numPoints), 0},
-         {static_cast<size_t>(numPoints), static_cast<size_t>(numPoints > 0 ? 3 : 0)}});
-
-    varNormal.SetShape(varPoint.Shape());
-    varNormal.SetSelection({varPoint.Start(), varPoint.Count()});
-
-    if (numPoints)
-    {
-        writer.Put(varPoint, points.data());
-        writer.Put(varNormal, normals.data());
-    }
-
-    int totalCells, offsetCells;
-    MPI_Allreduce(&numCells, &totalCells, 1, MPI_INT, MPI_SUM, comm);
-    MPI_Scan(&numCells, &offsetCells, 1, MPI_INT, MPI_SUM, comm);
-
-    for (int &cell : cells)
-    {
-        cell += (offsetPoints - numPoints);
-    }
-
-    varCell.SetShape(
-        {static_cast<size_t>(totalCells), static_cast<size_t>(totalCells > 0 ? 3 : 0)});
-    varCell.SetSelection(
-        {{static_cast<size_t>(offsetCells - numCells), 0},
-         {static_cast<size_t>(numCells), static_cast<size_t>(numCells > 0 ? 3 : 0)}});
-
-    if (numCells)
-    {
-        writer.Put(varCell, cells.data());
-    }
-
-    if (!rank)
-    {
-        std::cout << "isosurface at step " << step << " writing out " << totalCells << " cells and "
-                  << totalPoints << " points" << std::endl;
-    }
-
-    writer.Put(varOutStep, step);
-
-    writer.EndStep();
-}
-
-int main(int argc, char *argv[])
-{
-    int provided;
-    MPI_Init_thread(&argc, &argv, MPI_THREAD_MULTIPLE, &provided);
-
-    int rank, procs, wrank;
-
-    MPI_Comm_rank(MPI_COMM_WORLD, &wrank);
-
-    const unsigned int color = 5;
-    MPI_Comm comm;
-    MPI_Comm_split(MPI_COMM_WORLD, color, wrank, &comm);
-
-    MPI_Comm_rank(comm, &rank);
-    MPI_Comm_size(comm, &procs);
-
-    int dims[3] = {0};
-    MPI_Dims_create(procs, 3, dims);
-    size_t npx = dims[0];
-    size_t npy = dims[1];
-    size_t npz = dims[2];
-
-    int coords[3] = {0};
-    int periods[3] = {0};
-    MPI_Comm cart_comm;
-    MPI_Cart_create(comm, 3, dims, periods, 0, &cart_comm);
-    MPI_Cart_coords(cart_comm, rank, 3, coords);
-    size_t px = coords[0];
-    size_t py = coords[1];
-    size_t pz = coords[2];
-
-    if (argc < 4)
-    {
-        if (rank == 0)
-        {
-            std::cerr << "Too few arguments" << std::endl;
-            std::cout << "Usage: isosurface input output isovalues..." << std::endl;
-        }
-        MPI_Abort(MPI_COMM_WORLD, -1);
-    }
-
-    const std::string input_fname(argv[1]);
-    const std::string output_fname(argv[2]);
-
-    std::vector<double> isovalues;
-    for (int i = 3; i < argc; i++)
-    {
-        isovalues.push_back(std::stod(argv[i]));
-    }
-
-    adios2::ADIOS adios("adios2.xml", comm);
-
-    adios2::IO inIO = adios.DeclareIO("SimulationOutput");
-    adios2::Engine reader = inIO.Open(input_fname, adios2::Mode::Read);
-
-    adios2::IO outIO = adios.DeclareIO("IsosurfaceOutput");
-    adios2::Engine writer = outIO.Open(output_fname, adios2::Mode::Write);
-
-    auto varPoint = outIO.DefineVariable<double>("point", {1, 3}, {0, 0}, {1, 3});
-    auto varCell = outIO.DefineVariable<int>("cell", {1, 3}, {0, 0}, {1, 3});
-    auto varNormal = outIO.DefineVariable<double>("normal", {1, 3}, {0, 0}, {1, 3});
-    auto varOutStep = outIO.DefineVariable<int>("step");
-
-    std::vector<double> u;
-    int step;
-
-#ifdef ENABLE_TIMERS
-    Timer timer_total;
-    Timer timer_read;
-    Timer timer_compute;
-    Timer timer_write;
-
-    std::ostringstream log_fname;
-    log_fname << "isosurface_pe_" << rank << ".log";
-
-    std::ofstream log(log_fname.str());
-    log << "step\ttotal_iso\tread_iso\tcompute_iso\twrite_iso" << std::endl;
-#endif
-
-    while (true)
-    {
-#ifdef ENABLE_TIMERS
-        MPI_Barrier(comm);
-        timer_total.start();
-        timer_read.start();
-#endif
-
-        adios2::StepStatus status = reader.BeginStep();
-
-        if (status != adios2::StepStatus::OK)
-        {
-            break;
-        }
-
-        adios2::Variable<double> varU = inIO.InquireVariable<double>("U");
-        const adios2::Variable<int> varStep = inIO.InquireVariable<int>("step");
-
-        adios2::Dims shape = varU.Shape();
-
-        size_t size_x = (shape[0] + npx - 1) / npx;
-        size_t size_y = (shape[1] + npy - 1) / npy;
-        size_t size_z = (shape[2] + npz - 1) / npz;
-
-        size_t offset_x = size_x * px;
-        size_t offset_y = size_y * py;
-        size_t offset_z = size_z * pz;
-
-        if (px == npx - 1)
-        {
-            size_x -= size_x * npx - shape[0];
-        }
-        if (py == npy - 1)
-        {
-            size_y -= size_y * npy - shape[1];
-        }
-        if (pz == npz - 1)
-        {
-            size_z -= size_z * npz - shape[2];
-        }
-
-        varU.SetSelection({{offset_x, offset_y, offset_z},
-                           {size_x + (px != npx - 1 ? 1 : 0), size_y + (py != npy - 1 ? 1 : 0),
-                            size_z + (pz != npz - 1 ? 1 : 0)}});
-
-        reader.Get<double>(varU, u);
-        reader.Get<int>(varStep, step);
-        reader.EndStep();
-
-#ifdef ENABLE_TIMERS
-        double time_read = timer_read.stop();
-        MPI_Barrier(comm);
-        timer_compute.start();
-#endif
-
-        auto appendFilter = vtkSmartPointer<vtkAppendPolyData>::New();
-
-        for (const auto isovalue : isovalues)
-        {
-            auto polyData = compute_isosurface(varU, u, isovalue);
-            appendFilter->AddInputData(polyData);
-        }
-
-        appendFilter->Update();
-
-#ifdef ENABLE_TIMERS
-        double time_compute = timer_compute.stop();
-        MPI_Barrier(comm);
-        timer_write.start();
-#endif
-
-        write_adios(writer, appendFilter->GetOutput(), varPoint, varCell, varNormal, varOutStep,
-                    step, comm);
-
-#ifdef ENABLE_TIMERS
-        double time_write = timer_write.stop();
-        double time_step = timer_total.stop();
-        MPI_Barrier(comm);
-
-        log << step << "\t" << time_step << "\t" << time_read << "\t" << time_compute << "\t"
-            << time_write << std::endl;
-#endif
-    }
-
-#ifdef ENABLE_TIMERS
-    log << "total\t" << timer_total.elapsed() << "\t" << timer_read.elapsed() << "\t"
-        << timer_compute.elapsed() << "\t" << timer_write.elapsed() << std::endl;
-
-    log.close();
-#endif
-
-    writer.Close();
-    reader.Close();
-
-    MPI_Finalize();
-}
diff --git a/examples/simulations/gray-scott-struct/plot/render_isosurface.cpp b/examples/simulations/gray-scott-struct/plot/render_isosurface.cpp
deleted file mode 100644
index 30a464b831..0000000000
--- a/examples/simulations/gray-scott-struct/plot/render_isosurface.cpp
+++ /dev/null
@@ -1,204 +0,0 @@
-/*
- * Distributed under the OSI-approved Apache License, Version 2.0.  See
- * accompanying file Copyright.txt for details.
- *
- * Visualization code for the Gray-Scott simulation.
- * Reads and renders iso-surface mesh data.
- *
- * Keichi Takahashi <keichi@is.naist.jp>
- *
- */
-
-#include <iostream>
-
-#include <adios2.h>
-
-#include <vtkActor.h>
-#include <vtkAutoInit.h>
-#include <vtkCallbackCommand.h>
-#include <vtkCellArray.h>
-#include <vtkDoubleArray.h>
-#include <vtkInteractorStyleSwitch.h>
-#include <vtkPointData.h>
-#include <vtkPoints.h>
-#include <vtkPolyData.h>
-#include <vtkPolyDataMapper.h>
-#include <vtkProperty.h>
-#include <vtkRenderView.h>
-#include <vtkRenderWindow.h>
-#include <vtkRenderWindowInteractor.h>
-#include <vtkRenderer.h>
-#include <vtkSmartPointer.h>
-
-VTK_MODULE_INIT(vtkRenderingOpenGL2);
-
-typedef struct
-{
-    vtkRenderView *renderView;
-    vtkPolyDataMapper *mapper;
-    adios2::IO *inIO;
-    adios2::Engine *reader;
-} Context;
-
-vtkSmartPointer<vtkPolyData> read_mesh(const std::vector<double> &bufPoints,
-                                       const std::vector<int> &bufCells,
-                                       const std::vector<double> &bufNormals)
-{
-    int nPoints = bufPoints.size() / 3;
-    int nCells = bufCells.size() / 3;
-
-    auto points = vtkSmartPointer<vtkPoints>::New();
-    points->SetNumberOfPoints(nPoints);
-    for (vtkIdType i = 0; i < nPoints; i++)
-    {
-        points->SetPoint(i, &bufPoints[i * 3]);
-    }
-
-    auto polys = vtkSmartPointer<vtkCellArray>::New();
-    for (vtkIdType i = 0; i < nCells; i++)
-    {
-        vtkIdType a = bufCells[i * 3 + 0];
-        vtkIdType b = bufCells[i * 3 + 1];
-        vtkIdType c = bufCells[i * 3 + 2];
-
-        polys->InsertNextCell(3);
-        polys->InsertCellPoint(a);
-        polys->InsertCellPoint(b);
-        polys->InsertCellPoint(c);
-    }
-
-    auto normals = vtkSmartPointer<vtkDoubleArray>::New();
-    normals->SetNumberOfComponents(3);
-    for (vtkIdType i = 0; i < nPoints; i++)
-    {
-        normals->InsertNextTuple(&bufNormals[i * 3]);
-    }
-
-    auto polyData = vtkSmartPointer<vtkPolyData>::New();
-    polyData->SetPoints(points);
-    polyData->SetPolys(polys);
-    polyData->GetPointData()->SetNormals(normals);
-
-    return polyData;
-}
-
-void timer_func(vtkObject *object, unsigned long eid, void *clientdata, void *calldata)
-{
-    Context *context = static_cast<Context *>(clientdata);
-
-    std::vector<double> points;
-    std::vector<int> cells;
-    std::vector<double> normals;
-    int step;
-
-    adios2::StepStatus status = context->reader->BeginStep();
-
-    if (status != adios2::StepStatus::OK)
-    {
-        return;
-    }
-
-    auto varPoint = context->inIO->InquireVariable<double>("point");
-    auto varCell = context->inIO->InquireVariable<int>("cell");
-    auto varNormal = context->inIO->InquireVariable<double>("normal");
-    auto varStep = context->inIO->InquireVariable<int>("step");
-
-    if (varPoint.Shape().size() > 0 || varCell.Shape().size() > 0)
-    {
-        varPoint.SetSelection({{0, 0}, {varPoint.Shape()[0], varPoint.Shape()[1]}});
-        varCell.SetSelection({{0, 0}, {varCell.Shape()[0], varCell.Shape()[1]}});
-        varNormal.SetSelection({{0, 0}, {varNormal.Shape()[0], varNormal.Shape()[1]}});
-
-        context->reader->Get<double>(varPoint, points);
-        context->reader->Get<int>(varCell, cells);
-        context->reader->Get<double>(varNormal, normals);
-    }
-
-    context->reader->Get<int>(varStep, &step);
-
-    context->reader->EndStep();
-
-    std::cout << "render_isosurface at step " << step << std::endl;
-
-    vtkSmartPointer<vtkPolyData> polyData = read_mesh(points, cells, normals);
-
-    context->mapper->SetInputData(polyData);
-    context->renderView->ResetCamera();
-    context->renderView->Render();
-}
-
-int main(int argc, char *argv[])
-{
-    MPI_Init(&argc, &argv);
-
-    int rank, procs, wrank;
-
-    MPI_Comm_rank(MPI_COMM_WORLD, &wrank);
-
-    const unsigned int color = 7;
-    MPI_Comm comm;
-    MPI_Comm_split(MPI_COMM_WORLD, color, wrank, &comm);
-
-    MPI_Comm_rank(comm, &rank);
-    MPI_Comm_size(comm, &procs);
-
-    if (argc < 2)
-    {
-        if (rank == 0)
-        {
-            std::cerr << "Too few arguments" << std::endl;
-            std::cout << "Usage: render_isosurface input" << std::endl;
-        }
-        MPI_Abort(MPI_COMM_WORLD, -1);
-    }
-
-    if (procs != 1)
-    {
-        if (rank == 0)
-        {
-            std::cerr << "render_isosurface only supports serial execution" << std::endl;
-        }
-        MPI_Abort(MPI_COMM_WORLD, -1);
-    }
-
-    const std::string input_fname(argv[1]);
-
-    adios2::ADIOS adios("adios2.xml", comm);
-
-    adios2::IO inIO = adios.DeclareIO("IsosurfaceOutput");
-    adios2::Engine reader = inIO.Open(input_fname, adios2::Mode::Read);
-
-    auto mapper = vtkSmartPointer<vtkPolyDataMapper>::New();
-
-    auto actor = vtkSmartPointer<vtkActor>::New();
-    actor->SetMapper(mapper);
-
-    auto renderView = vtkSmartPointer<vtkRenderView>::New();
-    renderView->GetRenderer()->AddActor(actor);
-    renderView->Update();
-
-    auto style = vtkSmartPointer<vtkInteractorStyleSwitch>::New();
-    style->SetCurrentStyleToTrackballCamera();
-
-    auto interactor = renderView->GetInteractor();
-    interactor->Initialize();
-    interactor->SetInteractorStyle(style);
-    interactor->CreateRepeatingTimer(100);
-
-    Context context = {
-        .renderView = renderView,
-        .mapper = mapper,
-        .inIO = &inIO,
-        .reader = &reader,
-    };
-
-    auto timerCallback = vtkSmartPointer<vtkCallbackCommand>::New();
-    timerCallback->SetCallback(timer_func);
-    timerCallback->SetClientData(&context);
-    interactor->AddObserver(vtkCommand::TimerEvent, timerCallback);
-
-    renderView->Render();
-    interactor->Start();
-
-    reader.Close();
-}
diff --git a/examples/simulations/gray-scott/CMakeLists.txt b/examples/simulations/gray-scott/CMakeLists.txt
index 7fed3add60..c39444698a 100644
--- a/examples/simulations/gray-scott/CMakeLists.txt
+++ b/examples/simulations/gray-scott/CMakeLists.txt
@@ -43,27 +43,9 @@ if(ADIOS2_HAVE_MPI)
     "simulation/settings-inline.json"
     "plot/decomp.py" "plot/gsplot.py" "plot/pdfplot.py"
     "ReadMe.md"
-    DESTINATION ${CMAKE_INSTALL_PREFIX}/share/adios2/examples/simulations/gray-scott)
+    DESTINATION ${CMAKE_INSTALL_PREFIX}/share/adios2/gray-scott)
 
-  install(DIRECTORY "catalyst" DESTINATION ${CMAKE_INSTALL_PREFIX}/share/adios2/examples/simulations/gray-scott)
+  install(DIRECTORY "catalyst" DESTINATION ${CMAKE_INSTALL_PREFIX}/share/adios2/gray-scott)
 
-  install(PROGRAMS "cleanup.sh" DESTINATION ${CMAKE_INSTALL_PREFIX}/share/adios2/examples/simulations/gray-scott)
-
-  find_package(VTK QUIET)
-  if(VTK_FOUND)
-    add_executable(adios2_simulations_gray-scott_curvature analysis/curvature.cpp)
-    target_link_libraries(adios2_simulations_gray-scott_curvature adios2::cxx11_mpi MPI::MPI_C ${VTK_LIBRARIES})
-    install(TARGETS adios2_simulations_gray-scott_curvature RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR})
-
-    add_executable(adios2_simulations_gray-scott_find-blobs analysis/find_blobs.cpp)
-    target_link_libraries(adios2_simulations_gray-scott_find-blobs adios2::cxx11_mpi MPI::MPI_C ${VTK_LIBRARIES})
-    install(TARGETS adios2_simulations_gray-scott_find-blobs RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR})
-
-    add_executable(adios2_simulations_gray-scott_isosurface analysis/isosurface.cpp)
-    target_link_libraries(adios2_simulations_gray-scott_isosurface adios2::cxx11_mpi MPI::MPI_C ${VTK_LIBRARIES})
-    install(TARGETS adios2_simulations_gray-scott_isosurface RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR})
-
-    # add_executable(adios2_simulations_gray-scott_render-isosurface plot/render_isosurface.cpp)
-    # target_link_libraries(adios2_simulations_gray-scott_render-isosurface adios2::cxx11_mpi MPI::MPI_C ${VTK_LIBRARIES})
-  endif()
+  install(PROGRAMS "cleanup.sh" DESTINATION ${CMAKE_INSTALL_PREFIX}/share/adios2/gray-scott)
 endif()
diff --git a/examples/simulations/gray-scott/ReadMe.md b/examples/simulations/gray-scott/ReadMe.md
index db615b1f12..4fca5b4348 100644
--- a/examples/simulations/gray-scott/ReadMe.md
+++ b/examples/simulations/gray-scott/ReadMe.md
@@ -17,7 +17,7 @@ care of this aspect).
 From a scratch directory copy the config files from your installation of adios2:
 
 ```
-$ cp -r <adios2-install-prefix>/share/adios2/examples/simulations/gray-scott .
+$ cp -r <adios2-install-prefix>/share/adios2/gray-scott .
 $ cd gray-scott
 $ mpirun -n 4 adios2_simulations_gray-scott settings-files.json
 ========================================
diff --git a/examples/simulations/gray-scott/analysis/curvature.cpp b/examples/simulations/gray-scott/analysis/curvature.cpp
deleted file mode 100644
index 586e2fe7d2..0000000000
--- a/examples/simulations/gray-scott/analysis/curvature.cpp
+++ /dev/null
@@ -1,202 +0,0 @@
-/*
- * Distributed under the OSI-approved Apache License, Version 2.0.  See
- * accompanying file Copyright.txt for details.
- *
- * Analysis code for the Gray-Scott simulation.
- * Computes mean curvature at each point on an isosurface.
- *
- * Keichi Takahashi <keichi@is.naist.jp>
- *
- */
-
-#include <fstream>
-#include <iostream>
-
-#include <adios2.h>
-
-#include <vtkCellArray.h>
-#include <vtkCurvatures.h>
-#include <vtkDoubleArray.h>
-#include <vtkPointData.h>
-#include <vtkPoints.h>
-#include <vtkPolyData.h>
-#include <vtkSmartPointer.h>
-#include <vtkUnstructuredGrid.h>
-
-#include "../common/timer.hpp"
-
-vtkSmartPointer<vtkPolyData> read_mesh(const std::vector<double> &bufPoints,
-                                       const std::vector<int> &bufCells,
-                                       const std::vector<double> &bufNormals)
-{
-    int nPoints = static_cast<int>(bufPoints.size() / 3);
-    int nCells = static_cast<int>(bufCells.size() / 3);
-
-    auto points = vtkSmartPointer<vtkPoints>::New();
-    points->SetNumberOfPoints(nPoints);
-    for (vtkIdType i = 0; i < nPoints; i++)
-    {
-        points->SetPoint(i, &bufPoints[i * 3]);
-    }
-
-    auto polys = vtkSmartPointer<vtkCellArray>::New();
-    for (vtkIdType i = 0; i < nCells; i++)
-    {
-        vtkIdType a = bufCells[i * 3 + 0];
-        vtkIdType b = bufCells[i * 3 + 1];
-        vtkIdType c = bufCells[i * 3 + 2];
-
-        polys->InsertNextCell(3);
-        polys->InsertCellPoint(a);
-        polys->InsertCellPoint(b);
-        polys->InsertCellPoint(c);
-    }
-
-    auto normals = vtkSmartPointer<vtkDoubleArray>::New();
-    normals->SetNumberOfComponents(3);
-    for (vtkIdType i = 0; i < nPoints; i++)
-    {
-        normals->InsertNextTuple(&bufNormals[i * 3]);
-    }
-
-    auto polyData = vtkSmartPointer<vtkPolyData>::New();
-    polyData->SetPoints(points);
-    polyData->SetPolys(polys);
-    polyData->GetPointData()->SetNormals(normals);
-
-    return polyData;
-}
-
-void compute_curvature(const vtkSmartPointer<vtkPolyData> polyData)
-{
-    vtkSmartPointer<vtkCurvatures> curvaturesFilter = vtkSmartPointer<vtkCurvatures>::New();
-    curvaturesFilter->SetInputData(polyData);
-    // curvaturesFilter->SetCurvatureTypeToMinimum();
-    // curvaturesFilter->SetCurvatureTypeToMaximum();
-    // curvaturesFilter->SetCurvatureTypeToGaussian();
-    curvaturesFilter->SetCurvatureTypeToMean();
-    curvaturesFilter->Update();
-}
-
-int main(int argc, char *argv[])
-{
-    int provided;
-    MPI_Init_thread(&argc, &argv, MPI_THREAD_MULTIPLE, &provided);
-
-    int rank, procs, wrank;
-
-    MPI_Comm_rank(MPI_COMM_WORLD, &wrank);
-
-    const unsigned int color = 7;
-    MPI_Comm comm;
-    MPI_Comm_split(MPI_COMM_WORLD, color, wrank, &comm);
-
-    MPI_Comm_rank(comm, &rank);
-    MPI_Comm_size(comm, &procs);
-
-    if (argc < 3)
-    {
-        if (rank == 0)
-        {
-            std::cerr << "Too few arguments" << std::endl;
-            std::cout << "Usage: curvature input output" << std::endl;
-        }
-        MPI_Abort(MPI_COMM_WORLD, -1);
-    }
-
-    const std::string input_fname(argv[1]);
-    const std::string output_fname(argv[2]);
-
-    adios2::ADIOS adios("adios2.xml", comm);
-
-    adios2::IO inIO = adios.DeclareIO("IsosurfaceOutput");
-    adios2::Engine reader = inIO.Open(input_fname, adios2::Mode::Read);
-
-    adios2::IO outIO = adios.DeclareIO("CurvatureOutput");
-    adios2::Engine writer = outIO.Open(output_fname, adios2::Mode::Write);
-
-    std::vector<double> points;
-    std::vector<int> cells;
-    std::vector<double> normals;
-    int step;
-
-#ifdef ENABLE_TIMERS
-    Timer timer_total;
-    Timer timer_read;
-    Timer timer_compute;
-
-    std::ostringstream log_fname;
-    log_fname << "compute_curvature_pe_" << rank << ".log";
-
-    std::ofstream log(log_fname.str());
-    log << "step\ttotal_curv\tread_curv\tcompute_curv" << std::endl;
-#endif
-
-    while (true)
-    {
-#ifdef ENABLE_TIMERS
-        MPI_Barrier(comm);
-        timer_total.start();
-        timer_read.start();
-#endif
-
-        adios2::StepStatus status = reader.BeginStep();
-
-        if (status != adios2::StepStatus::OK)
-        {
-            break;
-        }
-
-        auto varPoint = inIO.InquireVariable<double>("point");
-        auto varCell = inIO.InquireVariable<int>("cell");
-        auto varNormal = inIO.InquireVariable<double>("normal");
-        auto varStep = inIO.InquireVariable<int>("step");
-
-        if (varPoint.Shape().size() > 0 || varCell.Shape().size() > 0)
-        {
-            varPoint.SetSelection({{0, 0}, {varPoint.Shape()[0], varPoint.Shape()[1]}});
-            varCell.SetSelection({{0, 0}, {varCell.Shape()[0], varCell.Shape()[1]}});
-            varNormal.SetSelection({{0, 0}, {varNormal.Shape()[0], varNormal.Shape()[1]}});
-
-            reader.Get<double>(varPoint, points);
-            reader.Get<int>(varCell, cells);
-            reader.Get<double>(varNormal, normals);
-        }
-
-        reader.Get<int>(varStep, &step);
-
-        reader.EndStep();
-
-#ifdef ENABLE_TIMERS
-        double time_read = timer_read.stop();
-        MPI_Barrier(comm);
-        timer_compute.start();
-#endif
-
-        vtkSmartPointer<vtkPolyData> polyData = read_mesh(points, cells, normals);
-        compute_curvature(polyData);
-
-        if (!rank)
-        {
-            std::cout << "compute_curvature at step " << step << std::endl;
-        }
-
-#ifdef ENABLE_TIMERS
-        double time_compute = timer_compute.stop();
-        double time_step = timer_total.stop();
-        MPI_Barrier(comm);
-
-        log << step << "\t" << time_step << "\t" << time_read << "\t" << time_compute << std::endl;
-#endif
-    }
-
-#ifdef ENABLE_TIMERS
-    log << "total\t" << timer_total.elapsed() << "\t" << timer_read.elapsed() << "\t"
-        << timer_compute.elapsed() << "\t" << std::endl;
-
-    log.close();
-#endif
-
-    writer.Close();
-    reader.Close();
-}
diff --git a/examples/simulations/gray-scott/analysis/find_blobs.cpp b/examples/simulations/gray-scott/analysis/find_blobs.cpp
deleted file mode 100644
index 8b5c8c54b2..0000000000
--- a/examples/simulations/gray-scott/analysis/find_blobs.cpp
+++ /dev/null
@@ -1,245 +0,0 @@
-/*
- * Distributed under the OSI-approved Apache License, Version 2.0.  See
- * accompanying file Copyright.txt for details.
- *
- * Analysis code for the Gray-Scott simulation.
- * Reads iso-surface mesh data and detects connected components.
- * Counts the total number of connected components and measures the surface
- * area of each component.
- *
- * Keichi Takahashi <keichi@is.naist.jp>
- *
- */
-
-#include <iostream>
-
-#include <adios2.h>
-
-#include <vtkCellArray.h>
-#include <vtkConnectivityFilter.h>
-#include <vtkDataSetSurfaceFilter.h>
-#include <vtkDoubleArray.h>
-#include <vtkMassProperties.h>
-#include <vtkPointData.h>
-#include <vtkPoints.h>
-#include <vtkPolyData.h>
-#include <vtkSmartPointer.h>
-#include <vtkThreshold.h>
-#include <vtkUnstructuredGrid.h>
-
-#include "../common/timer.hpp"
-
-vtkSmartPointer<vtkPolyData> read_mesh(const std::vector<double> &bufPoints,
-                                       const std::vector<int> &bufCells,
-                                       const std::vector<double> &bufNormals)
-{
-    int nPoints = static_cast<int>(bufPoints.size() / 3);
-    int nCells = static_cast<int>(bufCells.size() / 3);
-
-    auto points = vtkSmartPointer<vtkPoints>::New();
-    points->SetNumberOfPoints(nPoints);
-    for (vtkIdType i = 0; i < nPoints; i++)
-    {
-        points->SetPoint(i, &bufPoints[i * 3]);
-    }
-
-    auto polys = vtkSmartPointer<vtkCellArray>::New();
-    for (vtkIdType i = 0; i < nCells; i++)
-    {
-        vtkIdType a = bufCells[i * 3 + 0];
-        vtkIdType b = bufCells[i * 3 + 1];
-        vtkIdType c = bufCells[i * 3 + 2];
-
-        polys->InsertNextCell(3);
-        polys->InsertCellPoint(a);
-        polys->InsertCellPoint(b);
-        polys->InsertCellPoint(c);
-    }
-
-    auto normals = vtkSmartPointer<vtkDoubleArray>::New();
-    normals->SetNumberOfComponents(3);
-    for (vtkIdType i = 0; i < nPoints; i++)
-    {
-        normals->InsertNextTuple(&bufNormals[i * 3]);
-    }
-
-    auto polyData = vtkSmartPointer<vtkPolyData>::New();
-    polyData->SetPoints(points);
-    polyData->SetPolys(polys);
-    polyData->GetPointData()->SetNormals(normals);
-
-    return polyData;
-}
-
-void find_blobs(const vtkSmartPointer<vtkPolyData> polyData)
-{
-    auto connectivityFilter = vtkSmartPointer<vtkConnectivityFilter>::New();
-    connectivityFilter->SetInputData(polyData);
-    connectivityFilter->SetExtractionModeToAllRegions();
-    connectivityFilter->ColorRegionsOn();
-    connectivityFilter->Update();
-
-    int nBlobs = connectivityFilter->GetNumberOfExtractedRegions();
-
-    std::cout << "Found " << nBlobs << " blobs" << std::endl;
-
-    auto threshold = vtkSmartPointer<vtkThreshold>::New();
-    auto massProperties = vtkSmartPointer<vtkMassProperties>::New();
-    auto surfaceFilter = vtkSmartPointer<vtkDataSetSurfaceFilter>::New();
-
-    threshold->SetInputConnection(connectivityFilter->GetOutputPort());
-    surfaceFilter->SetInputConnection(threshold->GetOutputPort());
-    massProperties->SetInputConnection(surfaceFilter->GetOutputPort());
-
-    for (int i = 0; i < nBlobs; i++)
-    {
-        threshold->SetThresholdFunction(vtkThreshold::THRESHOLD_BETWEEN);
-        threshold->SetLowerThreshold(i);
-        threshold->SetUpperThreshold(i);
-
-        std::cout << "Surface area of blob #" << i << " is " << massProperties->GetSurfaceArea()
-                  << std::endl;
-    }
-}
-
-void find_largest_blob(const vtkSmartPointer<vtkPolyData> polyData)
-{
-    auto connectivityFilter = vtkSmartPointer<vtkConnectivityFilter>::New();
-    connectivityFilter->SetInputData(polyData);
-    connectivityFilter->SetExtractionModeToLargestRegion();
-    connectivityFilter->Update();
-
-    auto massProperties = vtkSmartPointer<vtkMassProperties>::New();
-    auto surfaceFilter = vtkSmartPointer<vtkDataSetSurfaceFilter>::New();
-
-    surfaceFilter->SetInputConnection(connectivityFilter->GetOutputPort());
-    massProperties->SetInputConnection(surfaceFilter->GetOutputPort());
-
-    std::cout << "Surface area of largest blob is " << massProperties->GetSurfaceArea()
-              << std::endl;
-}
-
-int main(int argc, char *argv[])
-{
-    int provided;
-    MPI_Init_thread(&argc, &argv, MPI_THREAD_MULTIPLE, &provided);
-
-    int rank, procs, wrank;
-
-    MPI_Comm_rank(MPI_COMM_WORLD, &wrank);
-
-    const unsigned int color = 6;
-    MPI_Comm comm;
-    MPI_Comm_split(MPI_COMM_WORLD, color, wrank, &comm);
-
-    MPI_Comm_rank(comm, &rank);
-    MPI_Comm_size(comm, &procs);
-
-    if (argc < 2)
-    {
-        if (rank == 0)
-        {
-            std::cerr << "Too few arguments" << std::endl;
-            std::cout << "Usage: find_blobs input" << std::endl;
-        }
-        MPI_Abort(MPI_COMM_WORLD, -1);
-    }
-
-    if (procs != 1)
-    {
-        if (rank == 0)
-        {
-            std::cerr << "find_blobs only supports serial execution" << std::endl;
-        }
-        MPI_Abort(MPI_COMM_WORLD, -1);
-    }
-
-    const std::string input_fname(argv[1]);
-
-    adios2::ADIOS adios("adios2.xml", comm);
-
-    adios2::IO inIO = adios.DeclareIO("IsosurfaceOutput");
-    adios2::Engine reader = inIO.Open(input_fname, adios2::Mode::Read);
-
-    std::vector<double> points;
-    std::vector<int> cells;
-    std::vector<double> normals;
-    int step;
-
-#ifdef ENABLE_TIMERS
-    Timer timer_total;
-    Timer timer_compute;
-    Timer timer_read;
-
-    std::ostringstream log_fname;
-    log_fname << "find_blobs_pe_" << rank << ".log";
-
-    std::ofstream log(log_fname.str());
-    log << "step\ttotal_blobs\tread_blobs\tcompute_blobs" << std::endl;
-#endif
-
-    while (true)
-    {
-#ifdef ENABLE_TIMERS
-        MPI_Barrier(comm);
-        timer_total.start();
-        timer_read.start();
-#endif
-
-        adios2::StepStatus status = reader.BeginStep();
-
-        if (status != adios2::StepStatus::OK)
-        {
-            break;
-        }
-
-        auto varPoint = inIO.InquireVariable<double>("point");
-        auto varCell = inIO.InquireVariable<int>("cell");
-        auto varNormal = inIO.InquireVariable<double>("normal");
-        auto varStep = inIO.InquireVariable<int>("step");
-
-        if (varPoint.Shape().size() > 0 || varCell.Shape().size() > 0)
-        {
-            varPoint.SetSelection({{0, 0}, {varPoint.Shape()[0], varPoint.Shape()[1]}});
-            varCell.SetSelection({{0, 0}, {varCell.Shape()[0], varCell.Shape()[1]}});
-            varNormal.SetSelection({{0, 0}, {varNormal.Shape()[0], varNormal.Shape()[1]}});
-
-            reader.Get<double>(varPoint, points);
-            reader.Get<int>(varCell, cells);
-            reader.Get<double>(varNormal, normals);
-        }
-
-        reader.Get<int>(varStep, &step);
-
-        reader.EndStep();
-
-#ifdef ENABLE_TIMERS
-        double time_read = timer_read.stop();
-        MPI_Barrier(comm);
-        timer_compute.start();
-#endif
-
-        std::cout << "find_blobs at step " << step << std::endl;
-
-        auto polyData = read_mesh(points, cells, normals);
-        // find_blobs(polyData);
-        find_largest_blob(polyData);
-
-#ifdef ENABLE_TIMERS
-        double time_compute = timer_compute.stop();
-        double time_step = timer_total.stop();
-        MPI_Barrier(comm);
-
-        log << step << "\t" << time_step << "\t" << time_read << "\t" << time_compute << std::endl;
-#endif
-    }
-
-#ifdef ENABLE_TIMERS
-    log << "total\t" << timer_total.elapsed() << "\t" << timer_read.elapsed() << "\t"
-        << timer_compute.elapsed() << std::endl;
-
-    log.close();
-#endif
-
-    reader.Close();
-}
diff --git a/examples/simulations/gray-scott/analysis/isosurface.cpp b/examples/simulations/gray-scott/analysis/isosurface.cpp
deleted file mode 100644
index e02a507aaa..0000000000
--- a/examples/simulations/gray-scott/analysis/isosurface.cpp
+++ /dev/null
@@ -1,346 +0,0 @@
-/*
- * Distributed under the OSI-approved Apache License, Version 2.0.  See
- * accompanying file Copyright.txt for details.
- *
- * Analysis code for the Gray-Scott simulation.
- * Reads variable U and and extracts the iso-surface using VTK.
- * Writes the extracted iso-surface using ADIOS.
- *
- * Keichi Takahashi <keichi@is.naist.jp>
- *
- */
-
-#include <iostream>
-#include <sstream>
-
-#include <adios2.h>
-
-#include <vtkAppendPolyData.h>
-#include <vtkImageData.h>
-#include <vtkImageImport.h>
-#include <vtkMarchingCubes.h>
-#include <vtkPointData.h>
-#include <vtkPolyData.h>
-#include <vtkSmartPointer.h>
-#include <vtkXMLPolyDataWriter.h>
-
-#include "../common/timer.hpp"
-
-vtkSmartPointer<vtkPolyData> compute_isosurface(const adios2::Variable<double> &varField,
-                                                const std::vector<double> &field, double isovalue)
-{
-    // Convert field values to vtkImageData
-    auto importer = vtkSmartPointer<vtkImageImport>::New();
-    importer->SetDataSpacing(1, 1, 1);
-    importer->SetDataOrigin(static_cast<double>(varField.Start()[2]),
-                            static_cast<double>(varField.Start()[1]),
-                            static_cast<double>(varField.Start()[0]));
-    importer->SetWholeExtent(0, static_cast<int>(varField.Count()[2] - 1), 0,
-                             static_cast<int>(varField.Count()[1] - 1), 0,
-                             static_cast<int>(varField.Count()[0] - 1));
-    importer->SetDataExtentToWholeExtent();
-    importer->SetDataScalarTypeToDouble();
-    importer->SetNumberOfScalarComponents(1);
-    importer->SetImportVoidPointer(const_cast<double *>(field.data()));
-
-    // Run the marching cubes algorithm
-    auto mcubes = vtkSmartPointer<vtkMarchingCubes>::New();
-    mcubes->SetInputConnection(importer->GetOutputPort());
-    mcubes->ComputeNormalsOn();
-    mcubes->SetValue(0, isovalue);
-    mcubes->Update();
-
-    // Return the isosurface as vtkPolyData
-    return mcubes->GetOutput();
-}
-
-void write_vtk(const std::string &fname, const vtkSmartPointer<vtkPolyData> polyData)
-{
-    auto writer = vtkSmartPointer<vtkXMLPolyDataWriter>::New();
-    writer->SetFileName(fname.c_str());
-    writer->SetInputData(polyData);
-    writer->Write();
-}
-
-void write_adios(adios2::Engine &writer, const vtkSmartPointer<vtkPolyData> polyData,
-                 adios2::Variable<double> &varPoint, adios2::Variable<int> &varCell,
-                 adios2::Variable<double> &varNormal, adios2::Variable<int> &varOutStep, int step,
-                 MPI_Comm comm)
-{
-    int numCells = static_cast<int>(polyData->GetNumberOfPolys());
-    int numPoints = static_cast<int>(polyData->GetNumberOfPoints());
-    int rank;
-
-    MPI_Comm_rank(comm, &rank);
-
-    std::vector<double> points(static_cast<size_t>(numPoints * 3));
-    std::vector<double> normals(static_cast<size_t>(numPoints * 3));
-    std::vector<int> cells(static_cast<size_t>(numCells * 3)); // Assumes that cells are triangles
-
-    double coords[3];
-
-    auto cellArray = polyData->GetPolys();
-
-    cellArray->InitTraversal();
-
-    // Iterate through cells
-    for (vtkIdType i = 0; i < polyData->GetNumberOfPolys(); i++)
-    {
-        auto idList = vtkSmartPointer<vtkIdList>::New();
-
-        cellArray->GetNextCell(idList);
-
-        // Iterate through points of a cell
-        for (vtkIdType j = 0; j < idList->GetNumberOfIds(); j++)
-        {
-            auto id = idList->GetId(j);
-
-            cells[i * 3 + j] = static_cast<int>(id);
-
-            polyData->GetPoint(id, coords);
-
-            points[id * 3 + 0] = coords[0];
-            points[id * 3 + 1] = coords[1];
-            points[id * 3 + 2] = coords[2];
-        }
-    }
-
-    auto normalArray = polyData->GetPointData()->GetNormals();
-
-    // Extract normals
-    for (int i = 0; i < normalArray->GetNumberOfTuples(); i++)
-    {
-        normalArray->GetTuple(i, coords);
-
-        normals[i * 3 + 0] = coords[0];
-        normals[i * 3 + 1] = coords[1];
-        normals[i * 3 + 2] = coords[2];
-    }
-
-    int totalPoints, offsetPoints;
-    MPI_Allreduce(&numPoints, &totalPoints, 1, MPI_INT, MPI_SUM, comm);
-    MPI_Scan(&numPoints, &offsetPoints, 1, MPI_INT, MPI_SUM, comm);
-
-    writer.BeginStep();
-
-    varPoint.SetShape(
-        {static_cast<size_t>(totalPoints), static_cast<size_t>(totalPoints > 0 ? 3 : 0)});
-    varPoint.SetSelection(
-        {{static_cast<size_t>(offsetPoints - numPoints), 0},
-         {static_cast<size_t>(numPoints), static_cast<size_t>(numPoints > 0 ? 3 : 0)}});
-
-    varNormal.SetShape(varPoint.Shape());
-    varNormal.SetSelection({varPoint.Start(), varPoint.Count()});
-
-    if (numPoints)
-    {
-        writer.Put(varPoint, points.data());
-        writer.Put(varNormal, normals.data());
-    }
-
-    int totalCells, offsetCells;
-    MPI_Allreduce(&numCells, &totalCells, 1, MPI_INT, MPI_SUM, comm);
-    MPI_Scan(&numCells, &offsetCells, 1, MPI_INT, MPI_SUM, comm);
-
-    for (int &cell : cells)
-    {
-        cell += (offsetPoints - numPoints);
-    }
-
-    varCell.SetShape(
-        {static_cast<size_t>(totalCells), static_cast<size_t>(totalCells > 0 ? 3 : 0)});
-    varCell.SetSelection(
-        {{static_cast<size_t>(offsetCells - numCells), 0},
-         {static_cast<size_t>(numCells), static_cast<size_t>(numCells > 0 ? 3 : 0)}});
-
-    if (numCells)
-    {
-        writer.Put(varCell, cells.data());
-    }
-
-    if (!rank)
-    {
-        std::cout << "isosurface at step " << step << " writing out " << totalCells << " cells and "
-                  << totalPoints << " points" << std::endl;
-    }
-
-    writer.Put(varOutStep, step);
-
-    writer.EndStep();
-}
-
-int main(int argc, char *argv[])
-{
-    int provided;
-    MPI_Init_thread(&argc, &argv, MPI_THREAD_MULTIPLE, &provided);
-
-    int rank, procs, wrank;
-
-    MPI_Comm_rank(MPI_COMM_WORLD, &wrank);
-
-    const unsigned int color = 5;
-    MPI_Comm comm;
-    MPI_Comm_split(MPI_COMM_WORLD, color, wrank, &comm);
-
-    MPI_Comm_rank(comm, &rank);
-    MPI_Comm_size(comm, &procs);
-
-    int dims[3] = {0};
-    MPI_Dims_create(procs, 3, dims);
-    size_t npx = dims[0];
-    size_t npy = dims[1];
-    size_t npz = dims[2];
-
-    int coords[3] = {0};
-    int periods[3] = {0};
-    MPI_Comm cart_comm;
-    MPI_Cart_create(comm, 3, dims, periods, 0, &cart_comm);
-    MPI_Cart_coords(cart_comm, rank, 3, coords);
-    size_t px = coords[0];
-    size_t py = coords[1];
-    size_t pz = coords[2];
-
-    if (argc < 4)
-    {
-        if (rank == 0)
-        {
-            std::cerr << "Too few arguments" << std::endl;
-            std::cout << "Usage: isosurface input output isovalues..." << std::endl;
-        }
-        MPI_Abort(MPI_COMM_WORLD, -1);
-    }
-
-    const std::string input_fname(argv[1]);
-    const std::string output_fname(argv[2]);
-
-    std::vector<double> isovalues;
-    for (int i = 3; i < argc; i++)
-    {
-        isovalues.push_back(std::stod(argv[i]));
-    }
-
-    adios2::ADIOS adios("adios2.xml", comm);
-
-    adios2::IO inIO = adios.DeclareIO("SimulationOutput");
-    adios2::Engine reader = inIO.Open(input_fname, adios2::Mode::Read);
-
-    adios2::IO outIO = adios.DeclareIO("IsosurfaceOutput");
-    adios2::Engine writer = outIO.Open(output_fname, adios2::Mode::Write);
-
-    auto varPoint = outIO.DefineVariable<double>("point", {1, 3}, {0, 0}, {1, 3});
-    auto varCell = outIO.DefineVariable<int>("cell", {1, 3}, {0, 0}, {1, 3});
-    auto varNormal = outIO.DefineVariable<double>("normal", {1, 3}, {0, 0}, {1, 3});
-    auto varOutStep = outIO.DefineVariable<int>("step");
-
-    std::vector<double> u;
-    int step;
-
-#ifdef ENABLE_TIMERS
-    Timer timer_total;
-    Timer timer_read;
-    Timer timer_compute;
-    Timer timer_write;
-
-    std::ostringstream log_fname;
-    log_fname << "isosurface_pe_" << rank << ".log";
-
-    std::ofstream log(log_fname.str());
-    log << "step\ttotal_iso\tread_iso\tcompute_iso\twrite_iso" << std::endl;
-#endif
-
-    while (true)
-    {
-#ifdef ENABLE_TIMERS
-        MPI_Barrier(comm);
-        timer_total.start();
-        timer_read.start();
-#endif
-
-        adios2::StepStatus status = reader.BeginStep();
-
-        if (status != adios2::StepStatus::OK)
-        {
-            break;
-        }
-
-        adios2::Variable<double> varU = inIO.InquireVariable<double>("U");
-        const adios2::Variable<int> varStep = inIO.InquireVariable<int>("step");
-
-        adios2::Dims shape = varU.Shape();
-
-        size_t size_x = (shape[0] + npx - 1) / npx;
-        size_t size_y = (shape[1] + npy - 1) / npy;
-        size_t size_z = (shape[2] + npz - 1) / npz;
-
-        size_t offset_x = size_x * px;
-        size_t offset_y = size_y * py;
-        size_t offset_z = size_z * pz;
-
-        if (px == npx - 1)
-        {
-            size_x -= size_x * npx - shape[0];
-        }
-        if (py == npy - 1)
-        {
-            size_y -= size_y * npy - shape[1];
-        }
-        if (pz == npz - 1)
-        {
-            size_z -= size_z * npz - shape[2];
-        }
-
-        varU.SetSelection({{offset_x, offset_y, offset_z},
-                           {size_x + (px != npx - 1 ? 1 : 0), size_y + (py != npy - 1 ? 1 : 0),
-                            size_z + (pz != npz - 1 ? 1 : 0)}});
-
-        reader.Get<double>(varU, u);
-        reader.Get<int>(varStep, step);
-        reader.EndStep();
-
-#ifdef ENABLE_TIMERS
-        double time_read = timer_read.stop();
-        MPI_Barrier(comm);
-        timer_compute.start();
-#endif
-
-        auto appendFilter = vtkSmartPointer<vtkAppendPolyData>::New();
-
-        for (const auto isovalue : isovalues)
-        {
-            auto polyData = compute_isosurface(varU, u, isovalue);
-            appendFilter->AddInputData(polyData);
-        }
-
-        appendFilter->Update();
-
-#ifdef ENABLE_TIMERS
-        double time_compute = timer_compute.stop();
-        MPI_Barrier(comm);
-        timer_write.start();
-#endif
-
-        write_adios(writer, appendFilter->GetOutput(), varPoint, varCell, varNormal, varOutStep,
-                    step, comm);
-
-#ifdef ENABLE_TIMERS
-        double time_write = timer_write.stop();
-        double time_step = timer_total.stop();
-        MPI_Barrier(comm);
-
-        log << step << "\t" << time_step << "\t" << time_read << "\t" << time_compute << "\t"
-            << time_write << std::endl;
-#endif
-    }
-
-#ifdef ENABLE_TIMERS
-    log << "total\t" << timer_total.elapsed() << "\t" << timer_read.elapsed() << "\t"
-        << timer_compute.elapsed() << "\t" << timer_write.elapsed() << std::endl;
-
-    log.close();
-#endif
-
-    writer.Close();
-    reader.Close();
-
-    MPI_Finalize();
-}
diff --git a/examples/simulations/gray-scott/plot/render_isosurface.cpp b/examples/simulations/gray-scott/plot/render_isosurface.cpp
deleted file mode 100644
index 30a464b831..0000000000
--- a/examples/simulations/gray-scott/plot/render_isosurface.cpp
+++ /dev/null
@@ -1,204 +0,0 @@
-/*
- * Distributed under the OSI-approved Apache License, Version 2.0.  See
- * accompanying file Copyright.txt for details.
- *
- * Visualization code for the Gray-Scott simulation.
- * Reads and renders iso-surface mesh data.
- *
- * Keichi Takahashi <keichi@is.naist.jp>
- *
- */
-
-#include <iostream>
-
-#include <adios2.h>
-
-#include <vtkActor.h>
-#include <vtkAutoInit.h>
-#include <vtkCallbackCommand.h>
-#include <vtkCellArray.h>
-#include <vtkDoubleArray.h>
-#include <vtkInteractorStyleSwitch.h>
-#include <vtkPointData.h>
-#include <vtkPoints.h>
-#include <vtkPolyData.h>
-#include <vtkPolyDataMapper.h>
-#include <vtkProperty.h>
-#include <vtkRenderView.h>
-#include <vtkRenderWindow.h>
-#include <vtkRenderWindowInteractor.h>
-#include <vtkRenderer.h>
-#include <vtkSmartPointer.h>
-
-VTK_MODULE_INIT(vtkRenderingOpenGL2);
-
-typedef struct
-{
-    vtkRenderView *renderView;
-    vtkPolyDataMapper *mapper;
-    adios2::IO *inIO;
-    adios2::Engine *reader;
-} Context;
-
-vtkSmartPointer<vtkPolyData> read_mesh(const std::vector<double> &bufPoints,
-                                       const std::vector<int> &bufCells,
-                                       const std::vector<double> &bufNormals)
-{
-    int nPoints = bufPoints.size() / 3;
-    int nCells = bufCells.size() / 3;
-
-    auto points = vtkSmartPointer<vtkPoints>::New();
-    points->SetNumberOfPoints(nPoints);
-    for (vtkIdType i = 0; i < nPoints; i++)
-    {
-        points->SetPoint(i, &bufPoints[i * 3]);
-    }
-
-    auto polys = vtkSmartPointer<vtkCellArray>::New();
-    for (vtkIdType i = 0; i < nCells; i++)
-    {
-        vtkIdType a = bufCells[i * 3 + 0];
-        vtkIdType b = bufCells[i * 3 + 1];
-        vtkIdType c = bufCells[i * 3 + 2];
-
-        polys->InsertNextCell(3);
-        polys->InsertCellPoint(a);
-        polys->InsertCellPoint(b);
-        polys->InsertCellPoint(c);
-    }
-
-    auto normals = vtkSmartPointer<vtkDoubleArray>::New();
-    normals->SetNumberOfComponents(3);
-    for (vtkIdType i = 0; i < nPoints; i++)
-    {
-        normals->InsertNextTuple(&bufNormals[i * 3]);
-    }
-
-    auto polyData = vtkSmartPointer<vtkPolyData>::New();
-    polyData->SetPoints(points);
-    polyData->SetPolys(polys);
-    polyData->GetPointData()->SetNormals(normals);
-
-    return polyData;
-}
-
-void timer_func(vtkObject *object, unsigned long eid, void *clientdata, void *calldata)
-{
-    Context *context = static_cast<Context *>(clientdata);
-
-    std::vector<double> points;
-    std::vector<int> cells;
-    std::vector<double> normals;
-    int step;
-
-    adios2::StepStatus status = context->reader->BeginStep();
-
-    if (status != adios2::StepStatus::OK)
-    {
-        return;
-    }
-
-    auto varPoint = context->inIO->InquireVariable<double>("point");
-    auto varCell = context->inIO->InquireVariable<int>("cell");
-    auto varNormal = context->inIO->InquireVariable<double>("normal");
-    auto varStep = context->inIO->InquireVariable<int>("step");
-
-    if (varPoint.Shape().size() > 0 || varCell.Shape().size() > 0)
-    {
-        varPoint.SetSelection({{0, 0}, {varPoint.Shape()[0], varPoint.Shape()[1]}});
-        varCell.SetSelection({{0, 0}, {varCell.Shape()[0], varCell.Shape()[1]}});
-        varNormal.SetSelection({{0, 0}, {varNormal.Shape()[0], varNormal.Shape()[1]}});
-
-        context->reader->Get<double>(varPoint, points);
-        context->reader->Get<int>(varCell, cells);
-        context->reader->Get<double>(varNormal, normals);
-    }
-
-    context->reader->Get<int>(varStep, &step);
-
-    context->reader->EndStep();
-
-    std::cout << "render_isosurface at step " << step << std::endl;
-
-    vtkSmartPointer<vtkPolyData> polyData = read_mesh(points, cells, normals);
-
-    context->mapper->SetInputData(polyData);
-    context->renderView->ResetCamera();
-    context->renderView->Render();
-}
-
-int main(int argc, char *argv[])
-{
-    MPI_Init(&argc, &argv);
-
-    int rank, procs, wrank;
-
-    MPI_Comm_rank(MPI_COMM_WORLD, &wrank);
-
-    const unsigned int color = 7;
-    MPI_Comm comm;
-    MPI_Comm_split(MPI_COMM_WORLD, color, wrank, &comm);
-
-    MPI_Comm_rank(comm, &rank);
-    MPI_Comm_size(comm, &procs);
-
-    if (argc < 2)
-    {
-        if (rank == 0)
-        {
-            std::cerr << "Too few arguments" << std::endl;
-            std::cout << "Usage: render_isosurface input" << std::endl;
-        }
-        MPI_Abort(MPI_COMM_WORLD, -1);
-    }
-
-    if (procs != 1)
-    {
-        if (rank == 0)
-        {
-            std::cerr << "render_isosurface only supports serial execution" << std::endl;
-        }
-        MPI_Abort(MPI_COMM_WORLD, -1);
-    }
-
-    const std::string input_fname(argv[1]);
-
-    adios2::ADIOS adios("adios2.xml", comm);
-
-    adios2::IO inIO = adios.DeclareIO("IsosurfaceOutput");
-    adios2::Engine reader = inIO.Open(input_fname, adios2::Mode::Read);
-
-    auto mapper = vtkSmartPointer<vtkPolyDataMapper>::New();
-
-    auto actor = vtkSmartPointer<vtkActor>::New();
-    actor->SetMapper(mapper);
-
-    auto renderView = vtkSmartPointer<vtkRenderView>::New();
-    renderView->GetRenderer()->AddActor(actor);
-    renderView->Update();
-
-    auto style = vtkSmartPointer<vtkInteractorStyleSwitch>::New();
-    style->SetCurrentStyleToTrackballCamera();
-
-    auto interactor = renderView->GetInteractor();
-    interactor->Initialize();
-    interactor->SetInteractorStyle(style);
-    interactor->CreateRepeatingTimer(100);
-
-    Context context = {
-        .renderView = renderView,
-        .mapper = mapper,
-        .inIO = &inIO,
-        .reader = &reader,
-    };
-
-    auto timerCallback = vtkSmartPointer<vtkCallbackCommand>::New();
-    timerCallback->SetCallback(timer_func);
-    timerCallback->SetClientData(&context);
-    interactor->AddObserver(vtkCommand::TimerEvent, timerCallback);
-
-    renderView->Render();
-    interactor->Start();
-
-    reader.Close();
-}
diff --git a/examples/simulations/heatTransfer/ReadMe.md b/examples/simulations/heatTransfer/ReadMe.md
index 8693d00265..b1a45a24e7 100644
--- a/examples/simulations/heatTransfer/ReadMe.md
+++ b/examples/simulations/heatTransfer/ReadMe.md
@@ -22,6 +22,13 @@ an application to the ADIOS2 library for its IO.
 
 #### Example
 
+##### 1. Build the example
+$ mkdir build
+$ cd build
+$ cmake -DCMAKE_PREFIX_PATH=<adios2-install-dir> -DCMAKE_INSTALL_PREFIX=install ..
+$ make -j 8
+$ cd ..
+
 ##### 1. Produce an output
 
 ```
@@ -44,24 +51,23 @@ The adios1, ph5 and hdf5 versions of the example do not use XML config files, so
 argument.
 
 ```
-$ mpirun -np 12 ./bin/adios2_simulations_heatTransferWrite ../examples/simulations/heatTransfer/heat_file.xml heat.bp 4
-3 5 10 10 10
-$ mpirun -np 12 ./bin/adios2_simulations_heatTransferWrite ../examples/simulations/heatTransfer/heat_file.xml heat.h5 4
-3 5 10 10 10
+$ mpirun -np 12 ./build/write/adios2_simulations_heatTransferWrite heat_file.xml heat.bp 4 3 5 10 10 10
+$ mpirun -np 12 ./build/write/adios2_simulations_heatTransferWrite heat_hdf5.xml heat.h5 4 3 5 10 10 10
 ```
 
 ##### 2. Read the output step-by-step and print data into text files (data.<rank> per reader process)
 
 ```
-Reader Usage:   heatRead config input N M
-  config: XML config file to use
-  input:  name of input data file/stream
-  N:      number of processes in X dimension
-  M:      number of processes in Y dimension
+Reader Usage: heatRead  config  input  output N  M
+  config:  XML config file to use
+  input:   name of input data file/stream
+  output:  name of output data file/stream
+  N:       number of processes in X dimension
+  M:       number of processes in Y dimension
 ```
 
 ```
-$ mpirun -np 2 ./bin/heatTransfer_read ../examples/simulations/heatTransfer/heat_file.xml heat 2 1
+$ mpirun -np 2 ./build/read/adios2_simulations_heatTransferRead  heat_file.xml heat.bp read.bp 2 1
 ```
 
 ##### Notes:
diff --git a/plugins/operators/CMakeLists.txt b/plugins/operators/CMakeLists.txt
index 44f9a0f2a2..abea93e0dc 100644
--- a/plugins/operators/CMakeLists.txt
+++ b/plugins/operators/CMakeLists.txt
@@ -8,6 +8,9 @@ if(ADIOS2_HAVE_Sodium)
     EncryptionOperator.cpp
   )
   target_link_libraries(EncryptionOperator adios2_core sodium)
+  if (NOT CMAKE_SKIP_INSTALL_RPATH)
+    set_target_properties(EncryptionOperator PROPERTIES INSTALL_RPATH_USE_LINK_PATH TRUE)
+  endif()
   install(TARGETS EncryptionOperator  EXPORT adios2Exports
     RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT adios2_core-runtime
     LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} COMPONENT adios2_core-libraries NAMELINK_COMPONENT adios2_core-development
diff --git a/scripts/ci/circle/postCDashStatus.sh b/scripts/ci/circle/postCDashStatus.sh
index 800b192d77..78ec1e2228 100755
--- a/scripts/ci/circle/postCDashStatus.sh
+++ b/scripts/ci/circle/postCDashStatus.sh
@@ -4,7 +4,7 @@ USER=${STATUS_ROBOT_NAME}
 TOKEN=${STATUS_ROBOT_KEY}
 COMMIT=${CIRCLE_SHA1}
 CDASH_STATUS_CONTEXT="cdash"
-SOURCE_DIR="$(readlink -f ${CIRCLE_WORKING_DIRECTORY}/source)"
+SOURCE_DIR="$(readlink -f "${CIRCLE_WORKING_DIRECTORY}"/source)"
 
 build_status_body() {
   cat <<EOF
@@ -17,9 +17,10 @@ build_status_body() {
 EOF
 }
 
-PYTHON_SCRIPT="${SOURCE_DIR}/scripts/ci/findStatus.py"
-curl -u "${USER}:${TOKEN}" "${API_BASE}/commits/${COMMIT}/statuses" | python3 ${PYTHON_SCRIPT} --context ${CDASH_STATUS_CONTEXT}
-if [ $? -ne 0 ]
+PYTHON_SCRIPT="${SOURCE_DIR}/scripts/ci/circle/findStatus.py"
+curl -u "${USER}:${TOKEN}" "${API_BASE}/commits/${COMMIT}/statuses" | python3 "${PYTHON_SCRIPT}" --context ${CDASH_STATUS_CONTEXT}
+exit_status=$?
+if [ "$exit_status" -ne 0 ]
 then
   echo "Need to post a status for context ${CDASH_STATUS_CONTEXT}"
   postBody="$(build_status_body)"
diff --git a/scripts/ci/circle/run.sh b/scripts/ci/circle/run.sh
index c3e045c8e6..ed782db659 100755
--- a/scripts/ci/circle/run.sh
+++ b/scripts/ci/circle/run.sh
@@ -1,5 +1,6 @@
 #!/bin/bash --login
 
+# shellcheck source=/dev/null
 . /etc/profile.d/modules.sh
 
 # Parse the branch name used by the PR
@@ -8,8 +9,9 @@ REALBRANCH="${CIRCLE_BRANCH}"
 if [ -n "${CIRCLE_PR_NUMBER}" ]
 then
   APIURL="${API_BASE}/pulls/${CIRCLE_PR_NUMBER}"
-  RESULT="$(curl -s ${APIURL} | python3 -c "import sys, json; print(json.load(sys.stdin)['head']['ref'])" 2> /dev/null)"
-  if [ $? -eq 0 ]
+  RESULT="$(curl -s "${APIURL}" | python3 -c "import sys, json; print(json.load(sys.stdin)['head']['ref'])" 2> /dev/null)"
+  exit_status=$?
+  if [ "$exit_status" -eq 0 ]
   then
     REALBRANCH="$(echo "${RESULT}" | tr '/' '-')"
   fi
@@ -70,8 +72,9 @@ echo "**********Env End************"
 
 echo "**********CTest Begin**********"
 ${CTEST} --version
-echo ${CTEST} -VV -S ${CTEST_SCRIPT} -Ddashboard_full=OFF ${CTEST_STEP_ARGS}
-${CTEST} -VV -S ${CTEST_SCRIPT} -Ddashboard_full=OFF ${CTEST_STEP_ARGS}
+echo ${CTEST} -VV -S "${CTEST_SCRIPT}" -Ddashboard_full=OFF "${CTEST_STEP_ARGS}"
+# shellcheck disable=SC2086
+${CTEST} -VV -S "${CTEST_SCRIPT}" -Ddashboard_full=OFF ${CTEST_STEP_ARGS}
 RET=$?
 echo "**********CTest End************"
 
diff --git a/scripts/ci/cmake/ci-el8-icc-mpich.cmake b/scripts/ci/cmake/ci-el8-icc-mpich.cmake
new file mode 100644
index 0000000000..cf1bf07c66
--- /dev/null
+++ b/scripts/ci/cmake/ci-el8-icc-mpich.cmake
@@ -0,0 +1,37 @@
+include(ProcessorCount)
+ProcessorCount(NCPUS)
+math(EXPR N2CPUS "${NCPUS}*2")
+
+set(ENV{CC}  icc)
+set(ENV{CXX} icpc)
+set(ENV{FC}  ifort)
+
+set(dashboard_cache "
+BUILD_TESTING:BOOL=ON
+ADIOS2_BUILD_EXAMPLES:BOOL=ON
+
+ADIOS2_USE_BZip2:BOOL=ON
+ADIOS2_USE_Blosc:BOOL=ON
+ADIOS2_USE_DataMan:BOOL=ON
+ADIOS2_USE_DataSpaces:BOOL=OFF
+ADIOS2_USE_Fortran:BOOL=OFF
+ADIOS2_USE_HDF5:BOOL=ON
+ADIOS2_USE_MPI:BOOL=ON
+ADIOS2_USE_Python:BOOL=ON
+ADIOS2_USE_SZ:BOOL=ON
+ADIOS2_USE_ZeroMQ:STRING=ON
+ADIOS2_USE_ZFP:BOOL=ON
+
+CMAKE_C_COMPILER_LAUNCHER=ccache
+CMAKE_CXX_COMPILER_LAUNCHER=ccache
+CMAKE_C_FLAGS:STRING=-Wall -diag-disable=10441
+CMAKE_C_FLAGS_DEBUG:STRING=-g -O0
+CMAKE_CXX_FLAGS:STRING=-Wall -diag-disable=10441
+CMAKE_CXX_FLAGS_DEBUG:STRING=-g -O0
+
+MPIEXEC_MAX_NUMPROCS:STRING=${N2CPUS}
+")
+
+set(CTEST_CMAKE_GENERATOR "Unix Makefiles")
+list(APPEND CTEST_UPDATE_NOTES_FILES "${CMAKE_CURRENT_LIST_FILE}")
+include(${CMAKE_CURRENT_LIST_DIR}/ci-common.cmake)
diff --git a/scripts/ci/cmake/ci-el8-icc-ompi.cmake b/scripts/ci/cmake/ci-el8-icc-ompi.cmake
index c191d79c26..6f8803e44d 100644
--- a/scripts/ci/cmake/ci-el8-icc-ompi.cmake
+++ b/scripts/ci/cmake/ci-el8-icc-ompi.cmake
@@ -1,7 +1,3 @@
-include(ProcessorCount)
-ProcessorCount(NCPUS)
-math(EXPR N2CPUS "${NCPUS}*2")
-
 set(ENV{CC}  icc)
 set(ENV{CXX} icpc)
 set(ENV{FC}  ifort)
@@ -28,9 +24,6 @@ CMAKE_C_FLAGS:STRING=-Wall -diag-disable=10441
 CMAKE_C_FLAGS_DEBUG:STRING=-g -O0
 CMAKE_CXX_FLAGS:STRING=-Wall -diag-disable=10441
 CMAKE_CXX_FLAGS_DEBUG:STRING=-g -O0
-
-MPIEXEC_EXTRA_FLAGS:STRING=--allow-run-as-root --oversubscribe
-MPIEXEC_MAX_NUMPROCS:STRING=${N2CPUS}
 ")
 
 set(CTEST_CMAKE_GENERATOR "Unix Makefiles")
diff --git a/scripts/ci/cmake/ci-el8-oneapi-mpich.cmake b/scripts/ci/cmake/ci-el8-oneapi-mpich.cmake
new file mode 100644
index 0000000000..7a837a984a
--- /dev/null
+++ b/scripts/ci/cmake/ci-el8-oneapi-mpich.cmake
@@ -0,0 +1,38 @@
+include(ProcessorCount)
+ProcessorCount(NCPUS)
+math(EXPR N2CPUS "${NCPUS}*2")
+
+set(ENV{CC}  icx)
+set(ENV{CXX} icpx)
+set(ENV{FC}  ifort) # oneapi fortran compiler currently has issues
+
+set(dashboard_cache "
+BUILD_TESTING:BOOL=ON
+ADIOS2_BUILD_EXAMPLES:BOOL=ON
+
+ADIOS2_USE_BZip2:BOOL=ON
+ADIOS2_USE_Blosc:BOOL=OFF
+ADIOS2_USE_DataMan:BOOL=ON
+ADIOS2_USE_DataSpaces:BOOL=OFF
+ADIOS2_USE_Fortran:BOOL=OFF
+ADIOS2_USE_HDF5:BOOL=ON
+ADIOS2_USE_MPI:BOOL=ON
+ADIOS2_USE_Python:BOOL=ON
+ADIOS2_USE_SZ:BOOL=ON
+ADIOS2_USE_ZeroMQ:STRING=ON
+ADIOS2_USE_ZFP:BOOL=ON
+
+CMAKE_C_COMPILER_LAUNCHER=ccache
+CMAKE_CXX_COMPILER_LAUNCHER=ccache
+CMAKE_C_FLAGS:STRING=-Wall
+CMAKE_C_FLAGS_DEBUG:STRING=-g -O0
+CMAKE_CXX_FLAGS:STRING=-Wall
+CMAKE_CXX_FLAGS_DEBUG:STRING=-g -O0
+CMAKE_Fortran_FLAGS:STRING=-W1
+
+MPIEXEC_MAX_NUMPROCS:STRING=${N2CPUS}
+")
+
+set(CTEST_CMAKE_GENERATOR "Unix Makefiles")
+list(APPEND CTEST_UPDATE_NOTES_FILES "${CMAKE_CURRENT_LIST_FILE}")
+include(${CMAKE_CURRENT_LIST_DIR}/ci-common.cmake)
diff --git a/scripts/ci/cmake/ci-el8-oneapi-ompi.cmake b/scripts/ci/cmake/ci-el8-oneapi-ompi.cmake
index 7a637d1888..8a1c5e5fa9 100644
--- a/scripts/ci/cmake/ci-el8-oneapi-ompi.cmake
+++ b/scripts/ci/cmake/ci-el8-oneapi-ompi.cmake
@@ -1,7 +1,3 @@
-include(ProcessorCount)
-ProcessorCount(NCPUS)
-math(EXPR N2CPUS "${NCPUS}*2")
-
 set(ENV{CC}  icx)
 set(ENV{CXX} icpx)
 set(ENV{FC}  ifort) # oneapi fortran compiler currently has issues
@@ -29,9 +25,6 @@ CMAKE_C_FLAGS_DEBUG:STRING=-g -O0
 CMAKE_CXX_FLAGS:STRING=-Wall
 CMAKE_CXX_FLAGS_DEBUG:STRING=-g -O0
 CMAKE_Fortran_FLAGS:STRING=-W1
-
-MPIEXEC_EXTRA_FLAGS:STRING=--allow-run-as-root --oversubscribe
-MPIEXEC_MAX_NUMPROCS:STRING=${N2CPUS}
 ")
 
 set(CTEST_CMAKE_GENERATOR "Unix Makefiles")
diff --git a/scripts/ci/cmake/ci-fedora-asan.cmake b/scripts/ci/cmake/ci-fedora-asan.cmake
index 8651193fd8..8b7e6dcd65 100644
--- a/scripts/ci/cmake/ci-fedora-asan.cmake
+++ b/scripts/ci/cmake/ci-fedora-asan.cmake
@@ -4,7 +4,7 @@ set(ENV{CC}  clang)
 set(ENV{CXX} clang++)
 set(ASAN_FLAGS "-fsanitize=address -fno-omit-frame-pointer -pthread -mllvm -asan-use-private-alias=1 -Wno-unused-command-line-argument")
 set(ENV{ASAN_OPTIONS} "use_odr_indicator=1")
-set(ENV{LSAN_OPTIONS} "suppressions=${CMAKE_SOURCE_DIR}/thirdparty/perfstubs/perfstubs.supp")
+set(ENV{LSAN_OPTIONS} "suppressions=$ENV{CI_SOURCE_DIR}/thirdparty/perfstubs/perfstubs.supp")
 set(ENV{CFLAGS}   "${ASAN_FLAGS}")
 set(ENV{CXXFLAGS} "${ASAN_FLAGS}")
 
diff --git a/scripts/ci/cmake/ci-ubuntu20.04-clang10-mpich.cmake b/scripts/ci/cmake/ci-ubuntu20.04-clang10-mpich.cmake
new file mode 100644
index 0000000000..fd1ab9be47
--- /dev/null
+++ b/scripts/ci/cmake/ci-ubuntu20.04-clang10-mpich.cmake
@@ -0,0 +1,49 @@
+include(ProcessorCount)
+ProcessorCount(NCPUS)
+math(EXPR N2CPUS "${NCPUS}*2")
+
+set(ENV{CC}  clang-10)
+set(ENV{CXX} clang++-10)
+set(ENV{FC}  gfortran-11)
+
+execute_process(
+  COMMAND "python3-config" "--prefix"
+  OUTPUT_VARIABLE PY_ROOT
+  OUTPUT_STRIP_TRAILING_WHITESPACE)
+
+set(dashboard_cache "
+BUILD_TESTING:BOOL=ON
+ADIOS2_BUILD_EXAMPLES:BOOL=ON
+
+ADIOS2_USE_Blosc:BOOL=ON
+ADIOS2_USE_BZip2:BOOL=ON
+ADIOS2_USE_DataMan:BOOL=ON
+ADIOS2_USE_Fortran:BOOL=ON
+ADIOS2_USE_HDF5:BOOL=ON
+ADIOS2_USE_MPI:BOOL=ON
+ADIOS2_USE_Python:BOOL=ON
+ADIOS2_USE_SZ:BOOL=ON
+ADIOS2_USE_ZeroMQ:STRING=ON
+ADIOS2_USE_ZFP:BOOL=ON
+
+Python_ROOT_DIR:PATH=${PY_ROOT}
+Python_FIND_STRATEGY:STRING=LOCATION
+Python_FIND_FRAMEWORK:STRING=FIRST
+
+CMAKE_C_COMPILER_LAUNCHER=ccache
+CMAKE_CXX_COMPILER_LAUNCHER=ccache
+CMAKE_C_FLAGS:STRING=-Wall
+CMAKE_CXX_FLAGS:STRING=-Wall
+CMAKE_Fortran_FLAGS:STRING=-Wall
+
+MPIEXEC_MAX_NUMPROCS:STRING=${N2CPUS}
+")
+
+# TODO: The Kill* and PreciousTimeStep tests fail (due to timeout) when
+# TODO: adios2 is built "--with-device=ch3:sock:tcp".  Once this is fixed
+# TODO: in the mpi_dp, we can re-enable these tests.
+set(CTEST_TEST_ARGS EXCLUDE "KillReader|KillWriter|PreciousTimestep")
+
+set(CTEST_CMAKE_GENERATOR "Ninja")
+list(APPEND CTEST_UPDATE_NOTES_FILES "${CMAKE_CURRENT_LIST_FILE}")
+include(${CMAKE_CURRENT_LIST_DIR}/ci-common.cmake)
diff --git a/scripts/ci/cmake/ci-ubuntu20.04-clang10-ompi.cmake b/scripts/ci/cmake/ci-ubuntu20.04-clang10-ompi.cmake
index e62ed9b57a..447c304316 100644
--- a/scripts/ci/cmake/ci-ubuntu20.04-clang10-ompi.cmake
+++ b/scripts/ci/cmake/ci-ubuntu20.04-clang10-ompi.cmake
@@ -1,7 +1,3 @@
-include(ProcessorCount)
-ProcessorCount(NCPUS)
-math(EXPR N2CPUS "${NCPUS}*2")
-
 set(ENV{CC}  clang-10)
 set(ENV{CXX} clang++-10)
 set(ENV{FC}  gfortran-11)
@@ -35,9 +31,6 @@ CMAKE_CXX_COMPILER_LAUNCHER=ccache
 CMAKE_C_FLAGS:STRING=-Wall
 CMAKE_CXX_FLAGS:STRING=-Wall
 CMAKE_Fortran_FLAGS:STRING=-Wall
-
-MPIEXEC_EXTRA_FLAGS:STRING=--allow-run-as-root --oversubscribe
-MPIEXEC_MAX_NUMPROCS:STRING=${N2CPUS}
 ")
 
 set(CTEST_CMAKE_GENERATOR "Ninja")
diff --git a/scripts/ci/cmake/ci-ubuntu20.04-clang6-mpich.cmake b/scripts/ci/cmake/ci-ubuntu20.04-clang6-mpich.cmake
new file mode 100644
index 0000000000..cb2977eeb6
--- /dev/null
+++ b/scripts/ci/cmake/ci-ubuntu20.04-clang6-mpich.cmake
@@ -0,0 +1,51 @@
+include(ProcessorCount)
+ProcessorCount(NCPUS)
+math(EXPR N2CPUS "${NCPUS}*2")
+
+set(ENV{CC}  clang-6.0)
+set(ENV{CXX} clang++-6.0)
+set(ENV{FC}  gfortran-11)
+
+execute_process(
+  COMMAND "python3-config" "--prefix"
+  OUTPUT_VARIABLE PY_ROOT
+  OUTPUT_STRIP_TRAILING_WHITESPACE)
+
+set(dashboard_cache "
+BUILD_TESTING:BOOL=ON
+ADIOS2_BUILD_EXAMPLES:BOOL=ON
+
+ADIOS2_USE_Blosc:BOOL=ON
+ADIOS2_USE_BZip2:BOOL=ON
+ADIOS2_USE_DataMan:BOOL=ON
+ADIOS2_USE_Fortran:BOOL=ON
+ADIOS2_USE_HDF5:BOOL=ON
+ADIOS2_USE_MGARD:BOOL=OFF
+ADIOS2_USE_MPI:BOOL=ON
+ADIOS2_USE_Python:BOOL=ON
+ADIOS2_USE_SZ:BOOL=ON
+ADIOS2_USE_ZeroMQ:STRING=ON
+ADIOS2_USE_ZFP:BOOL=ON
+
+Python_ROOT_DIR:PATH=${PY_ROOT}
+Python_FIND_STRATEGY:STRING=LOCATION
+Python_FIND_FRAMEWORK:STRING=FIRST
+
+CMAKE_C_COMPILER_LAUNCHER=ccache
+CMAKE_CXX_COMPILER_LAUNCHER=ccache
+CMAKE_C_FLAGS:STRING=-Wall
+CMAKE_CXX_FLAGS:STRING=-Wall
+CMAKE_Fortran_FLAGS:STRING=-Wall
+
+MPIEXEC_MAX_NUMPROCS:STRING=${N2CPUS}
+")
+
+# We have a dedicated build for clang6 + serial, so we exclude ".Serial$"
+# TODO: The Kill* and PreciousTimeStep tests fail (due to timeout) when
+# TODO: adios2 is built "--with-device=ch3:sock:tcp".  Once this is fixed
+# TODO:  in the mpi_dp, we can re-enable these tests.
+set(CTEST_TEST_ARGS EXCLUDE "KillReader|KillWriter|PreciousTimestep|.Serial$")
+
+set(CTEST_CMAKE_GENERATOR "Ninja")
+list(APPEND CTEST_UPDATE_NOTES_FILES "${CMAKE_CURRENT_LIST_FILE}")
+include(${CMAKE_CURRENT_LIST_DIR}/ci-common.cmake)
diff --git a/scripts/ci/cmake/ci-ubuntu20.04-clang6-ompi.cmake b/scripts/ci/cmake/ci-ubuntu20.04-clang6-ompi.cmake
index 2bdf2c410d..bfe89a699c 100644
--- a/scripts/ci/cmake/ci-ubuntu20.04-clang6-ompi.cmake
+++ b/scripts/ci/cmake/ci-ubuntu20.04-clang6-ompi.cmake
@@ -1,7 +1,3 @@
-include(ProcessorCount)
-ProcessorCount(NCPUS)
-math(EXPR N2CPUS "${NCPUS}*2")
-
 set(ENV{CC}  clang-6.0)
 set(ENV{CXX} clang++-6.0)
 set(ENV{FC}  gfortran-11)
@@ -36,9 +32,6 @@ CMAKE_CXX_COMPILER_LAUNCHER=ccache
 CMAKE_C_FLAGS:STRING=-Wall
 CMAKE_CXX_FLAGS:STRING=-Wall
 CMAKE_Fortran_FLAGS:STRING=-Wall
-
-MPIEXEC_EXTRA_FLAGS:STRING=--allow-run-as-root --oversubscribe
-MPIEXEC_MAX_NUMPROCS:STRING=${N2CPUS}
 ")
 
 # We have a dedicated build for this setup without MPI
diff --git a/scripts/ci/cmake/ci-ubuntu20.04-clang6-static-ompi.cmake b/scripts/ci/cmake/ci-ubuntu20.04-clang6-static-ompi.cmake
index 4b338b9c89..71c0f9710e 100644
--- a/scripts/ci/cmake/ci-ubuntu20.04-clang6-static-ompi.cmake
+++ b/scripts/ci/cmake/ci-ubuntu20.04-clang6-static-ompi.cmake
@@ -1,7 +1,3 @@
-include(ProcessorCount)
-ProcessorCount(NCPUS)
-math(EXPR N2CPUS "${NCPUS}*2")
-
 set(ENV{CC}  clang-6.0)
 set(ENV{CXX} clang++-6.0)
 set(ENV{FC}  gfortran-11)
@@ -37,9 +33,6 @@ CMAKE_CXX_COMPILER_LAUNCHER=ccache
 CMAKE_C_FLAGS:STRING=-Wall
 CMAKE_CXX_FLAGS:STRING=-Wall
 CMAKE_Fortran_FLAGS:STRING=-Wall
-
-MPIEXEC_EXTRA_FLAGS:STRING=--allow-run-as-root --oversubscribe
-MPIEXEC_MAX_NUMPROCS:STRING=${N2CPUS}
 ")
 
 set(CTEST_CMAKE_GENERATOR "Ninja")
diff --git a/scripts/ci/cmake/ci-ubuntu20.04-gcc10-mpich.cmake b/scripts/ci/cmake/ci-ubuntu20.04-gcc10-mpich.cmake
index 12ff984d21..f4c486289f 100644
--- a/scripts/ci/cmake/ci-ubuntu20.04-gcc10-mpich.cmake
+++ b/scripts/ci/cmake/ci-ubuntu20.04-gcc10-mpich.cmake
@@ -39,8 +39,17 @@ CMAKE_Fortran_FLAGS:STRING=-Wall
 MPIEXEC_MAX_NUMPROCS:STRING=${N2CPUS}
 ")
 
-set(CTEST_TEST_ARGS
-  EXCLUDE "Engine.BPEngineTest.SzComplex.MPI|Engine.BPEngineTest.ZfpComplex.MPI|.Serial$")
+# TODO: The Kill* and PreciousTimeStep tests fail (due to timeout) when
+# TODO: adios2 is built "--with-device=ch3:sock:tcp".  Once this is fixed
+# TODO:  in the mpi_dp, we can re-enable these tests.
+list(APPEND EXCLUDE_EXPRESSIONS
+  "Engine.BPEngineTest.SzComplex.MPI"
+  "Engine.BPEngineTest.ZfpComplex.MPI"
+  "KillReader"
+  "KillWriter"
+  "PreciousTimestep")
+list(JOIN EXCLUDE_EXPRESSIONS "|" TEST_EXCLUDE_STRING)
+set(CTEST_TEST_ARGS EXCLUDE "${TEST_EXCLUDE_STRING}")
 
 set(CTEST_CMAKE_GENERATOR "Ninja")
 list(APPEND CTEST_UPDATE_NOTES_FILES "${CMAKE_CURRENT_LIST_FILE}")
diff --git a/scripts/ci/cmake/ci-ubuntu20.04-gcc10-ompi.cmake b/scripts/ci/cmake/ci-ubuntu20.04-gcc10-ompi.cmake
index 0f80809145..6463d8f72a 100644
--- a/scripts/ci/cmake/ci-ubuntu20.04-gcc10-ompi.cmake
+++ b/scripts/ci/cmake/ci-ubuntu20.04-gcc10-ompi.cmake
@@ -1,7 +1,3 @@
-include(ProcessorCount)
-ProcessorCount(NCPUS)
-math(EXPR N2CPUS "${NCPUS}*2")
-
 set(ENV{CC}  gcc)
 set(ENV{CXX} g++)
 set(ENV{FC}  gfortran)
@@ -35,9 +31,6 @@ CMAKE_CXX_COMPILER_LAUNCHER=ccache
 CMAKE_C_FLAGS:STRING=-Wall
 CMAKE_CXX_FLAGS:STRING=-Wall
 CMAKE_Fortran_FLAGS:STRING=-Wall
-
-MPIEXEC_EXTRA_FLAGS:STRING=--allow-run-as-root --oversubscribe
-MPIEXEC_MAX_NUMPROCS:STRING=${N2CPUS}
 ")
 
 # We have a dedicated build for this setup without MPI
diff --git a/scripts/ci/cmake/ci-ubuntu20.04-gcc11-mpich.cmake b/scripts/ci/cmake/ci-ubuntu20.04-gcc11-mpich.cmake
new file mode 100644
index 0000000000..e83c946898
--- /dev/null
+++ b/scripts/ci/cmake/ci-ubuntu20.04-gcc11-mpich.cmake
@@ -0,0 +1,49 @@
+include(ProcessorCount)
+ProcessorCount(NCPUS)
+math(EXPR N2CPUS "${NCPUS}*2")
+
+set(ENV{CC}  gcc)
+set(ENV{CXX} g++)
+set(ENV{FC}  gfortran)
+
+execute_process(
+  COMMAND "python3-config" "--prefix"
+  OUTPUT_VARIABLE PY_ROOT
+  OUTPUT_STRIP_TRAILING_WHITESPACE)
+
+set(dashboard_cache "
+BUILD_TESTING:BOOL=ON
+ADIOS2_BUILD_EXAMPLES:BOOL=ON
+
+ADIOS2_USE_BZip2:BOOL=ON
+ADIOS2_USE_Blosc:BOOL=ON
+ADIOS2_USE_DataMan:BOOL=ON
+ADIOS2_USE_Fortran:BOOL=ON
+ADIOS2_USE_HDF5:BOOL=ON
+ADIOS2_USE_MPI:BOOL=ON
+ADIOS2_USE_Python:BOOL=ON
+ADIOS2_USE_SZ:BOOL=ON
+ADIOS2_USE_ZeroMQ:STRING=ON
+ADIOS2_USE_ZFP:BOOL=ON
+
+Python_ROOT_DIR:PATH=${PY_ROOT}
+Python_FIND_STRATEGY:STRING=LOCATION
+Python_FIND_FRAMEWORK:STRING=FIRST
+
+CMAKE_C_COMPILER_LAUNCHER=ccache
+CMAKE_CXX_COMPILER_LAUNCHER=ccache
+CMAKE_C_FLAGS:STRING=-Wall
+CMAKE_CXX_FLAGS:STRING=-Wall
+CMAKE_Fortran_FLAGS:STRING=-Wall
+
+MPIEXEC_MAX_NUMPROCS:STRING=${N2CPUS}
+")
+
+# TODO: The Kill* and PreciousTimeStep tests fail (due to timeout) when
+# TODO: adios2 is built "--with-device=ch3:sock:tcp".  Once this is fixed
+# TODO:  in the mpi_dp, we can re-enable these tests.
+set(CTEST_TEST_ARGS EXCLUDE "KillReader|KillWriter|PreciousTimestep")
+
+set(CTEST_CMAKE_GENERATOR "Ninja")
+list(APPEND CTEST_UPDATE_NOTES_FILES "${CMAKE_CURRENT_LIST_FILE}")
+include(${CMAKE_CURRENT_LIST_DIR}/ci-common.cmake)
diff --git a/scripts/ci/cmake/ci-ubuntu20.04-gcc11-ompi.cmake b/scripts/ci/cmake/ci-ubuntu20.04-gcc11-ompi.cmake
index 70cd4b3552..a686bb0c87 100644
--- a/scripts/ci/cmake/ci-ubuntu20.04-gcc11-ompi.cmake
+++ b/scripts/ci/cmake/ci-ubuntu20.04-gcc11-ompi.cmake
@@ -1,7 +1,3 @@
-include(ProcessorCount)
-ProcessorCount(NCPUS)
-math(EXPR N2CPUS "${NCPUS}*2")
-
 set(ENV{CC}  gcc)
 set(ENV{CXX} g++)
 set(ENV{FC}  gfortran)
@@ -35,9 +31,6 @@ CMAKE_CXX_COMPILER_LAUNCHER=ccache
 CMAKE_C_FLAGS:STRING=-Wall
 CMAKE_CXX_FLAGS:STRING=-Wall
 CMAKE_Fortran_FLAGS:STRING=-Wall
-
-MPIEXEC_EXTRA_FLAGS:STRING=--allow-run-as-root --oversubscribe
-MPIEXEC_MAX_NUMPROCS:STRING=${N2CPUS}
 ")
 
 set(CTEST_CMAKE_GENERATOR "Ninja")
diff --git a/scripts/ci/cmake/ci-ubuntu20.04-gcc8-mpich.cmake b/scripts/ci/cmake/ci-ubuntu20.04-gcc8-mpich.cmake
new file mode 100644
index 0000000000..4824d437ba
--- /dev/null
+++ b/scripts/ci/cmake/ci-ubuntu20.04-gcc8-mpich.cmake
@@ -0,0 +1,50 @@
+include(ProcessorCount)
+ProcessorCount(NCPUS)
+math(EXPR N2CPUS "${NCPUS}*2")
+
+set(ENV{CC}  gcc)
+set(ENV{CXX} g++)
+set(ENV{FC}  gfortran)
+
+execute_process(
+  COMMAND "python3-config" "--prefix"
+  OUTPUT_VARIABLE PY_ROOT
+  OUTPUT_STRIP_TRAILING_WHITESPACE)
+
+set(dashboard_cache "
+BUILD_TESTING:BOOL=ON
+ADIOS2_BUILD_EXAMPLES:BOOL=ON
+
+ADIOS2_USE_BZip2:BOOL=ON
+ADIOS2_USE_Blosc:BOOL=ON
+ADIOS2_USE_DataMan:BOOL=ON
+ADIOS2_USE_Fortran:BOOL=ON
+ADIOS2_USE_HDF5:BOOL=ON
+ADIOS2_USE_MPI:BOOL=ON
+ADIOS2_USE_Python:BOOL=ON
+ADIOS2_USE_SZ:BOOL=ON
+ADIOS2_USE_ZeroMQ:STRING=ON
+ADIOS2_USE_ZFP:BOOL=ON
+
+Python_ROOT_DIR:PATH=${PY_ROOT}
+Python_FIND_STRATEGY:STRING=LOCATION
+Python_FIND_FRAMEWORK:STRING=FIRST
+
+CMAKE_C_COMPILER_LAUNCHER=ccache
+CMAKE_CXX_COMPILER_LAUNCHER=ccache
+CMAKE_C_FLAGS:STRING=-Wall
+CMAKE_CXX_FLAGS:STRING=-Wall
+CMAKE_Fortran_FLAGS:STRING=-Wall
+
+MPIEXEC_MAX_NUMPROCS:STRING=${N2CPUS}
+")
+
+# We have a dedicated build for gcc8 + serial, so we exclude ".Serial$"
+# TODO: The Kill* and PreciousTimeStep tests fail (due to timeout) when
+# TODO: adios2 is built "--with-device=ch3:sock:tcp".  Once this is fixed
+# TODO:  in the mpi_dp, we can re-enable these tests.
+set(CTEST_TEST_ARGS EXCLUDE "KillReader|KillWriter|PreciousTimestep|.Serial$")
+
+set(CTEST_CMAKE_GENERATOR "Ninja")
+list(APPEND CTEST_UPDATE_NOTES_FILES "${CMAKE_CURRENT_LIST_FILE}")
+include(${CMAKE_CURRENT_LIST_DIR}/ci-common.cmake)
diff --git a/scripts/ci/cmake/ci-ubuntu20.04-gcc8-ompi.cmake b/scripts/ci/cmake/ci-ubuntu20.04-gcc8-ompi.cmake
index 70cd4b3552..6463d8f72a 100644
--- a/scripts/ci/cmake/ci-ubuntu20.04-gcc8-ompi.cmake
+++ b/scripts/ci/cmake/ci-ubuntu20.04-gcc8-ompi.cmake
@@ -1,7 +1,3 @@
-include(ProcessorCount)
-ProcessorCount(NCPUS)
-math(EXPR N2CPUS "${NCPUS}*2")
-
 set(ENV{CC}  gcc)
 set(ENV{CXX} g++)
 set(ENV{FC}  gfortran)
@@ -35,11 +31,11 @@ CMAKE_CXX_COMPILER_LAUNCHER=ccache
 CMAKE_C_FLAGS:STRING=-Wall
 CMAKE_CXX_FLAGS:STRING=-Wall
 CMAKE_Fortran_FLAGS:STRING=-Wall
-
-MPIEXEC_EXTRA_FLAGS:STRING=--allow-run-as-root --oversubscribe
-MPIEXEC_MAX_NUMPROCS:STRING=${N2CPUS}
 ")
 
+# We have a dedicated build for this setup without MPI
+set(CTEST_TEST_ARGS EXCLUDE ".Serial$")
+
 set(CTEST_CMAKE_GENERATOR "Ninja")
 list(APPEND CTEST_UPDATE_NOTES_FILES "${CMAKE_CURRENT_LIST_FILE}")
 include(${CMAKE_CURRENT_LIST_DIR}/ci-common.cmake)
diff --git a/scripts/ci/cmake/ci-ubuntu20.04-gcc8-static-ompi.cmake b/scripts/ci/cmake/ci-ubuntu20.04-gcc8-static-ompi.cmake
index 9f4b991bc8..88ed69ff7a 100644
--- a/scripts/ci/cmake/ci-ubuntu20.04-gcc8-static-ompi.cmake
+++ b/scripts/ci/cmake/ci-ubuntu20.04-gcc8-static-ompi.cmake
@@ -1,7 +1,3 @@
-include(ProcessorCount)
-ProcessorCount(NCPUS)
-math(EXPR N2CPUS "${NCPUS}*2")
-
 set(ENV{CC}  gcc)
 set(ENV{CXX} g++)
 set(ENV{FC}  gfortran)
@@ -37,9 +33,6 @@ CMAKE_CXX_COMPILER_LAUNCHER=ccache
 CMAKE_C_FLAGS:STRING=-Wall
 CMAKE_CXX_FLAGS:STRING=-Wall
 CMAKE_Fortran_FLAGS:STRING=-Wall
-
-MPIEXEC_EXTRA_FLAGS:STRING=--allow-run-as-root --oversubscribe
-MPIEXEC_MAX_NUMPROCS:STRING=${N2CPUS}
 ")
 
 set(CTEST_CMAKE_GENERATOR "Ninja")
diff --git a/scripts/ci/cmake/ci-ubuntu20.04-gcc9-mpich.cmake b/scripts/ci/cmake/ci-ubuntu20.04-gcc9-mpich.cmake
new file mode 100644
index 0000000000..e83c946898
--- /dev/null
+++ b/scripts/ci/cmake/ci-ubuntu20.04-gcc9-mpich.cmake
@@ -0,0 +1,49 @@
+include(ProcessorCount)
+ProcessorCount(NCPUS)
+math(EXPR N2CPUS "${NCPUS}*2")
+
+set(ENV{CC}  gcc)
+set(ENV{CXX} g++)
+set(ENV{FC}  gfortran)
+
+execute_process(
+  COMMAND "python3-config" "--prefix"
+  OUTPUT_VARIABLE PY_ROOT
+  OUTPUT_STRIP_TRAILING_WHITESPACE)
+
+set(dashboard_cache "
+BUILD_TESTING:BOOL=ON
+ADIOS2_BUILD_EXAMPLES:BOOL=ON
+
+ADIOS2_USE_BZip2:BOOL=ON
+ADIOS2_USE_Blosc:BOOL=ON
+ADIOS2_USE_DataMan:BOOL=ON
+ADIOS2_USE_Fortran:BOOL=ON
+ADIOS2_USE_HDF5:BOOL=ON
+ADIOS2_USE_MPI:BOOL=ON
+ADIOS2_USE_Python:BOOL=ON
+ADIOS2_USE_SZ:BOOL=ON
+ADIOS2_USE_ZeroMQ:STRING=ON
+ADIOS2_USE_ZFP:BOOL=ON
+
+Python_ROOT_DIR:PATH=${PY_ROOT}
+Python_FIND_STRATEGY:STRING=LOCATION
+Python_FIND_FRAMEWORK:STRING=FIRST
+
+CMAKE_C_COMPILER_LAUNCHER=ccache
+CMAKE_CXX_COMPILER_LAUNCHER=ccache
+CMAKE_C_FLAGS:STRING=-Wall
+CMAKE_CXX_FLAGS:STRING=-Wall
+CMAKE_Fortran_FLAGS:STRING=-Wall
+
+MPIEXEC_MAX_NUMPROCS:STRING=${N2CPUS}
+")
+
+# TODO: The Kill* and PreciousTimeStep tests fail (due to timeout) when
+# TODO: adios2 is built "--with-device=ch3:sock:tcp".  Once this is fixed
+# TODO:  in the mpi_dp, we can re-enable these tests.
+set(CTEST_TEST_ARGS EXCLUDE "KillReader|KillWriter|PreciousTimestep")
+
+set(CTEST_CMAKE_GENERATOR "Ninja")
+list(APPEND CTEST_UPDATE_NOTES_FILES "${CMAKE_CURRENT_LIST_FILE}")
+include(${CMAKE_CURRENT_LIST_DIR}/ci-common.cmake)
diff --git a/scripts/ci/cmake/ci-ubuntu20.04-gcc9-ompi.cmake b/scripts/ci/cmake/ci-ubuntu20.04-gcc9-ompi.cmake
index 70cd4b3552..a686bb0c87 100644
--- a/scripts/ci/cmake/ci-ubuntu20.04-gcc9-ompi.cmake
+++ b/scripts/ci/cmake/ci-ubuntu20.04-gcc9-ompi.cmake
@@ -1,7 +1,3 @@
-include(ProcessorCount)
-ProcessorCount(NCPUS)
-math(EXPR N2CPUS "${NCPUS}*2")
-
 set(ENV{CC}  gcc)
 set(ENV{CXX} g++)
 set(ENV{FC}  gfortran)
@@ -35,9 +31,6 @@ CMAKE_CXX_COMPILER_LAUNCHER=ccache
 CMAKE_C_FLAGS:STRING=-Wall
 CMAKE_CXX_FLAGS:STRING=-Wall
 CMAKE_Fortran_FLAGS:STRING=-Wall
-
-MPIEXEC_EXTRA_FLAGS:STRING=--allow-run-as-root --oversubscribe
-MPIEXEC_MAX_NUMPROCS:STRING=${N2CPUS}
 ")
 
 set(CTEST_CMAKE_GENERATOR "Ninja")
diff --git a/scripts/ci/gh-actions/check-branch-name.sh b/scripts/ci/gh-actions/check-branch-name.sh
index 83bffd8e85..52d09e6cb0 100755
--- a/scripts/ci/gh-actions/check-branch-name.sh
+++ b/scripts/ci/gh-actions/check-branch-name.sh
@@ -4,7 +4,7 @@ if [ "${GITHUB_EVENT_NAME}" = "pull_request" ]
 then
   if [ -z "${BASE_REF}" ]
   then
-    BASE_REF="$(jq -r .pull_request.base.ref ${GITHUB_EVENT_PATH})"
+    BASE_REF="$(jq -r .pull_request.base.ref "${GITHUB_EVENT_PATH}")"
   fi
   echo "Base ref: ${BASE_REF}"
   echo "Head ref: ${GITHUB_HEAD_REF}"
diff --git a/scripts/ci/gh-actions/get-changed-files.sh b/scripts/ci/gh-actions/get-changed-files.sh
index 9e3ed8fa5b..5afcd19396 100755
--- a/scripts/ci/gh-actions/get-changed-files.sh
+++ b/scripts/ci/gh-actions/get-changed-files.sh
@@ -3,12 +3,12 @@
 case "${GITHUB_EVENT_NAME}"
 in
   pull_request)
-    BASE_SHA=$(jq -r .pull_request.base.sha ${GITHUB_EVENT_PATH})
-    HEAD_SHA=$(jq -r .pull_request.head.sha ${GITHUB_EVENT_PATH})
+    BASE_SHA=$(jq -r .pull_request.base.sha "${GITHUB_EVENT_PATH}")
+    HEAD_SHA=$(jq -r .pull_request.head.sha "${GITHUB_EVENT_PATH}")
     ;;
   push)
-    BASE_SHA=$(jq -r .before ${GITHUB_EVENT_PATH})
-    HEAD_SHA=$(jq -r .after ${GITHUB_EVENT_PATH})
+    BASE_SHA=$(jq -r .before "${GITHUB_EVENT_PATH}")
+    HEAD_SHA=$(jq -r .after "${GITHUB_EVENT_PATH}")
     ;;
   *)
     echo "Unable to get changed files from '${GITHUB_EVENT_NAME}' event"
@@ -20,11 +20,11 @@ echo "Base: ${BASE_SHA}"
 echo "Head: ${HEAD_SHA}"
 echo ""
 
-git fetch origin ${BASE_SHA}
+git fetch origin "${BASE_SHA}"
 
 echo ""
 echo "::group::All changed files"
-git diff --name-only ${BASE_SHA}...${HEAD_SHA} | tee all-changed-files.txt
+git diff --name-only "${BASE_SHA}"..."${HEAD_SHA}" | tee all-changed-files.txt
 
 echo "::group::Filtered changes"
 grep -v '^docs/' all-changed-files.txt | tee filtered-changed-files.txt
diff --git a/scripts/ci/gh-actions/macos-setup.sh b/scripts/ci/gh-actions/macos-setup.sh
index 8cae241e0e..0eb6850f20 100755
--- a/scripts/ci/gh-actions/macos-setup.sh
+++ b/scripts/ci/gh-actions/macos-setup.sh
@@ -8,13 +8,13 @@ then
   echo "Error: GH_YML_MATRIX_COMPILER variable is not defined"
   exit 1
 fi
-XCODE_VER="$(echo ${GH_YML_MATRIX_COMPILER} | sed -e 's|_|.|g' -e 's|xcode||')"
-if [ ! -d /Applications/Xcode_${XCODE_VER}.app ]
+XCODE_VER="$(echo "${GH_YML_MATRIX_COMPILER}" | sed -e 's|_|.|g' -e 's|xcode||')"
+if [ ! -d "/Applications/Xcode_${XCODE_VER}.app" ]
 then
   echo "Error: XCode installation directory /Applications/Xcode_${XCODE_VER}.app does not exist"
   exit 2
 fi
-sudo xcode-select --switch /Applications/Xcode_${XCODE_VER}.app
+sudo xcode-select --switch "/Applications/Xcode_${XCODE_VER}.app"
 
 echo "Installing CMake"
 
@@ -36,7 +36,7 @@ brew install ninja
 
 echo "Installing GCC"
 brew install gcc
-sudo ln -v -s $(which gfortran-11) /usr/local/bin/gfortran
+sudo ln -v -s "$(which gfortran-11)" /usr/local/bin/gfortran
 
 echo "Installing blosc compression"
 brew install c-blosc
diff --git a/scripts/ci/gh-actions/run.sh b/scripts/ci/gh-actions/run.sh
index 86d12a4b49..c511730eea 100755
--- a/scripts/ci/gh-actions/run.sh
+++ b/scripts/ci/gh-actions/run.sh
@@ -72,13 +72,20 @@ mkdir -p "${TMPDIR}"
 # OpenMPI specific setup and workarounds
 if [[ "${GH_YML_MATRIX_PARALLEL}" =~ ompi && "${GH_YML_BASE_OS}" != "Windows" ]]
 then
-  # Quiet some warnings from OpenMPI
-  export OMPI_MCA_btl_base_warn_component_unused=0
-  export OMPI_MCA_btl_vader_single_copy_mechanism=none
+  # Enable run as root
+  export OMPI_ALLOW_RUN_AS_ROOT=1
+  export OMPI_ALLOW_RUN_AS_ROOT_CONFIRM=1
 
   # Enable overscription in OpenMPI
-  export OMPI_MCA_rmaps_base_oversubscribe=1
+  export OMPI_MCA_rmaps_base_oversubscribe=true
   export OMPI_MCA_hwloc_base_binding_policy=none
+
+  # Only use loop interface
+  export OMPI_MCA_btl_tcp_if_include=lo
+
+  # Quiet some warnings from OpenMPI
+  export OMPI_MCA_btl_base_warn_component_unused=0
+  export OMPI_MCA_btl_vader_single_copy_mechanism=none
 fi
 
 if [[ "${GH_YML_MATRIX_PARALLEL}" =~ ompi ]]
@@ -89,20 +96,23 @@ fi
 # Make sure staging tests use localhost
 export ADIOS2_IP=127.0.0.1
 
+# We already paralelize with mpi (MGARD uses OMP)
+export OMP_NUM_THREADS=1
+
 # Load any additional setup scripts
-if [ -f gha/scripts/ci/setup-run/ci-${GH_YML_JOBNAME}.sh ]
+if [ -f "gha/scripts/ci/setup-run/ci-${GH_YML_JOBNAME}.sh" ]
 then
   SETUP_RUN_SCRIPT=gha/scripts/ci/setup-run/ci-${GH_YML_JOBNAME}.sh
-elif [ -f gha/scripts/ci/setup-run/ci-${GH_YML_MATRIX_OS}-${GH_YML_MATRIX_COMPILER}-${GH_YML_MATRIX_PARALLEL}.sh ]
+elif [ -f "gha/scripts/ci/setup-run/ci-${GH_YML_MATRIX_OS}-${GH_YML_MATRIX_COMPILER}-${GH_YML_MATRIX_PARALLEL}.sh" ]
 then
   SETUP_RUN_SCRIPT=gha/scripts/ci/setup-run/ci-${GH_YML_MATRIX_OS}-${GH_YML_MATRIX_COMPILER}-${GH_YML_MATRIX_PARALLEL}.sh
-elif [ -f gha/scripts/ci/setup-run/ci-${GH_YML_MATRIX_OS}-${GH_YML_MATRIX_COMPILER}.sh ]
+elif [ -f "gha/scripts/ci/setup-run/ci-${GH_YML_MATRIX_OS}-${GH_YML_MATRIX_COMPILER}.sh" ]
 then
   SETUP_RUN_SCRIPT=gha/scripts/ci/setup-run/ci-${GH_YML_MATRIX_OS}-${GH_YML_MATRIX_COMPILER}.sh
-elif [ -f gha/scripts/ci/setup-run/ci-${GH_YML_MATRIX_OS}.sh ]
+elif [ -f "gha/scripts/ci/setup-run/ci-${GH_YML_MATRIX_OS}.sh" ]
 then
   SETUP_RUN_SCRIPT=gha/scripts/ci/setup-run/ci-${GH_YML_MATRIX_OS}.sh
-elif [ -f gha/scripts/ci/setup-run/ci-${GH_YML_BASE_OS}.sh ]
+elif [ -f "gha/scripts/ci/setup-run/ci-${GH_YML_BASE_OS}.sh" ]
 then
   SETUP_RUN_SCRIPT=gha/scripts/ci/setup-run/ci-${GH_YML_BASE_OS}.sh
 fi
@@ -116,7 +126,8 @@ echo "::endgroup::"
 echo "::group::Job-run setup (if any)"
 if [ "${SETUP_RUN_SCRIPT:-UNSET}" != "UNSET" ]
 then
-  source ${SETUP_RUN_SCRIPT}
+  # shellcheck source=/dev/null
+  source "${SETUP_RUN_SCRIPT}"
 fi
 echo "::endgroup::"
 
@@ -129,7 +140,8 @@ echo "::group::CTest version"
 echo "::endgroup::"
 
 echo "::group::Execute job step"
-"${CTEST}" -VV -S ${CTEST_SCRIPT} -Ddashboard_full=OFF ${CTEST_STEP_ARGS}
+# shellcheck disable=SC2086
+"${CTEST}" -VV -S "${CTEST_SCRIPT}" -Ddashboard_full=OFF ${CTEST_STEP_ARGS}
 RET=$?
 echo "::endgroup::"
 
diff --git a/scripts/ci/images/0001-mpich-support-ch3-sock.patch b/scripts/ci/images/0001-mpich-support-ch3-sock.patch
new file mode 100644
index 0000000000..908c0eda30
--- /dev/null
+++ b/scripts/ci/images/0001-mpich-support-ch3-sock.patch
@@ -0,0 +1,74 @@
+From bde001bcdf8d14f9b6d6c3c216ab8c00f74b3c08 Mon Sep 17 00:00:00 2001
+From: Scott Wittenburg <scott.wittenburg@kitware.com>
+Date: Tue, 14 Nov 2023 13:46:37 -0700
+Subject: [PATCH] mpich: Support ch3:sock for a non busy-polling option
+
+---
+ var/spack/repos/builtin/packages/mpich/package.py | 15 ++++++++++-----
+ 1 file changed, 10 insertions(+), 5 deletions(-)
+
+diff --git a/var/spack/repos/builtin/packages/mpich/package.py b/var/spack/repos/builtin/packages/mpich/package.py
+index b66c0b8fd4..ccc1082e79 100644
+--- a/var/spack/repos/builtin/packages/mpich/package.py
++++ b/var/spack/repos/builtin/packages/mpich/package.py
+@@ -70,16 +70,14 @@ class Mpich(AutotoolsPackage, CudaPackage, ROCmPackage):
+         description="""Abstract Device Interface (ADI)
+ implementation. The ch4 device is in experimental state for versions
+ before 3.4.""",
+-        values=("ch3", "ch4"),
++        values=("ch3", "ch4", "ch3:sock"),
+         multi=False,
+     )
+     variant(
+         "netmod",
+         default="ofi",
+         description="""Network module. Only single netmod builds are
+-supported. For ch3 device configurations, this presumes the
+-ch3:nemesis communication channel. ch3:sock is not supported by this
+-spack package at this time.""",
++supported, and netmod is ignored if device is ch3:sock.""",
+         values=("tcp", "mxm", "ofi", "ucx"),
+         multi=False,
+     )
+@@ -121,6 +119,7 @@ class Mpich(AutotoolsPackage, CudaPackage, ROCmPackage):
+     depends_on("yaksa+cuda", when="+cuda ^yaksa")
+     depends_on("yaksa+rocm", when="+rocm ^yaksa")
+     conflicts("datatype-engine=yaksa", when="device=ch3")
++    conflicts("datatype-engine=yaksa", when="device=ch3:sock")
+ 
+     variant(
+         "hcoll",
+@@ -135,8 +134,10 @@ class Mpich(AutotoolsPackage, CudaPackage, ROCmPackage):
+     # overriding the variant from CudaPackage.
+     conflicts("+cuda", when="@:3.3")
+     conflicts("+cuda", when="device=ch3")
++    conflicts("+cuda", when="device=ch3:sock")
+     conflicts("+rocm", when="@:4.0")
+     conflicts("+rocm", when="device=ch3")
++    conflicts("+rocm", when="device=ch3:sock")
+     conflicts("+cuda", when="+rocm", msg="CUDA must be disabled to support ROCm")
+ 
+     provides("mpi@:4.0")
+@@ -271,6 +272,7 @@ class Mpich(AutotoolsPackage, CudaPackage, ROCmPackage):
+     conflicts("netmod=tcp", when="device=ch4")
+     conflicts("pmi=pmi2", when="device=ch3 netmod=ofi")
+     conflicts("pmi=pmix", when="device=ch3")
++    conflicts("pmi=pmix", when="device=ch3:sock")
+     conflicts("pmi=pmix", when="+hydra")
+     conflicts("pmi=cray", when="+hydra")
+ 
+@@ -556,7 +558,10 @@ def configure_args(self):
+         elif "device=ch3" in spec:
+             device_config = "--with-device=ch3:nemesis:"
+ 
+-        if "netmod=ucx" in spec:
++        # Do not apply any netmod if device is ch3:sock
++        if "device=ch3:sock" in spec:
++            device_config = "--with-device=ch3:sock"
++        elif "netmod=ucx" in spec:
+             device_config += "ucx"
+         elif "netmod=ofi" in spec:
+             device_config += "ofi"
+-- 
+2.25.1
+
diff --git a/scripts/ci/images/Dockerfile.ci-el8-intel b/scripts/ci/images/Dockerfile.ci-el8-intel
index d43abe663a..4470f3a703 100644
--- a/scripts/ci/images/Dockerfile.ci-el8-intel
+++ b/scripts/ci/images/Dockerfile.ci-el8-intel
@@ -23,11 +23,11 @@ RUN dnf install -y \
         libpng-devel \
         Lmod \
         make \
-        openmpi-devel \
+        mpich-devel \
         patch \
         patchelf \
         python3-devel \
-        python3-mpi4py-openmpi \
+        python3-mpi4py-mpich \
         python3-pip \
         tar \
         tcl \
@@ -42,9 +42,9 @@ RUN dnf install -y \
 COPY oneAPI.repo /etc/yum.repos.d/
 RUN pip3 install numpy && \
     dnf install -y \
-        intel-oneapi-compiler-dpcpp-cpp-2023.1.0 \
-        intel-oneapi-compiler-fortran-2023.1.0 \
-        intel-oneapi-compiler-dpcpp-cpp-and-cpp-classic-2023.1.0
+        intel-oneapi-compiler-dpcpp-cpp-2023.2.1 \
+        intel-oneapi-compiler-fortran-2023.2.1 \
+        intel-oneapi-compiler-dpcpp-cpp-and-cpp-classic-2023.2.1
 RUN dnf clean all
 
 # Setup module files for the compilers
@@ -66,7 +66,7 @@ RUN curl -L https://github.com/szcompressor/SZ/releases/download/v2.1.12.5/SZ-2.
     cd .. && \
     rm -rf SZ-2.1.12.5 build
 
-ENV PATH /opt/cmake/bin:/opt/sz/2.1.12.5/bin:/usr/lib64/openmpi/bin:${PATH}
+ENV PATH /opt/cmake/bin:/opt/sz/2.1.12.5/bin:/usr/lib64/mpich/bin:${PATH}
 ENV LD_LIBRARY_PATH /opt/sz/2.1.12.5/lib64:${LD_LIBRARY_PATH}
 ENV CMAKE_PREFIX_PATH /opt/sz/2.1.12.5:${CMAKE_PREFIX_PATH}
 
diff --git a/scripts/ci/images/Dockerfile.ci-spack-ubuntu20.04-base b/scripts/ci/images/Dockerfile.ci-spack-ubuntu20.04-base
index 6f55304208..12e64f7abe 100644
--- a/scripts/ci/images/Dockerfile.ci-spack-ubuntu20.04-base
+++ b/scripts/ci/images/Dockerfile.ci-spack-ubuntu20.04-base
@@ -1,16 +1,27 @@
 # vim: ft=dockerfile
-FROM ecpe4s/ubuntu20.04:23.05
+FROM ecpe4s/ubuntu20.04-runner-amd64-gcc-11.4:2023.08.01
+
+ARG E4S_VERSION=23.08
+
+# Clone and patch spack
+RUN cd / && \
+    git clone --depth 1 --single-branch --branch e4s-${E4S_VERSION} https://github.com/spack/spack
 
 COPY packages.yaml /etc/spack/packages.yaml
+# TODO: Patch only needed until we merge https://github.com/spack/spack/pull/40964
+COPY 0001-mpich-support-ch3-sock.patch /spack
 
-# Install Base specs
-RUN . /spack/share/spack/setup-env.sh && \
+# Install base specs
+RUN cd spack && \
+    git apply 0001-mpich-support-ch3-sock.patch && \
+    . /spack/share/spack/setup-env.sh && \
+    spack mirror add E4S "https://cache.e4s.io/${E4S_VERSION}" && \
     export ADIOS_SPEC_NO_MPI="adios2~mpi" && \
     export ADIOS_SPEC_OPENMPI="adios2+mpi^openmpi" && \
     export ADIOS_SPEC_MPICH="adios2+mpi^mpich" && \
     spack config add "config:checksum:false" && \
     spack config add "config:build_jobs:$(nproc)" && \
-    spack config add "concretizer:unify:false" && \
+    spack config add "concretizer:unify:true" && \
     spack env create adios2-ci-serial && \
     spack -e adios2-ci-serial add ${ADIOS_SPEC_NO_MPI} && \
     spack -e adios2-ci-serial concretize && \
@@ -38,23 +49,6 @@ RUN . /spack/share/spack/setup-env.sh && \
     spack clean -a && \
     echo "source /spack/share/spack/setup-env.sh" >> /etc/profile.d/zz-spack.sh
 
-# Install deps
-RUN pip install shyaml
-COPY specs.yaml /tmp/specs.yaml
-RUN . /spack/share/spack/setup-env.sh && \
-    spack env create adios2-ci-deps /tmp/specs.yaml && \
-    spack -e adios2-ci-deps install \
-      --no-check-signature \
-      --include-build-deps \
-      --fail-fast && \
-    spack -e adios2-ci-serial add $(shyaml get-values spack.specs < /tmp/specs.yaml) && \
-    spack -e adios2-ci-serial concretize && \
-    spack -e adios2-ci-ompi add $(shyaml get-values spack.specs < /tmp/specs.yaml) && \
-    spack -e adios2-ci-ompi concretize && \
-    spack -e adios2-ci-mpich add $(shyaml get-values spack.specs < /tmp/specs.yaml) && \
-    spack -e adios2-ci-mpich concretize && \
-    spack clean -a
-
 ### Other missing packages (compared to el8 base):
 RUN apt-get update && apt-get install -y \
     ccache \
diff --git a/scripts/ci/images/packages.yaml b/scripts/ci/images/packages.yaml
index b034552fcb..484d7bef60 100644
--- a/scripts/ci/images/packages.yaml
+++ b/scripts/ci/images/packages.yaml
@@ -4,6 +4,8 @@ packages:
   adios2:
     require: '+blosc+bzip2+zfp+sz+png+sst+dataman+ssc+hdf5+python+fortran'
   mpich:
-    require: 'device=ch3 netmod=tcp'
+    require: 'device=ch3:sock'
   openmpi:
     require: '@4.1'
+  mgard:
+    require: '@2023-01-10'
diff --git a/scripts/ci/images/specs.yaml b/scripts/ci/images/specs.yaml
deleted file mode 100644
index 7ec02d9a41..0000000000
--- a/scripts/ci/images/specs.yaml
+++ /dev/null
@@ -1,5 +0,0 @@
-spack:
-  view: true
-  specs:
-    # https://github.com/ornladios/ADIOS2/issues/3754
-    - mgard@2023-01-10
diff --git a/scripts/ci/scripts/run-shellcheck.sh b/scripts/ci/scripts/run-shellcheck.sh
index c987a5bea0..1dc735fc8b 100755
--- a/scripts/ci/scripts/run-shellcheck.sh
+++ b/scripts/ci/scripts/run-shellcheck.sh
@@ -15,7 +15,7 @@ fi
 
 # Give me a sorted list of the project scripts
 found_scripts="$({
-  find scripts -regextype posix-extended -iregex '.*\.(sh|bash)' -print;
+  find scripts source testing -regextype posix-extended -iregex '.*\.(sh|bash)' -print;
   grep -rnlE -e '#!/(/usr)?/bin/(bash|sh)' -e '#!(/usr)?/bin/env\s+(bash|sh)' scripts;
 } | sort -u)"
 
diff --git a/source/adios2/CMakeLists.txt b/source/adios2/CMakeLists.txt
index 806d87c795..ebf3f46f4d 100644
--- a/source/adios2/CMakeLists.txt
+++ b/source/adios2/CMakeLists.txt
@@ -127,6 +127,21 @@ add_library(adios2_core
 set_property(TARGET adios2_core PROPERTY EXPORT_NAME core)
 set_property(TARGET adios2_core PROPERTY OUTPUT_NAME adios2${ADIOS2_LIBRARY_SUFFIX}_core)
 
+set(maybe_adios2_core_derived)
+if (ADIOS2_HAVE_Derived_Variable)
+  target_sources(adios2_core PRIVATE
+      core/VariableDerived.cpp
+      toolkit/derived/Expression.cpp
+      toolkit/derived/Function.cpp toolkit/derived/Function.tcc
+      toolkit/derived/ExprHelper.h)
+  add_library(adios2_core_derived
+      toolkit/derived/parser/lexer.cpp
+      toolkit/derived/parser/parser.cpp
+      toolkit/derived/parser/ASTNode.cpp)
+  target_link_libraries(adios2_core PRIVATE adios2_core_derived)
+  set(maybe_adios2_core_derived adios2_core_derived)
+endif()
+
 set(maybe_adios2_core_cuda)
 if(ADIOS2_HAVE_CUDA)
   add_library(adios2_core_cuda helper/adiosCUDA.cu)
@@ -448,10 +463,11 @@ install(DIRECTORY toolkit/
   PATTERN "*/*.inl"
   REGEX "sst/util" EXCLUDE
   REGEX "sst/dp" EXCLUDE
+  REGEX "derived/parser" EXCLUDE
 )
 
 # Library installation
-install(TARGETS adios2_core ${maybe_adios2_core_mpi} ${maybe_adios2_core_cuda} ${maybe_adios2_core_kokkos} ${maybe_adios2_blosc2} EXPORT adios2Exports
+install(TARGETS adios2_core ${maybe_adios2_core_mpi} ${maybe_adios2_core_cuda} ${maybe_adios2_core_kokkos} ${maybe_adios2_blosc2} ${maybe_adios2_core_derived} EXPORT adios2Exports
   RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT adios2_core-runtime
   LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} COMPONENT adios2_core-libraries NAMELINK_COMPONENT adios2_core-development
   ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR} COMPONENT adios2_core-development
diff --git a/source/adios2/common/ADIOSTypes.h b/source/adios2/common/ADIOSTypes.h
index 7697c34624..01423e781f 100644
--- a/source/adios2/common/ADIOSTypes.h
+++ b/source/adios2/common/ADIOSTypes.h
@@ -32,6 +32,16 @@
 namespace adios2
 {
 
+#ifdef ADIOS2_HAVE_DERIVED_VARIABLE
+/** Type of derived variables */
+enum class DerivedVarType
+{
+    MetadataOnly,     ///< Store only the metadata (default)
+    ExpressionString, ///< Store only the expression string
+    StoreData         ///< Store data and metadata
+};
+#endif
+
 /** Memory space for the user provided buffers */
 enum class MemorySpace
 {
diff --git a/source/adios2/core/IO.cpp b/source/adios2/core/IO.cpp
index 1b99463b54..6b9faf01e9 100644
--- a/source/adios2/core/IO.cpp
+++ b/source/adios2/core/IO.cpp
@@ -288,6 +288,9 @@ void IO::SetTransportParameter(const size_t transportIndex, const std::string ke
 }
 
 const VarMap &IO::GetVariables() const noexcept { return m_Variables; }
+#ifdef ADIOS2_HAVE_DERIVED_VARIABLE
+const VarMap &IO::GetDerivedVariables() const noexcept { return m_VariablesDerived; }
+#endif
 
 const AttrMap &IO::GetAttributes() const noexcept { return m_Attributes; }
 
@@ -808,6 +811,92 @@ void IO::CheckTransportType(const std::string type) const
     }
 }
 
+#ifdef ADIOS2_HAVE_DERIVED_VARIABLE
+VariableDerived &IO::DefineDerivedVariable(const std::string &name, const std::string &exp_string,
+                                           const DerivedVarType varType)
+{
+    PERFSTUBS_SCOPED_TIMER("IO::DefineDerivedVariable");
+
+    {
+        auto itVariable = m_VariablesDerived.find(name);
+        if (itVariable != m_VariablesDerived.end())
+        {
+            helper::Throw<std::invalid_argument>("Core", "IO", "DefineDerivedVariable",
+                                                 "derived variable " + name +
+                                                     " already defined in IO " + m_Name);
+        }
+        else
+        {
+            auto itVariable = m_Variables.find(name);
+            if (itVariable != m_Variables.end())
+            {
+                helper::Throw<std::invalid_argument>(
+                    "Core", "IO", "DefineDerivedVariable",
+                    "derived variable " + name +
+                        " trying to use an already defined variable name in IO " + m_Name);
+            }
+        }
+    }
+
+    derived::Expression derived_exp(exp_string);
+    std::vector<std::string> var_list = derived_exp.VariableNameList();
+    DataType expressionType = DataType::None;
+    bool isConstant = true;
+    std::map<std::string, std::tuple<Dims, Dims, Dims>> name_to_dims;
+    // check correctness for the variable names and types within the expression
+    for (auto var_name : var_list)
+    {
+        auto itVariable = m_Variables.find(var_name);
+        if (itVariable == m_Variables.end())
+            helper::Throw<std::invalid_argument>("Core", "IO", "DefineDerivedVariable",
+                                                 "using undefine variable " + var_name +
+                                                     " in defining the derived variable " + name);
+        DataType var_type = InquireVariableType(var_name);
+        if (expressionType == DataType::None)
+            expressionType = var_type;
+        if (expressionType != var_type)
+            helper::Throw<std::invalid_argument>("Core", "IO", "DefineDerivedVariable",
+                                                 "all variables within a derived variable "
+                                                 " must have the same type ");
+        if ((itVariable->second)->IsConstantDims() == false)
+            isConstant = false;
+        name_to_dims.insert({var_name,
+                             {(itVariable->second)->m_Start, (itVariable->second)->m_Count,
+                              (itVariable->second)->m_Shape}});
+    }
+    // std::cout << "Derived variable " << name << ": PASS : variables exist and have the same type"
+    //          << std::endl;
+    // set the initial shape of the expression and check correcness
+    derived_exp.SetDims(name_to_dims);
+    // std::cout << "Derived variable " << name << ": PASS : initial variable dimensions are valid"
+    //          << std::endl;
+
+    // create derived variable with the expression
+    auto itVariablePair = m_VariablesDerived.emplace(
+        name, std::unique_ptr<VariableBase>(
+                  new VariableDerived(name, derived_exp, expressionType, isConstant, varType)));
+    VariableDerived &variable = static_cast<VariableDerived &>(*itVariablePair.first->second);
+
+    // check IO placeholder for variable operations
+    auto itOperations = m_VarOpsPlaceholder.find(name);
+    if (itOperations != m_VarOpsPlaceholder.end())
+    {
+        // allow to apply an operation only for derived variables that save the data
+        if (varType != DerivedVarType::StoreData)
+            helper::Throw<std::invalid_argument>(
+                "Core", "IO", "DefineDerivedVariable",
+                "Operators for derived variables can only be applied "
+                " for DerivedVarType::StoreData types.");
+        variable.m_Operations.reserve(itOperations->second.size());
+        for (auto &operation : itOperations->second)
+        {
+            variable.AddOperation(operation.first, operation.second);
+        }
+    }
+    return variable;
+}
+#endif
+
 StructDefinition &IO::DefineStruct(const std::string &name, const size_t size)
 {
     return m_ADIOS.m_StructDefinitions.emplace(name, StructDefinition(name, size))->second;
diff --git a/source/adios2/core/IO.h b/source/adios2/core/IO.h
index 4d64982f3b..0bdf279d12 100644
--- a/source/adios2/core/IO.h
+++ b/source/adios2/core/IO.h
@@ -28,6 +28,9 @@
 #include "adios2/core/CoreTypes.h"
 #include "adios2/core/Group.h"
 #include "adios2/core/Variable.h"
+#ifdef ADIOS2_HAVE_DERIVED_VARIABLE
+#include "adios2/core/VariableDerived.h"
+#endif
 #include "adios2/core/VariableStruct.h"
 
 namespace adios2
@@ -179,7 +182,11 @@ class IO
     Variable<T> &DefineVariable(const std::string &name, const Dims &shape = Dims(),
                                 const Dims &start = Dims(), const Dims &count = Dims(),
                                 const bool constantDims = false);
-
+#ifdef ADIOS2_HAVE_DERIVED_VARIABLE
+    VariableDerived &
+    DefineDerivedVariable(const std::string &name, const std::string &expression,
+                          const DerivedVarType varType = DerivedVarType::MetadataOnly);
+#endif
     VariableStruct &DefineStructVariable(const std::string &name, StructDefinition &def,
                                          const Dims &shape = Dims(), const Dims &start = Dims(),
                                          const Dims &count = Dims(),
@@ -304,6 +311,9 @@ class IO
      * </pre>
      */
     const VarMap &GetVariables() const noexcept;
+#ifdef ADIOS2_HAVE_DERIVED_VARIABLE
+    const VarMap &GetDerivedVariables() const noexcept;
+#endif
 
     /**
      * Retrieves hash holding internal Attributes identifiers
@@ -500,6 +510,9 @@ class IO
     adios2::IOMode m_IOMode = adios2::IOMode::Independent;
 
     VarMap m_Variables;
+#ifdef ADIOS2_HAVE_DERIVED_VARIABLE
+    VarMap m_VariablesDerived;
+#endif
 
     AttrMap m_Attributes;
 
diff --git a/source/adios2/core/VariableDerived.cpp b/source/adios2/core/VariableDerived.cpp
new file mode 100644
index 0000000000..670ad0bc35
--- /dev/null
+++ b/source/adios2/core/VariableDerived.cpp
@@ -0,0 +1,121 @@
+#include "VariableDerived.h"
+#include "adios2/helper/adiosType.h"
+
+namespace adios2
+{
+namespace core
+{
+
+VariableDerived::VariableDerived(const std::string &name, adios2::derived::Expression expr,
+                                 const DataType exprType, const bool isConstant,
+                                 const DerivedVarType varType)
+: VariableBase(name, exprType, helper::GetDataTypeSize(exprType), expr.GetShape(), expr.GetStart(),
+               expr.GetCount(), isConstant),
+  m_Expr(expr), m_DerivedType(varType)
+{
+}
+
+DerivedVarType VariableDerived::GetDerivedType() { return m_DerivedType; }
+
+std::vector<std::string> VariableDerived::VariableNameList() { return m_Expr.VariableNameList(); }
+void VariableDerived::UpdateExprDim(std::map<std::string, std::tuple<Dims, Dims, Dims>> NameToDims)
+{
+    m_Expr.SetDims(NameToDims);
+    m_Shape = m_Expr.GetShape();
+    m_Start = m_Expr.GetStart();
+    m_Count = m_Expr.GetCount();
+}
+
+std::vector<std::tuple<void *, Dims, Dims>>
+VariableDerived::ApplyExpression(std::map<std::string, MinVarInfo> NameToMVI)
+{
+    size_t numBlocks = 0;
+    // check that all variables have the same number of blocks
+    for (auto variable : NameToMVI)
+    {
+        if (numBlocks == 0)
+            numBlocks = variable.second.BlocksInfo.size();
+        if (numBlocks != variable.second.BlocksInfo.size())
+            helper::Throw<std::invalid_argument>("Core", "VariableDerived", "ApplyExpression",
+                                                 " variables do not have the same number of blocks "
+                                                 " in computing the derived variable " +
+                                                     m_Name);
+    }
+
+    std::map<std::string, std::vector<adios2::derived::DerivedData>> inputData;
+    // create the map between variable name and DerivedData object
+    for (auto variable : NameToMVI)
+    {
+        // add the dimensions of all blocks into a vector
+        std::vector<adios2::derived::DerivedData> varData;
+        for (size_t i = 0; i < numBlocks; i++)
+        {
+            Dims start;
+            Dims count;
+            for (size_t d = 0; d < variable.second.Dims; d++)
+            {
+                start.push_back(variable.second.BlocksInfo[i].Start[d]);
+                count.push_back(variable.second.BlocksInfo[i].Count[d]);
+            }
+            varData.push_back(adios2::derived::DerivedData(
+                {variable.second.BlocksInfo[i].BufferP, start, count}));
+        }
+        inputData.insert({variable.first, varData});
+    }
+    // TODO check that the dimensions are still corrects
+    std::vector<adios2::derived::DerivedData> outputData =
+        m_Expr.ApplyExpression(m_Type, numBlocks, inputData);
+
+    std::vector<std::tuple<void *, Dims, Dims>> blockData;
+    for (size_t i = 0; i < numBlocks; i++)
+    {
+        blockData.push_back({outputData[i].Data, outputData[i].Start, outputData[i].Count});
+    }
+
+    return blockData;
+}
+
+std::vector<void *>
+VariableDerived::ApplyExpression(std::map<std::string, std::vector<void *>> NameToData,
+                                 std::map<std::string, std::tuple<Dims, Dims, Dims>> NameToDims)
+{
+    size_t numBlocks = 0;
+    std::map<std::string, std::vector<adios2::derived::DerivedData>> inputData;
+    // check that all variables have the same number of blocks
+    for (auto variable : NameToData)
+    {
+        if (numBlocks == 0)
+            numBlocks = variable.second.size();
+        if (numBlocks != variable.second.size())
+            helper::Throw<std::invalid_argument>("Core", "VariableDerived", "ApplyExpression",
+                                                 " variables do not have the same number of blocks "
+                                                 " in computing the derived variable " +
+                                                     m_Name);
+    }
+    std::cout << "Derived variable " << m_Name
+              << ": PASS : variables have written the same num of blocks" << std::endl;
+    // create the map between variable name and DerivedData object
+    for (auto variable : NameToData)
+    {
+        // add the dimensions of all blocks into a vector
+        std::vector<adios2::derived::DerivedData> varData;
+        for (size_t i = 0; i < numBlocks; i++)
+        {
+            varData.push_back(adios2::derived::DerivedData(
+                {variable.second[i], std::get<0>(NameToDims[variable.first]),
+                 std::get<1>(NameToDims[variable.first])}));
+        }
+        inputData.insert({variable.first, varData});
+    }
+    std::vector<adios2::derived::DerivedData> outputData =
+        m_Expr.ApplyExpression(m_Type, numBlocks, inputData);
+    std::vector<void *> blockData;
+    for (size_t i = 0; i < numBlocks; i++)
+    {
+        blockData.push_back(outputData[i].Data);
+    }
+    return blockData;
+}
+
+} // end namespace core
+} // end namespace adios2
diff --git a/source/adios2/core/VariableDerived.h b/source/adios2/core/VariableDerived.h
new file mode 100644
index 0000000000..2ff9eb3903
--- /dev/null
+++ b/source/adios2/core/VariableDerived.h
@@ -0,0 +1,41 @@
+#ifndef ADIOS2_CORE_VARIABLE_DERIVED_H_
+#define ADIOS2_CORE_VARIABLE_DERIVED_H_
+
+#include "adios2/common/ADIOSTypes.h"
+#include "adios2/core/VariableBase.h"
+#include "adios2/helper/adiosType.h"
+#include "adios2/toolkit/derived/Expression.h"
+
+namespace adios2
+{
+namespace core
+{
+
+/**
+ * @param Base (parent) class for template derived (child) class Variable.
+ */
+class VariableDerived : public VariableBase
+{
+    DerivedVarType m_DerivedType;
+
+public:
+    adios2::derived::Expression m_Expr;
+    VariableDerived(const std::string &name, adios2::derived::Expression expr,
+                    const DataType exprType, const bool isConstant, const DerivedVarType varType);
+    ~VariableDerived() = default;
+
+    DerivedVarType GetDerivedType();
+    std::vector<std::string> VariableNameList();
+    void UpdateExprDim(std::map<std::string, std::tuple<Dims, Dims, Dims>> NameToDims);
+
+    std::vector<void *>
+    ApplyExpression(std::map<std::string, std::vector<void *>> NameToData,
+                    std::map<std::string, std::tuple<Dims, Dims, Dims>> NameToDims);
+    std::vector<std::tuple<void *, Dims, Dims>>
+    ApplyExpression(std::map<std::string, MinVarInfo> mvi);
+};
+
+} // end namespace core
+} // end namespace adios2
+
+#endif /* ADIOS2_CORE_VARIABLE_DERIVED_H_ */
diff --git a/source/adios2/engine/bp5/BP5Writer.cpp b/source/adios2/engine/bp5/BP5Writer.cpp
index 465a44d959..9ae651fc3a 100644
--- a/source/adios2/engine/bp5/BP5Writer.cpp
+++ b/source/adios2/engine/bp5/BP5Writer.cpp
@@ -495,8 +495,72 @@ void BP5Writer::MarshalAttributes()
     }
 }
 
+#ifdef ADIOS2_HAVE_DERIVED_VARIABLE
+void BP5Writer::ComputeDerivedVariables()
+{
+    auto const &m_VariablesDerived = m_IO.GetDerivedVariables();
+    auto const &m_Variables = m_IO.GetVariables();
+    // parse all derived variables
+    for (auto it = m_VariablesDerived.begin(); it != m_VariablesDerived.end(); it++)
+    {
+        // identify the variables used in the derived variable
+        auto derivedVar = dynamic_cast<core::VariableDerived *>((*it).second.get());
+        std::vector<std::string> varList = derivedVar->VariableNameList();
+        // to create a mapping between variable name and the varInfo (dim and data pointer)
+        std::map<std::string, MinVarInfo> nameToVarInfo;
+        bool computeDerived = true;
+        for (auto varName : varList)
+        {
+            auto itVariable = m_Variables.find(varName);
+            if (itVariable == m_Variables.end())
+                helper::Throw<std::invalid_argument>("Core", "IO", "DefineDerivedVariable",
+                                                     "using undefine variable " + varName +
+                                                         " in defining the derived variable " +
+                                                         (*it).second->m_Name);
+            // extract the dimensions and data for each variable
+            VariableBase *varBase = itVariable->second.get();
+            auto mvi = WriterMinBlocksInfo(*varBase);
+            if (mvi->BlocksInfo.size() == 0)
+            {
+                computeDerived = false;
+                std::cout << "Variable " << itVariable->first << " not written in this step";
+                std::cout << " .. skip derived variable " << (*it).second->m_Name << std::endl;
+                break;
+            }
+            nameToVarInfo.insert({varName, *mvi});
+        }
+        // skip computing derived variables if it contains variables that are not written this step
+        if (!computeDerived)
+            continue;
+
+        // compute the values for the derived variables that are not type ExpressionString
+        std::vector<std::tuple<void *, Dims, Dims>> DerivedBlockData;
+        if (derivedVar->GetDerivedType() != DerivedVarType::ExpressionString)
+        {
+            DerivedBlockData = derivedVar->ApplyExpression(nameToVarInfo);
+        }
+
+        // Send the derived variable to ADIOS2 internal logic
+        for (auto derivedBlock : DerivedBlockData)
+        {
+            // set the shape of the variable for each block
+            if (!(*it).second->IsConstantDims())
+            {
+                (*it).second->m_Start = std::get<1>(derivedBlock);
+                (*it).second->m_Count = std::get<2>(derivedBlock);
+            }
+            PutCommon(*(*it).second.get(), std::get<0>(derivedBlock), true /* sync */);
+            free(std::get<0>(derivedBlock));
+        }
+    }
+}
+#endif
+
 void BP5Writer::EndStep()
 {
+#ifdef ADIOS2_HAVE_DERIVED_VARIABLE
+    ComputeDerivedVariables();
+#endif
     m_BetweenStepPairs = false;
     PERFSTUBS_SCOPED_TIMER("BP5Writer::EndStep");
     m_Profiler.Start("ES");
@@ -504,26 +568,6 @@ void BP5Writer::EndStep()
     m_Profiler.Start("ES_close");
     MarshalAttributes();
 
-#ifdef NOT_DEF
-    const auto &vars = m_IO.GetVariables();
-    for (const auto &varPair : vars)
-    {
-        auto baseVar = varPair.second.get();
-        auto mvi = WriterMinBlocksInfo(*baseVar);
-        if (mvi)
-        {
-            std::cout << "Info for Variable " << varPair.first << std::endl;
-            PrintMVI(std::cout, *mvi);
-            if (baseVar->m_Type == DataType::Double)
-                std::cout << "Double value is " << *((double *)mvi->BlocksInfo[0].BufferP)
-                          << std::endl;
-            delete mvi;
-        }
-        else
-            std::cout << "Variable " << varPair.first << " not written on this step" << std::endl;
-    }
-#endif
-
     // true: advances step
     auto TSInfo = m_BP5Serializer.CloseTimestep((int)m_WriterStep,
                                                 m_Parameters.AsyncWrite || m_Parameters.DirectIO);
diff --git a/source/adios2/engine/bp5/BP5Writer.h b/source/adios2/engine/bp5/BP5Writer.h
index eaddf93b9d..cfaeb51bf2 100644
--- a/source/adios2/engine/bp5/BP5Writer.h
+++ b/source/adios2/engine/bp5/BP5Writer.h
@@ -119,6 +119,10 @@ class BP5Writer : public BP5Engine, public core::Engine
     /** Inform about computation block through User->ADIOS->IO */
     void ExitComputationBlock() noexcept;
 
+#ifdef ADIOS2_HAVE_DERIVED_VARIABLE
+    void ComputeDerivedVariables();
+#endif
+
 #define declare_type(T)                                                                            \
     void DoPut(Variable<T> &variable, typename Variable<T>::Span &span, const bool initialize,     \
                const T &value) final;
diff --git a/source/adios2/engine/dataman/DataManReader.tcc b/source/adios2/engine/dataman/DataManReader.tcc
index 7b44c8cbbd..9edd962b96 100644
--- a/source/adios2/engine/dataman/DataManReader.tcc
+++ b/source/adios2/engine/dataman/DataManReader.tcc
@@ -35,9 +35,9 @@ void DataManReader::GetDeferredCommon(Variable<T> &variable, T *data)
     {
         while (true)
         {
-            int ret =
-                m_Serializer.GetData(data, variable.m_Name, variable.m_Start, variable.m_Count,
-                                     m_CurrentStep, variable.m_MemoryStart, variable.m_MemoryCount);
+            int ret = m_Serializer.GetData(data, variable.m_Name, variable.m_Start,
+                                           variable.m_Count, m_CurrentStep, variable.m_MemSpace,
+                                           variable.m_MemoryStart, variable.m_MemoryCount);
             if (ret == 0)
             {
                 break;
@@ -57,7 +57,7 @@ void DataManReader::GetDeferredCommon(Variable<T> &variable, T *data)
         while (true)
         {
             int ret = m_Serializer.GetData(data, variable.m_Name, start, count, m_CurrentStep,
-                                           memstart, memcount);
+                                           variable.m_MemSpace, memstart, memcount);
             if (ret == 0)
             {
                 break;
diff --git a/source/adios2/engine/dataman/DataManWriter.tcc b/source/adios2/engine/dataman/DataManWriter.tcc
index fb96f948ef..f9996e4678 100644
--- a/source/adios2/engine/dataman/DataManWriter.tcc
+++ b/source/adios2/engine/dataman/DataManWriter.tcc
@@ -49,7 +49,8 @@ void DataManWriter::PutDeferredCommon(Variable<T> &variable, const T *values)
         std::reverse(memstart.begin(), memstart.end());
         std::reverse(memcount.begin(), memcount.end());
         m_Serializer.PutData(variable.m_Data, variable.m_Name, shape, start, count, memstart,
-                             memcount, m_Name, CurrentStep(), m_MpiRank, "", variable.m_Operations);
+                             memcount, variable.m_MemSpace, m_Name, CurrentStep(), m_MpiRank, "",
+                             variable.m_Operations);
     }
 
     if (m_MonitorActive)
diff --git a/source/adios2/toolkit/derived/ExprHelper.h b/source/adios2/toolkit/derived/ExprHelper.h
new file mode 100644
index 0000000000..c72888f95f
--- /dev/null
+++ b/source/adios2/toolkit/derived/ExprHelper.h
@@ -0,0 +1,60 @@
+#ifndef ADIOS2_DERIVED_PARSER_EXPHELPER_H_
+#define ADIOS2_DERIVED_PARSER_EXPHELPER_H_
+
+#include <map>
+#include <string>
+#include <tuple>
+#include <vector>
+
+namespace adios2
+{
+namespace detail
+{
+
+enum ExpressionOperator
+{
+    OP_NULL,
+    OP_ALIAS, /* Parser-use only */
+    OP_PATH,  /* Parser-use only */
+    OP_NUM,   /* Parser-use only */
+    OP_INDEX,
+    OP_ADD,
+    OP_SQRT,
+    OP_POW,
+    OP_MAGN
+};
+
+struct OperatorProperty
+{
+    std::string name;
+    bool is_associative;
+};
+
+const std::map<ExpressionOperator, OperatorProperty> op_property = {
+    {ExpressionOperator::OP_NULL, {"NULL", false}},
+    {ExpressionOperator::OP_ALIAS, {"ALIAS", false}}, /* Parser-use only */
+    {ExpressionOperator::OP_PATH, {"PATH", false}},   /* Parser-use only */
+    {ExpressionOperator::OP_NUM, {"NUM", false}},     /* Parser-use only */
+    {ExpressionOperator::OP_INDEX, {"INDEX", false}},
+    {ExpressionOperator::OP_ADD, {"ADD", true}},
+    {ExpressionOperator::OP_SQRT, {"SQRT", false}},
+    {ExpressionOperator::OP_POW, {"POW", false}},
+    {ExpressionOperator::OP_MAGN, {"MAGNITUDE", false}}};
+
+const std::map<std::string, ExpressionOperator> string_to_op = {
+    {"ALIAS", ExpressionOperator::OP_ALIAS}, /* Parser-use only */
+    {"PATH", ExpressionOperator::OP_PATH},   /* Parser-use only */
+    {"NUM", ExpressionOperator::OP_NUM},     /* Parser-use only */
+    {"INDEX", ExpressionOperator::OP_INDEX},    {"+", ExpressionOperator::OP_ADD},
+    {"add", ExpressionOperator::OP_ADD},        {"ADD", ExpressionOperator::OP_ADD},
+    {"SQRT", ExpressionOperator::OP_SQRT},      {"sqrt", ExpressionOperator::OP_SQRT},
+    {"POW", ExpressionOperator::OP_POW},        {"^", ExpressionOperator::OP_POW},
+    {"MAGNITUDE", ExpressionOperator::OP_MAGN}, {"magnitude", ExpressionOperator::OP_MAGN}};
+
+inline std::string get_op_name(ExpressionOperator op) { return op_property.at(op).name; }
+
+inline ExpressionOperator get_op(std::string op) { return string_to_op.at(op); }
+
+}
+}
+#endif
\ No newline at end of file
diff --git a/source/adios2/toolkit/derived/Expression.cpp b/source/adios2/toolkit/derived/Expression.cpp
new file mode 100644
index 0000000000..d773236526
--- /dev/null
+++ b/source/adios2/toolkit/derived/Expression.cpp
@@ -0,0 +1,226 @@
+#ifndef ADIOS2_DERIVED_Expression_CPP_
+#define ADIOS2_DERIVED_Expression_CPP_
+
+#include "Expression.h"
+#include "parser/ASTNode.h"
+#include "parser/parser.h"
+
+namespace adios2
+{
+namespace detail
+{
+adios2::derived::ExpressionTree ASTNode_to_ExpressionTree(adios2::detail::ASTNode *node)
+{
+    adios2::derived::ExpressionTree exprTree_node(node->operation);
+    for (adios2::detail::ASTNode *e : node->sub_expr)
+    {
+        switch (e->operation)
+        {
+        case adios2::detail::ExpressionOperator::OP_ALIAS: // add variable given by alias
+            // add an index operation in the chain if the variable contains indeces
+            /*if (e->lookup_var_indices(e->alias) != "")
+            {
+                ExpressionTree index_expr(adios2::detail::ExpressionOperator::OP_INDEX);
+                index_expr.set_indeces(e->lookup_var_indices(e->alias));
+                index_expr.add_child(e->lookup_var_path(e->alias));
+                expTree_node->add_child(expr);
+            }*/
+            exprTree_node.add_child(e->lookup_var_path(e->alias));
+            break;
+        case adios2::detail::ExpressionOperator::OP_PATH: // add variable name
+            exprTree_node.add_child(e->alias);
+            break;
+        case adios2::detail::ExpressionOperator::OP_NUM: // set the base value for the operation
+            exprTree_node.set_base(e->value);
+            break;
+        default: // if the children nodes are other expressions, convert them to expressions
+            auto temp_node = ASTNode_to_ExpressionTree(e);
+            // move from a binary to a multinary tree if the child has the same operation
+            if (e->operation == node->operation &&
+                adios2::detail::op_property.at(e->operation).is_associative)
+            {
+                // concatenate exprTree with temp_node
+                for (std::tuple<adios2::derived::ExpressionTree, std::string, bool> childTree :
+                     temp_node.sub_exprs)
+                {
+                    if (std::get<2>(childTree) == true)
+                        exprTree_node.add_child(std::get<0>(childTree));
+                    else
+                        exprTree_node.add_child(std::get<1>(childTree));
+                }
+            }
+            else
+            {
+                exprTree_node.add_child(temp_node);
+            }
+        }
+    }
+    return exprTree_node;
+}
+}
+
+namespace derived
+{
+
+Expression::Expression(std::string string_exp)
+: ExprString(string_exp), m_Shape({0}), m_Start({0}), m_Count({0})
+{
+    adios2::detail::ASTNode *root_node = adios2::detail::parse_expression(string_exp);
+    m_Expr = adios2::detail::ASTNode_to_ExpressionTree(root_node);
+}
+
+std::vector<std::string> Expression::VariableNameList() { return m_Expr.VariableNameList(); }
+
+Dims Expression::GetShape() { return m_Shape; }
+
+Dims Expression::GetStart() { return m_Start; }
+
+Dims Expression::GetCount() { return m_Count; }
+
+void Expression::SetDims(std::map<std::string, std::tuple<Dims, Dims, Dims>> NameToDims)
+{
+    std::map<std::string, Dims> NameToCount, NameToStart, NameToShape;
+    for (const auto &it : NameToDims)
+    {
+        NameToStart[it.first] = std::get<0>(it.second);
+        NameToCount[it.first] = std::get<1>(it.second);
+        NameToShape[it.first] = std::get<2>(it.second);
+    }
+    m_Count = m_Expr.GetDims(NameToCount);
+    m_Start = m_Expr.GetDims(NameToStart);
+    m_Shape = m_Expr.GetDims(NameToShape);
+}
+
+std::vector<DerivedData>
+Expression::ApplyExpression(DataType type, size_t numBlocks,
+                            std::map<std::string, std::vector<DerivedData>> nameToData)
+{
+    return m_Expr.ApplyExpression(type, numBlocks, nameToData);
+}
+
+void ExpressionTree::set_base(double c) { detail.constant = c; }
+
+void ExpressionTree::set_indeces(std::vector<std::tuple<size_t, size_t, size_t>> index_list)
+{
+    detail.indices = index_list;
+}
+
+void ExpressionTree::add_child(ExpressionTree exp) { sub_exprs.push_back({exp, "", true}); }
+
+void ExpressionTree::add_child(std::string var)
+{
+    sub_exprs.push_back({ExpressionTree(), var, false});
+}
+
+std::vector<std::string> ExpressionTree::VariableNameList()
+{
+    std::vector<std::string> var_list;
+    for (auto subexp : sub_exprs)
+    {
+        // if the sub_expression is a leaf
+        if (!std::get<2>(subexp))
+        {
+            var_list.push_back(std::get<1>(subexp));
+        }
+        else
+        {
+            auto subexpr_list = std::get<0>(subexp).VariableNameList();
+            var_list.insert(var_list.end(), subexpr_list.begin(), subexpr_list.end());
+        }
+    }
+    return var_list;
+}
+
+void ExpressionTree::print()
+{
+    std::cout << "Print Expression:" << std::endl;
+    std::cout << "\toperation: " << get_op_name(detail.operation) << std::endl;
+    std::cout << "\tconstant: " << detail.constant << std::endl;
+    std::cout << "\tchildren: " << sub_exprs.size() << std::endl;
+
+    for (std::tuple<ExpressionTree, std::string, bool> t : sub_exprs)
+    {
+        if (std::get<2>(t) == true)
+        {
+            std::get<0>(t).print();
+        }
+        else
+        {
+            std::cout << "string: " << std::get<1>(t) << std::endl;
+        }
+    }
+}
+
+Dims ExpressionTree::GetDims(std::map<std::string, Dims> NameToDims)
+{
+    std::vector<Dims> exprDims;
+    for (auto subexp : sub_exprs)
+    {
+        // if the sub_expression is a leaf, we get the shape from the input std::map
+        if (!std::get<2>(subexp))
+        {
+            exprDims.push_back(NameToDims[std::get<1>(subexp)]);
+        }
+        else
+        {
+            exprDims.push_back(std::get<0>(subexp).GetDims(NameToDims));
+        }
+    }
+    // get the output dimensions after applying the operator
+    auto op_fct = OpFunctions.at(detail.operation);
+    Dims opDims = op_fct.DimsFct(exprDims);
+    return opDims;
+}
+
+std::vector<DerivedData>
+ExpressionTree::ApplyExpression(DataType type, size_t numBlocks,
+                                std::map<std::string, std::vector<DerivedData>> nameToData)
+{
+    // create operands for the computation function
+    // exprData[0] = list of void* data for block 0 for each variable
+    std::vector<std::vector<DerivedData>> exprData(numBlocks);
+    std::vector<bool> dealocate;
+    for (auto subexp : sub_exprs)
+    {
+        if (!std::get<2>(subexp))
+        {
+            // do not dealocate leafs (this is user data)
+            dealocate.push_back(false);
+            for (size_t blk = 0; blk < numBlocks; blk++)
+            {
+                exprData[blk].push_back(nameToData[std::get<1>(subexp)][blk]);
+            }
+        }
+        else
+        {
+            dealocate.push_back(true);
+            auto subexpData = std::get<0>(subexp).ApplyExpression(type, numBlocks, nameToData);
+            for (size_t blk = 0; blk < numBlocks; blk++)
+            {
+                exprData[blk].push_back(subexpData[blk]);
+            }
+        }
+    }
+    // apply the computation operator on all blocks
+    std::vector<DerivedData> outputData(numBlocks);
+    auto op_fct = OpFunctions.at(detail.operation);
+    for (size_t blk = 0; blk < numBlocks; blk++)
+    {
+        outputData[blk] = op_fct.ComputeFct(exprData[blk], type);
+    }
+    // deallocate intermediate data after computing the operation
+    for (size_t blk = 0; blk < numBlocks; blk++)
+    {
+        for (size_t i = 0; i < exprData[blk].size(); i++)
+        {
+            if (dealocate[i] == false)
+                continue;
+            free(exprData[blk][i].Data);
+        }
+    }
+    return outputData;
+}
+
+}
+}
+#endif
diff --git a/source/adios2/toolkit/derived/Expression.h b/source/adios2/toolkit/derived/Expression.h
new file mode 100644
index 0000000000..6f60f1c262
--- /dev/null
+++ b/source/adios2/toolkit/derived/Expression.h
@@ -0,0 +1,85 @@
+#ifndef ADIOS2_DERIVED_Expression_H_
+#define ADIOS2_DERIVED_Expression_H_
+
+#include "Function.h"
+#include "adios2/common/ADIOSTypes.h"
+#include <string>
+#include <unordered_map>
+
+namespace adios2
+{
+
+namespace derived
+{
+/*
+ A Note on ExpressionTree:
+ - Sub expressions can include another operation node or a variable name
+    - the third entry in the tuple distinguishes between variable and operation
+ - OpInfo contains information about the operation stoder in the node
+    - The type of the operation
+    - Indexing/Slicing: detail is indices (std::vector<std::tuple<size_t start, size_t end, size_t
+ stride>>, e.g. for each dimension start:end:stride
+    - Constant used to compute the operation [e.g. log_2]
+ */
+struct OpInfo
+{
+    adios2::detail::ExpressionOperator operation;
+    std::vector<std::tuple<size_t, size_t, size_t>> indices;
+    double constant;
+};
+
+class ExpressionTree
+{
+public:
+    std::vector<std::tuple<ExpressionTree, std::string, bool>> sub_exprs;
+    OpInfo detail;
+
+    ExpressionTree(){};
+    ExpressionTree(adios2::detail::ExpressionOperator o) : detail({o, {}, 0}) {}
+    ExpressionTree(adios2::detail::ExpressionOperator o, double c) : detail({o, {}, 0}) {}
+    ExpressionTree(std::vector<std::tuple<size_t, size_t, size_t>> indices)
+    : detail({adios2::detail::ExpressionOperator ::OP_INDEX, indices, 0})
+    {
+    }
+
+    void set_base(double c);
+    void set_indeces(std::vector<std::tuple<size_t, size_t, size_t>> index_list);
+
+    void add_child(ExpressionTree exp);
+    void add_child(std::string var);
+
+    std::vector<std::string> VariableNameList();
+    Dims GetDims(std::map<std::string, Dims> NameToDims);
+    std::vector<DerivedData>
+    ApplyExpression(DataType type, size_t numBlocks,
+                    std::map<std::string, std::vector<DerivedData>> nameToData);
+    void print();
+};
+
+class Expression
+{
+    ExpressionTree m_Expr;
+
+    Dims m_Shape;
+    Dims m_Start;
+    Dims m_Count;
+
+public:
+    Expression() = default;
+    Expression(std::string expression);
+
+    std::string ExprString;
+
+    Dims GetShape();
+    Dims GetStart();
+    Dims GetCount();
+    void SetDims(std::map<std::string, std::tuple<Dims, Dims, Dims>> NameToDims);
+    std::vector<std::string> VariableNameList();
+    std::vector<DerivedData>
+    ApplyExpression(DataType type, size_t numBlocks,
+                    std::map<std::string, std::vector<DerivedData>> nameToData);
+};
+
+}
+}
+#endif
diff --git a/source/adios2/toolkit/derived/Function.cpp b/source/adios2/toolkit/derived/Function.cpp
new file mode 100644
index 0000000000..c524ac8c15
--- /dev/null
+++ b/source/adios2/toolkit/derived/Function.cpp
@@ -0,0 +1,74 @@
+#ifndef ADIOS2_DERIVED_Function_CPP_
+#define ADIOS2_DERIVED_Function_CPP_
+
+#include "Function.h"
+#include "Function.tcc"
+#include "adios2/common/ADIOSMacros.h"
+#include "adios2/helper/adiosFunctions.h"
+#include <cmath>
+
+namespace adios2
+{
+namespace derived
+{
+
+DerivedData AddFunc(std::vector<DerivedData> inputData, DataType type)
+{
+    size_t dataSize = std::accumulate(std::begin(inputData[0].Count), std::end(inputData[0].Count),
+                                      1, std::multiplies<size_t>());
+
+#define declare_type_add(T)                                                                        \
+    if (type == helper::GetDataType<T>())                                                          \
+    {                                                                                              \
+        T *addValues = ApplyOneToOne<T>(inputData, dataSize, [](T a, T b) { return a + b; });      \
+        return DerivedData({(void *)addValues, inputData[0].Start, inputData[0].Count});           \
+    }
+    ADIOS2_FOREACH_PRIMITIVE_STDTYPE_1ARG(declare_type_add)
+    helper::Throw<std::invalid_argument>("Derived", "Function", "AddFunc",
+                                         "Invalid variable types");
+    return DerivedData();
+}
+
+DerivedData MagnitudeFunc(std::vector<DerivedData> inputData, DataType type)
+{
+    size_t dataSize = std::accumulate(std::begin(inputData[0].Count), std::end(inputData[0].Count),
+                                      1, std::multiplies<size_t>());
+#define declare_type_mag(T)                                                                        \
+    if (type == helper::GetDataType<T>())                                                          \
+    {                                                                                              \
+        T *magValues = ApplyOneToOne<T>(inputData, dataSize, [](T a, T b) { return a + b * b; });  \
+        for (size_t i = 0; i < dataSize; i++)                                                      \
+        {                                                                                          \
+            magValues[i] = std::sqrt(magValues[i]);                                                \
+        }                                                                                          \
+        return DerivedData({(void *)magValues, inputData[0].Start, inputData[0].Count});           \
+    }
+    ADIOS2_FOREACH_PRIMITIVE_STDTYPE_1ARG(declare_type_mag)
+    helper::Throw<std::invalid_argument>("Derived", "Function", "MagnitudeFunc",
+                                         "Invalid variable types");
+    return DerivedData();
+}
+
+Dims SameDimsFunc(std::vector<Dims> input)
+{
+    // check that all dimenstions are the same
+    if (input.size() > 1)
+    {
+        bool dim_are_equal = std::equal(input.begin() + 1, input.end(), input.begin());
+        if (!dim_are_equal)
+            helper::Throw<std::invalid_argument>("Derived", "Function", "SameDimFunc",
+                                                 "Invalid variable dimensions");
+    }
+    // return the first dimension
+    return input[0];
+}
+
+#define declare_template_instantiation(T)                                                          \
+    T *ApplyOneToOne(std::vector<DerivedData>, size_t, std::function<T(T, T)>);
+
+ADIOS2_FOREACH_PRIMITIVE_STDTYPE_1ARG(declare_template_instantiation)
+#undef declare_template_instantiation
+
+}
+} // namespace adios2
+#endif
diff --git a/source/adios2/toolkit/derived/Function.h b/source/adios2/toolkit/derived/Function.h
new file mode 100644
index 0000000000..5dfa5aba97
--- /dev/null
+++ b/source/adios2/toolkit/derived/Function.h
@@ -0,0 +1,42 @@
+#ifndef ADIOS2_DERIVED_Function_H_
+#define ADIOS2_DERIVED_Function_H_
+
+#include "ExprHelper.h"
+#include "adios2/common/ADIOSTypes.h"
+#include "adios2/helper/adiosLog.h"
+#include <functional>
+
+namespace adios2
+{
+namespace derived
+{
+
+struct DerivedData
+{
+    void *Data;
+    Dims Start;
+    Dims Count;
+};
+
+struct OperatorFunctions
+{
+    std::function<DerivedData(std::vector<DerivedData>, DataType)> ComputeFct;
+    std::function<Dims(std::vector<Dims>)> DimsFct;
+};
+
+DerivedData AddFunc(std::vector<DerivedData> input, DataType type);
+DerivedData MagnitudeFunc(std::vector<DerivedData> input, DataType type);
+
+Dims SameDimsFunc(std::vector<Dims> input);
+
+const std::map<adios2::detail::ExpressionOperator, OperatorFunctions> OpFunctions = {
+    {adios2::detail::ExpressionOperator::OP_ADD, {AddFunc, SameDimsFunc}},
+    {adios2::detail::ExpressionOperator::OP_MAGN, {MagnitudeFunc, SameDimsFunc}}};
+
+template <class T>
+T *ApplyOneToOne(std::vector<DerivedData> inputData, size_t dataSize,
+                 std::function<T(T, T)> compFct);
+
+}
+}
+#endif
diff --git a/source/adios2/toolkit/derived/Function.tcc b/source/adios2/toolkit/derived/Function.tcc
new file mode 100644
index 0000000000..1c47c1665b
--- /dev/null
+++ b/source/adios2/toolkit/derived/Function.tcc
@@ -0,0 +1,39 @@
+#ifndef ADIOS2_DERIVED_Function_TCC_
+#define ADIOS2_DERIVED_Function_TCC_
+
+#include "Function.h"
+#include <algorithm>
+#include <cstring>
+#include <iostream>
+#include <numeric>
+
+namespace adios2
+{
+namespace derived
+{
+
+template <class T>
+T *ApplyOneToOne(std::vector<DerivedData> inputData, size_t dataSize,
+                 std::function<T(T, T)> compFct)
+{
+    T *outValues = (T *)malloc(dataSize * sizeof(T));
+    if (outValues == nullptr)
+    {
+        std::cout << "Allocation failed for the derived data" << std::endl;
+        // TODO - throw an exception
+    }
+    memset(outValues, 0, dataSize * sizeof(T));
+    for (auto &variable : inputData)
+    {
+        for (size_t i = 0; i < dataSize; i++)
+        {
+            T data = *(reinterpret_cast<T *>(variable.Data) + i);
+            outValues[i] = compFct(outValues[i], data);
+        }
+    }
+    return outValues;
+}
+
+}
+}
+#endif
diff --git a/source/adios2/toolkit/derived/parser/ASTNode.cpp b/source/adios2/toolkit/derived/parser/ASTNode.cpp
new file mode 100644
index 0000000000..5cbc07322d
--- /dev/null
+++ b/source/adios2/toolkit/derived/parser/ASTNode.cpp
@@ -0,0 +1,143 @@
+#ifndef ADIOS2_DERIVED_PARSER_ASTNODE_CPP_
+#define ADIOS2_DERIVED_PARSER_ASTNODE_CPP_
+
+#include "ASTNode.h"
+
+namespace adios2
+{
+namespace detail
+{
+
+/*****************************************/
+// alias maps to pair of path and indices (indices may be empty string)
+std::map<std::string, std::pair<std::string, std::string>> ASTNode::var_lookup;
+
+ASTNode::ASTNode() {}
+
+ASTNode::ASTNode(ExpressionOperator op) : operation(op) {}
+
+ASTNode::ASTNode(ExpressionOperator op, const char *str) : operation(op)
+{
+    switch (operation)
+    {
+    case ExpressionOperator::OP_ALIAS:
+        alias = str;
+        break;
+    case ExpressionOperator::OP_PATH:
+        alias = str;
+        break;
+    case ExpressionOperator::OP_INDEX:
+        indices = str;
+        break;
+    default:
+        // TODO: Make some error
+        // std::cout << "***That's a problem... ASTNode constructed with string should be alias
+        // type, path type,  or index type\n";
+        break;
+    }
+}
+
+ASTNode::ASTNode(ExpressionOperator op, double val) : operation(op), value(val) {}
+
+ASTNode::ASTNode(ExpressionOperator op, ASTNode *e) : operation(op) { sub_expr.push_back(e); }
+
+// for index
+ASTNode::ASTNode(ExpressionOperator op, ASTNode *e, const char *str) : operation(op), indices(str)
+{
+    sub_expr.push_back(e);
+}
+
+ASTNode::ASTNode(ExpressionOperator op, ASTNode *e1, ASTNode *e2) : operation(op)
+{
+    sub_expr.push_back(e1);
+    sub_expr.push_back(e2);
+}
+
+// Copy constructor
+ASTNode::ASTNode(const ASTNode &e)
+: operation(e.operation), alias(e.alias), value(e.value), sub_expr(e.sub_expr)
+{
+}
+
+ASTNode::~ASTNode()
+{
+    for (ASTNode *e : sub_expr)
+    {
+        delete e;
+    }
+}
+
+std::pair<std::string, std::string> ASTNode::lookup_var(const std::string var_alias)
+{
+    return var_lookup[var_alias];
+}
+
+std::string ASTNode::lookup_var_path(const std::string var_alias)
+{
+    return var_lookup[var_alias].first;
+}
+
+std::string ASTNode::lookup_var_indices(const std::string var_alias)
+{
+    return var_lookup[var_alias].second;
+}
+
+void ASTNode::add_lookup_entry(const std::string alias, const std::string var_name,
+                               const std::string indices)
+{
+    // std::cout << "Adding alias to lookup table:\n\talias: " << alias << "\n\tvar_name: " <<
+    // var_name << "\n\tindices: " << indices <<  std::endl;
+    var_lookup[alias] = std::make_pair(var_name, indices);
+}
+
+void ASTNode::add_subexpr(ASTNode *e) { sub_expr.push_back(e); }
+
+void ASTNode::add_back_subexpr(ASTNode *e, size_t n)
+{
+    size_t index = sub_expr.size() - n;
+    // std::cout << "ASTNode add_back_subexpr index: " << index << std::endl;
+    // if (index > 0 && sub_expr[index] == nullptr)
+    sub_expr[index] = e;
+}
+
+void ASTNode::extend_subexprs(size_t n)
+{
+    // std::cout << "ASTNode extending subexprs from size " << sub_expr.size() << " to " <<
+    // (sub_expr.size() + n) << std::endl;
+    sub_expr.resize(sub_expr.size() + n);
+}
+
+void ASTNode::printpretty(std::string indent)
+{
+    std::cout << indent << get_op_name(operation) << ":";
+    if (operation == ExpressionOperator::OP_ALIAS)
+    {
+        std::cout << " (alias " << alias << " maps to Variable '";
+        std::cout << lookup_var_path(alias) << "'";
+        if (lookup_var_indices(alias) != "")
+        {
+            std::cout << " [" << lookup_var_indices(alias) << "]";
+        }
+        std::cout << ")";
+    }
+    else if (operation == ExpressionOperator::OP_PATH)
+    {
+        std::cout << " (" << alias << ")";
+    }
+    else if (operation == ExpressionOperator::OP_INDEX)
+    {
+        std::cout << " [" << indices << "]";
+    }
+    std::cout << std::endl;
+    for (ASTNode *e : sub_expr)
+    {
+        if (e != nullptr)
+            e->printpretty(indent + "    ");
+        else
+            std::cout << "sub_expr is nullptr" << std::endl;
+    }
+}
+
+}
+}
+#endif
diff --git a/source/adios2/toolkit/derived/parser/ASTNode.h b/source/adios2/toolkit/derived/parser/ASTNode.h
new file mode 100644
index 0000000000..72cec1a812
--- /dev/null
+++ b/source/adios2/toolkit/derived/parser/ASTNode.h
@@ -0,0 +1,58 @@
+#ifndef ADIOS2_DERIVED_PARSER_ASTNODE_H_
+#define ADIOS2_DERIVED_PARSER_ASTNODE_H_
+
+#include <iostream>
+#include <map>
+#include <string>
+#include <vector>
+
+#include "../ExprHelper.h"
+
+/*****************************************/
+
+namespace adios2
+{
+namespace detail
+{
+
+class ASTNode
+{
+public:
+    ASTNode();
+    ASTNode(ExpressionOperator);
+    ASTNode(ExpressionOperator, const char *a);
+    ASTNode(ExpressionOperator, double val);
+    ASTNode(ExpressionOperator, ASTNode *e);
+    ASTNode(ExpressionOperator, ASTNode *e, const char *i);
+    ASTNode(ExpressionOperator, ASTNode *e1, ASTNode *e2);
+
+    // Copy constructor
+    ASTNode(const ASTNode &e);
+
+    ~ASTNode();
+
+    static std::pair<std::string, std::string> lookup_var(const std::string var_alias);
+    static std::string lookup_var_path(const std::string var_alias);
+    static std::string lookup_var_indices(const std::string var_alias);
+    static void add_lookup_entry(const std::string alias, const std::string var_name,
+                                 const std::string indices);
+
+    void add_subexpr(ASTNode *e);
+    void add_back_subexpr(ASTNode *e, size_t i);
+    void extend_subexprs(size_t n);
+    void infer_type();
+    void printpretty(std::string indent = "");
+
+    // private:
+    ExpressionOperator operation;
+    std::string alias;
+    std::string indices;
+    double value;
+    std::vector<ASTNode *> sub_expr;
+
+    static std::map<std::string, std::pair<std::string, std::string>> var_lookup;
+};
+
+}
+}
+#endif
\ No newline at end of file
diff --git a/source/adios2/toolkit/derived/parser/lexer.cpp b/source/adios2/toolkit/derived/parser/lexer.cpp
new file mode 100644
index 0000000000..03b101c191
--- /dev/null
+++ b/source/adios2/toolkit/derived/parser/lexer.cpp
@@ -0,0 +1,1884 @@
+#line 1 "lexer.cpp"
+
+#line 3 "lexer.cpp"
+
+#define YY_INT_ALIGNED short int
+
+/* A lexical scanner generated by flex */
+
+#define FLEX_SCANNER
+#define YY_FLEX_MAJOR_VERSION 2
+#define YY_FLEX_MINOR_VERSION 6
+#define YY_FLEX_SUBMINOR_VERSION 4
+#if YY_FLEX_SUBMINOR_VERSION > 0
+#define FLEX_BETA
+#endif
+
+/* First, we deal with  platform-specific or compiler-specific issues. */
+
+/* begin standard C headers. */
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+/* end standard C headers. */
+
+/* flex integer type definitions */
+
+#ifndef FLEXINT_H
+#define FLEXINT_H
+
+/* C99 systems have <inttypes.h>. Non-C99 systems may or may not. */
+
+#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L
+
+/* C99 says to define __STDC_LIMIT_MACROS before including stdint.h,
+ * if you want the limit (max/min) macros for int types.
+ */
+#ifndef __STDC_LIMIT_MACROS
+#define __STDC_LIMIT_MACROS 1
+#endif
+
+#include <inttypes.h>
+typedef int8_t flex_int8_t;
+typedef uint8_t flex_uint8_t;
+typedef int16_t flex_int16_t;
+typedef uint16_t flex_uint16_t;
+typedef int32_t flex_int32_t;
+typedef uint32_t flex_uint32_t;
+typedef uint64_t flex_uint64_t;
+#else
+typedef signed char flex_int8_t;
+typedef short int flex_int16_t;
+typedef int flex_int32_t;
+typedef unsigned char flex_uint8_t;
+typedef unsigned short int flex_uint16_t;
+typedef unsigned int flex_uint32_t;
+
+/* Limits of integral types. */
+#ifndef INT8_MIN
+#define INT8_MIN (-128)
+#endif
+#ifndef INT16_MIN
+#define INT16_MIN (-32767 - 1)
+#endif
+#ifndef INT32_MIN
+#define INT32_MIN (-2147483647 - 1)
+#endif
+#ifndef INT8_MAX
+#define INT8_MAX (127)
+#endif
+#ifndef INT16_MAX
+#define INT16_MAX (32767)
+#endif
+#ifndef INT32_MAX
+#define INT32_MAX (2147483647)
+#endif
+#ifndef UINT8_MAX
+#define UINT8_MAX (255U)
+#endif
+#ifndef UINT16_MAX
+#define UINT16_MAX (65535U)
+#endif
+#ifndef UINT32_MAX
+#define UINT32_MAX (4294967295U)
+#endif
+
+#ifndef SIZE_MAX
+#define SIZE_MAX (~(size_t)0)
+#endif
+
+#endif /* ! C99 */
+
+#endif /* ! FLEXINT_H */
+
+/* begin standard C++ headers. */
+
+/* TODO: this is always defined, so inline it */
+#define yyconst const
+
+#if defined(__GNUC__) && __GNUC__ >= 3
+#define yynoreturn __attribute__((__noreturn__))
+#else
+#define yynoreturn
+#endif
+
+/* Returned upon end-of-file. */
+#define YY_NULL 0
+
+/* Promotes a possibly negative, possibly signed char to an
+ *   integer in range [0..255] for use as an array index.
+ */
+#define YY_SC_TO_UI(c) ((YY_CHAR)(c))
+
+/* Enter a start condition.  This macro really ought to take a parameter,
+ * but we do it the disgusting crufty way forced on us by the ()-less
+ * definition of BEGIN.
+ */
+#define BEGIN (yy_start) = 1 + 2 *
+/* Translate the current start state into a value that can be later handed
+ * to BEGIN to return to the state.  The YYSTATE alias is for lex
+ * compatibility.
+ */
+#define YY_START (((yy_start)-1) / 2)
+#define YYSTATE YY_START
+/* Action number for EOF rule of a given start state. */
+#define YY_STATE_EOF(state) (YY_END_OF_BUFFER + state + 1)
+/* Special action meaning "start processing a new file". */
+#define YY_NEW_FILE yyrestart(yyin)
+#define YY_END_OF_BUFFER_CHAR 0
+
+/* Size of default input buffer. */
+#ifndef YY_BUF_SIZE
+#ifdef __ia64__
+/* On IA-64, the buffer size is 16k, not 8k.
+ * Moreover, YY_BUF_SIZE is 2*YY_READ_BUF_SIZE in the general case.
+ * Ditto for the __ia64__ case accordingly.
+ */
+#define YY_BUF_SIZE 32768
+#else
+#define YY_BUF_SIZE 16384
+#endif /* __ia64__ */
+#endif
+
+/* The state buf must be large enough to hold one state per character in the main buffer.
+ */
+#define YY_STATE_BUF_SIZE ((YY_BUF_SIZE + 2) * sizeof(yy_state_type))
+
+#ifndef YY_TYPEDEF_YY_BUFFER_STATE
+#define YY_TYPEDEF_YY_BUFFER_STATE
+typedef struct yy_buffer_state *YY_BUFFER_STATE;
+#endif
+
+#ifndef YY_TYPEDEF_YY_SIZE_T
+#define YY_TYPEDEF_YY_SIZE_T
+typedef size_t yy_size_t;
+#endif
+
+extern yy_size_t yyleng;
+
+extern FILE *yyin, *yyout;
+
+#define EOB_ACT_CONTINUE_SCAN 0
+#define EOB_ACT_END_OF_FILE 1
+#define EOB_ACT_LAST_MATCH 2
+
+#define YY_LESS_LINENO(n)
+#define YY_LINENO_REWIND_TO(ptr)
+
+/* Return all but the first "n" matched characters back to the input stream. */
+#define yyless(n)                                                                                  \
+    do                                                                                             \
+    {                                                                                              \
+        /* Undo effects of setting up yytext. */                                                   \
+        int yyless_macro_arg = (n);                                                                \
+        YY_LESS_LINENO(yyless_macro_arg);                                                          \
+        *yy_cp = (yy_hold_char);                                                                   \
+        YY_RESTORE_YY_MORE_OFFSET(yy_c_buf_p) = yy_cp = yy_bp + yyless_macro_arg - YY_MORE_ADJ;    \
+        YY_DO_BEFORE_ACTION; /* set up yytext again */                                             \
+    } while (0)
+#define unput(c) yyunput(c, (yytext_ptr))
+
+#ifndef YY_STRUCT_YY_BUFFER_STATE
+#define YY_STRUCT_YY_BUFFER_STATE
+struct yy_buffer_state
+{
+    FILE *yy_input_file;
+
+    char *yy_ch_buf;  /* input buffer */
+    char *yy_buf_pos; /* current position in input buffer */
+
+    /* Size of input buffer in bytes, not including room for EOB
+     * characters.
+     */
+    int yy_buf_size;
+
+    /* Number of characters read into yy_ch_buf, not including EOB
+     * characters.
+     */
+    yy_size_t yy_n_chars;
+
+    /* Whether we "own" the buffer - i.e., we know we created it,
+     * and can realloc() it to grow it, and should free() it to
+     * delete it.
+     */
+    int yy_is_our_buffer;
+
+    /* Whether this is an "interactive" input source; if so, and
+     * if we're using stdio for input, then we want to use getc()
+     * instead of fread(), to make sure we stop fetching input after
+     * each newline.
+     */
+    int yy_is_interactive;
+
+    /* Whether we're considered to be at the beginning of a line.
+     * If so, '^' rules will be active on the next match, otherwise
+     * not.
+     */
+    int yy_at_bol;
+
+    int yy_bs_lineno; /**< The line count. */
+    int yy_bs_column; /**< The column count. */
+
+    /* Whether to try to fill the input buffer when we reach the
+     * end of it.
+     */
+    int yy_fill_buffer;
+
+    int yy_buffer_status;
+
+#define YY_BUFFER_NEW 0
+#define YY_BUFFER_NORMAL 1
+    /* When an EOF's been seen but there's still some text to process
+     * then we mark the buffer as YY_EOF_PENDING, to indicate that we
+     * shouldn't try reading from the input source any more.  We might
+     * still have a bunch of tokens to match, though, because of
+     * possible backing-up.
+     *
+     * When we actually see the EOF, we change the status to "new"
+     * (via yyrestart()), so that the user can continue scanning by
+     * just pointing yyin at a new input file.
+     */
+#define YY_BUFFER_EOF_PENDING 2
+};
+#endif /* !YY_STRUCT_YY_BUFFER_STATE */
+
+/* Stack of input buffers. */
+static size_t yy_buffer_stack_top = 0;          /**< index of top of stack. */
+static size_t yy_buffer_stack_max = 0;          /**< capacity of stack. */
+static YY_BUFFER_STATE *yy_buffer_stack = NULL; /**< Stack as an array. */
+
+/* We provide macros for accessing buffer states in case in the
+ * future we want to put the buffer states in a more general
+ * "scanner state".
+ *
+ * Returns the top of the stack, or NULL.
+ */
+#define YY_CURRENT_BUFFER ((yy_buffer_stack) ? (yy_buffer_stack)[(yy_buffer_stack_top)] : NULL)
+/* Same as previous macro, but useful when we know that the buffer stack is not
+ * NULL or when we need an lvalue. For internal use only.
+ */
+#define YY_CURRENT_BUFFER_LVALUE (yy_buffer_stack)[(yy_buffer_stack_top)]
+
+/* yy_hold_char holds the character lost when yytext is formed. */
+static char yy_hold_char;
+static yy_size_t yy_n_chars; /* number of characters read into yy_ch_buf */
+yy_size_t yyleng;
+
+/* Points to current character in buffer. */
+static char *yy_c_buf_p = NULL;
+static int yy_init = 0;  /* whether we need to initialize */
+static int yy_start = 0; /* start state number */
+
+/* Flag which is used to allow yywrap()'s to do buffer switches
+ * instead of setting up a fresh yyin.  A bit of a hack ...
+ */
+static int yy_did_buffer_switch_on_eof;
+
+void yyrestart(FILE *input_file);
+void yy_switch_to_buffer(YY_BUFFER_STATE new_buffer);
+YY_BUFFER_STATE yy_create_buffer(FILE *file, int size);
+void yy_delete_buffer(YY_BUFFER_STATE b);
+void yy_flush_buffer(YY_BUFFER_STATE b);
+void yypush_buffer_state(YY_BUFFER_STATE new_buffer);
+void yypop_buffer_state(void);
+
+static void yyensure_buffer_stack(void);
+static void yy_load_buffer_state(void);
+static void yy_init_buffer(YY_BUFFER_STATE b, FILE *file);
+#define YY_FLUSH_BUFFER yy_flush_buffer(YY_CURRENT_BUFFER)
+
+YY_BUFFER_STATE yy_scan_buffer(char *base, yy_size_t size);
+YY_BUFFER_STATE yy_scan_string(const char *yy_str);
+YY_BUFFER_STATE yy_scan_bytes(const char *bytes, yy_size_t len);
+
+void *yyalloc(yy_size_t);
+void *yyrealloc(void *, yy_size_t);
+void yyfree(void *);
+
+#define yy_new_buffer yy_create_buffer
+#define yy_set_interactive(is_interactive)                                                         \
+    {                                                                                              \
+        if (!YY_CURRENT_BUFFER)                                                                    \
+        {                                                                                          \
+            yyensure_buffer_stack();                                                               \
+            YY_CURRENT_BUFFER_LVALUE = yy_create_buffer(yyin, YY_BUF_SIZE);                        \
+        }                                                                                          \
+        YY_CURRENT_BUFFER_LVALUE->yy_is_interactive = is_interactive;                              \
+    }
+#define yy_set_bol(at_bol)                                                                         \
+    {                                                                                              \
+        if (!YY_CURRENT_BUFFER)                                                                    \
+        {                                                                                          \
+            yyensure_buffer_stack();                                                               \
+            YY_CURRENT_BUFFER_LVALUE = yy_create_buffer(yyin, YY_BUF_SIZE);                        \
+        }                                                                                          \
+        YY_CURRENT_BUFFER_LVALUE->yy_at_bol = at_bol;                                              \
+    }
+#define YY_AT_BOL() (YY_CURRENT_BUFFER_LVALUE->yy_at_bol)
+
+/* Begin user sect3 */
+
+#define yywrap() (/*CONSTCOND*/ 1)
+#define YY_SKIP_YYWRAP
+typedef flex_uint8_t YY_CHAR;
+
+FILE *yyin = NULL, *yyout = NULL;
+
+typedef int yy_state_type;
+
+extern int yylineno;
+int yylineno = 1;
+
+extern char *yytext;
+#ifdef yytext_ptr
+#undef yytext_ptr
+#endif
+#define yytext_ptr yytext
+
+static yy_state_type yy_get_previous_state(void);
+static yy_state_type yy_try_NUL_trans(yy_state_type current_state);
+static int yy_get_next_buffer(void);
+static void yynoreturn yy_fatal_error(const char *msg);
+
+/* Done after the current pattern has been matched and before the
+ * corresponding action - sets up yytext.
+ */
+#define YY_DO_BEFORE_ACTION                                                                        \
+    (yytext_ptr) = yy_bp;                                                                          \
+    yyleng = (yy_size_t)(yy_cp - yy_bp);                                                           \
+    (yy_hold_char) = *yy_cp;                                                                       \
+    *yy_cp = '\0';                                                                                 \
+    (yy_c_buf_p) = yy_cp;
+#define YY_NUM_RULES 24
+#define YY_END_OF_BUFFER 25
+/* This struct is not used in this scanner,
+   but its presence is necessary. */
+struct yy_trans_info
+{
+    flex_int32_t yy_verify;
+    flex_int32_t yy_nxt;
+};
+static const flex_int16_t yy_accept[91] = {
+    0,  0,  0,  25, 23, 17, 18, 23, 23, 6, 7,  3,  1,  8,  2,  23, 4,  16, 23, 19, 19, 23, 5,
+    19, 19, 19, 19, 19, 18, 17, 0,  0,  0, 16, 16, 16, 19, 19, 0,  20, 19, 0,  0,  19, 19, 19,
+    19, 19, 19, 19, 0,  21, 0,  0,  16, 0, 0,  16, 20, 20, 18, 0,  22, 0,  0,  9,  12, 19, 19,
+    11, 19, 13, 18, 0,  16, 20, 0,  0,  0, 15, 19, 10, 0,  0,  19, 0,  19, 19, 19, 14, 0};
+
+static const YY_CHAR yy_ec[256] = {
+    0,  1,  1,  1,  1,  1,  1,  1,  1,  2,  3,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,
+    1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  2,  1,  1,  1,  4,  1,  1,  5,  6,  7,
+    8,  9,  10, 11, 12, 13, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 15, 1,  1,  1,  1,
+    1,  1,  16, 16, 16, 16, 17, 18, 16, 16, 16, 16, 16, 16, 16, 16, 19, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 20, 21, 22, 23, 24, 1,  25, 26, 27, 28,
+
+    29, 26, 30, 26, 31, 26, 26, 32, 33, 34, 35, 26, 36, 37, 38, 39, 40, 26, 26, 26, 26,
+    26, 1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,
+    1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,
+    1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,
+    1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,
+
+    1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,
+    1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,
+    1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1};
+
+static const YY_CHAR yy_meta[42] = {0, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 3, 4, 5, 6, 1, 6, 6, 6, 6, 1,
+                                    5, 1, 1, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 1};
+
+static const flex_int16_t yy_base[98] = {
+    0,   0,   0,   221, 254, 218, 254, 190, 29,  254, 254, 254, 254, 254, 254, 203, 254,
+    32,  34,  204, 33,  42,  254, 39,  42,  47,  48,  50,  254, 213, 180, 50,  75,  199,
+    52,  56,  200, 81,  84,  89,  89,  101, 103, 66,  53,  75,  92,  94,  97,  102, 183,
+    254, 116, 128, 118, 134, 196, 112, 140, 144, 196, 105, 254, 145, 152, 195, 193, 138,
+    144, 157, 132, 126, 254, 162, 117, 166, 174, 167, 181, 113, 173, 71,  178, 184, 160,
+    187, 188, 190, 134, 64,  254, 224, 227, 229, 234, 238, 243, 247
+
+};
+
+static const flex_int16_t yy_def[98] = {
+    0,  90, 1,  90, 90, 90, 90, 90, 91, 90, 90, 90, 90, 90, 90, 90, 90, 92, 93, 92,
+    92, 90, 90, 92, 92, 92, 92, 92, 90, 90, 90, 91, 94, 90, 90, 92, 92, 92, 93, 95,
+    92, 90, 90, 92, 92, 92, 92, 92, 92, 92, 90, 90, 96, 94, 90, 90, 90, 92, 97, 95,
+    92, 90, 90, 90, 90, 92, 92, 92, 92, 92, 92, 92, 90, 96, 90, 97, 90, 90, 90, 92,
+    92, 92, 90, 90, 92, 90, 92, 92, 92, 92, 0,  90, 90, 90, 90, 90, 90, 90
+
+};
+
+static const flex_int16_t yy_nxt[296] = {
+    0,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 19, 19, 21, 4,  4,  22,
+    19, 23, 19, 24, 19, 19, 19, 19, 19, 25, 19, 19, 19, 19, 26, 27, 19, 28, 31, 31, 34, 90, 35, 38,
+    38, 37, 31, 90, 40, 31, 90, 38, 41, 42, 38, 90, 90, 37, 90, 31, 31, 90, 54, 43, 34, 55, 35, 31,
+    46, 37, 31, 49, 90, 44, 90, 47, 51, 55, 45, 90, 48, 37, 52, 90, 52, 52, 56, 66, 56, 90, 65, 57,
+    52, 38, 38, 52, 58,
+
+    90, 58, 58, 90, 38, 90, 60, 38, 90, 58, 61, 67, 58, 90, 41, 42, 63, 64, 76, 77, 51, 68, 62, 90,
+    90, 57, 52, 69, 52, 52, 74, 54, 51, 70, 55, 71, 52, 90, 52, 52, 52, 52, 56, 90, 56, 90, 55, 74,
+    52, 90, 58, 52, 58, 58, 58, 90, 58, 58, 63, 64, 58, 61, 89, 58, 58, 78, 51, 58, 90, 79, 81, 90,
+    52, 62, 52, 52, 58, 80, 58, 58, 82, 83, 52, 61, 90, 52, 58, 76, 77, 58, 61, 82, 83, 61, 78, 62,
+    61, 85, 86, 90,
+
+    85, 90, 62, 84, 90, 62, 90, 90, 62, 74, 72, 90, 33, 50, 29, 90, 33, 88, 30, 29, 90, 90, 90, 90,
+    90, 90, 90, 87, 32, 32, 36, 90, 36, 39, 39, 53, 53, 90, 53, 53, 59, 90, 59, 59, 73, 73, 90, 73,
+    73, 75, 90, 75, 75, 3,  90, 90, 90, 90, 90, 90, 90, 90, 90, 90, 90, 90, 90, 90, 90, 90, 90, 90,
+    90, 90, 90, 90, 90, 90, 90, 90, 90, 90, 90, 90, 90, 90, 90, 90, 90, 90, 90, 90, 90, 90, 90
+
+};
+
+static const flex_int16_t yy_chk[296] = {
+    0,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,
+    1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  8,  8,  17, 20, 17, 18,
+    18, 17, 8,  23, 20, 8,  24, 18, 21, 21, 18, 25, 26, 17, 27, 31, 31, 44, 34, 23, 35, 34, 35, 31,
+    25, 35, 31, 27, 89, 24, 43, 26, 32, 34, 24, 81, 26, 35, 32, 45, 32, 32, 37, 44, 37, 37, 43, 37,
+    32, 38, 38, 32, 39,
+
+    40, 39, 39, 46, 38, 47, 40, 38, 48, 39, 41, 45, 39, 49, 41, 41, 42, 42, 61, 61, 52, 46, 41, 57,
+    79, 57, 52, 47, 52, 52, 74, 54, 53, 48, 54, 49, 52, 71, 53, 52, 53, 53, 55, 70, 55, 88, 54, 55,
+    53, 67, 58, 53, 58, 58, 59, 68, 59, 59, 63, 63, 58, 64, 88, 58, 59, 64, 73, 59, 69, 67, 70, 84,
+    73, 64, 73, 73, 75, 68, 75, 75, 77, 77, 73, 76, 80, 73, 75, 76, 76, 75, 78, 82, 82, 83, 78, 76,
+    85, 83, 84, 86,
+
+    85, 87, 78, 80, 66, 83, 65, 60, 85, 56, 50, 36, 33, 30, 29, 19, 15, 87, 7,  5,  3,  0,  0,  0,
+    0,  0,  0,  86, 91, 91, 92, 0,  92, 93, 93, 94, 94, 0,  94, 94, 95, 0,  95, 95, 96, 96, 0,  96,
+    96, 97, 0,  97, 97, 90, 90, 90, 90, 90, 90, 90, 90, 90, 90, 90, 90, 90, 90, 90, 90, 90, 90, 90,
+    90, 90, 90, 90, 90, 90, 90, 90, 90, 90, 90, 90, 90, 90, 90, 90, 90, 90, 90, 90, 90, 90, 90
+
+};
+
+static yy_state_type yy_last_accepting_state;
+static char *yy_last_accepting_cpos;
+
+extern int yy_flex_debug;
+int yy_flex_debug = 0;
+
+/* The intent behind this definition is that it'll catch
+ * any uses of REJECT which flex missed.
+ */
+#define REJECT reject_used_but_not_detected
+#define yymore() yymore_used_but_not_detected
+#define YY_MORE_ADJ 0
+#define YY_RESTORE_YY_MORE_OFFSET
+char *yytext;
+#line 1 "lexer.l"
+#line 2 "lexer.l"
+#include "parser.h"
+#define YY_USER_ACTION yylloc.first_line = yylloc.last_line = yylineno;
+#line 544 "lexer.cpp"
+#line 545 "lexer.cpp"
+
+#define INITIAL 0
+
+#ifndef YY_NO_UNISTD_H
+/* Special case for "unistd.h", since it is non-ANSI. We include it way
+ * down here because we want the user's section 1 to have been scanned first.
+ * The user has a chance to override it with an option.
+ */
+#include <unistd.h>
+#endif
+
+#ifndef YY_EXTRA_TYPE
+#define YY_EXTRA_TYPE void *
+#endif
+
+static int yy_init_globals(void);
+
+/* Accessor methods to globals.
+   These are made visible to non-reentrant scanners for convenience. */
+
+int yylex_destroy(void);
+
+int yyget_debug(void);
+
+void yyset_debug(int debug_flag);
+
+YY_EXTRA_TYPE yyget_extra(void);
+
+void yyset_extra(YY_EXTRA_TYPE user_defined);
+
+FILE *yyget_in(void);
+
+void yyset_in(FILE *_in_str);
+
+FILE *yyget_out(void);
+
+void yyset_out(FILE *_out_str);
+
+yy_size_t yyget_leng(void);
+
+char *yyget_text(void);
+
+int yyget_lineno(void);
+
+void yyset_lineno(int _line_number);
+
+/* Macros after this point can all be overridden by user definitions in
+ * section 1.
+ */
+
+#ifndef YY_SKIP_YYWRAP
+#ifdef __cplusplus
+extern "C" int yywrap(void);
+#else
+extern int yywrap(void);
+#endif
+#endif
+
+#ifndef YY_NO_UNPUT
+
+static void yyunput(int c, char *buf_ptr);
+
+#endif
+
+#ifndef yytext_ptr
+static void yy_flex_strncpy(char *, const char *, int);
+#endif
+
+#ifdef YY_NEED_STRLEN
+static int yy_flex_strlen(const char *);
+#endif
+
+#ifndef YY_NO_INPUT
+#ifdef __cplusplus
+static int yyinput(void);
+#else
+static int input(void);
+#endif
+
+#endif
+
+/* Amount of stuff to slurp up with each read. */
+#ifndef YY_READ_BUF_SIZE
+#ifdef __ia64__
+/* On IA-64, the buffer size is 16k, not 8k */
+#define YY_READ_BUF_SIZE 16384
+#else
+#define YY_READ_BUF_SIZE 8192
+#endif /* __ia64__ */
+#endif
+
+/* Copy whatever the last rule matched to the standard output. */
+#ifndef ECHO
+/* This used to be an fputs(), but since the string might contain NUL's,
+ * we now use fwrite().
+ */
+#define ECHO                                                                                       \
+    do                                                                                             \
+    {                                                                                              \
+        if (fwrite(yytext, (size_t)yyleng, 1, yyout))                                              \
+        {                                                                                          \
+        }                                                                                          \
+    } while (0)
+#endif
+
+/* Gets input and stuffs it into "buf".  number of characters read, or YY_NULL,
+ * is returned in "result".
+ */
+#ifndef YY_INPUT
+#define YY_INPUT(buf, result, max_size)                                                            \
+    if (YY_CURRENT_BUFFER_LVALUE->yy_is_interactive)                                               \
+    {                                                                                              \
+        int c = '*';                                                                               \
+        yy_size_t n;                                                                               \
+        for (n = 0; n < max_size && (c = getc(yyin)) != EOF && c != '\n'; ++n)                     \
+            buf[n] = (char)c;                                                                      \
+        if (c == '\n')                                                                             \
+            buf[n++] = (char)c;                                                                    \
+        if (c == EOF && ferror(yyin))                                                              \
+            YY_FATAL_ERROR("input in flex scanner failed");                                        \
+        result = n;                                                                                \
+    }                                                                                              \
+    else                                                                                           \
+    {                                                                                              \
+        errno = 0;                                                                                 \
+        while ((result = (int)fread(buf, 1, (yy_size_t)max_size, yyin)) == 0 && ferror(yyin))      \
+        {                                                                                          \
+            if (errno != EINTR)                                                                    \
+            {                                                                                      \
+                YY_FATAL_ERROR("input in flex scanner failed");                                    \
+                break;                                                                             \
+            }                                                                                      \
+            errno = 0;                                                                             \
+            clearerr(yyin);                                                                        \
+        }                                                                                          \
+    }
+
+#endif
+
+/* No semi-colon after return; correct usage is to write "yyterminate();" -
+ * we don't want an extra ';' after the "return" because that will cause
+ * some compilers to complain about unreachable statements.
+ */
+#ifndef yyterminate
+#define yyterminate() return YY_NULL
+#endif
+
+/* Number of entries by which start-condition stack grows. */
+#ifndef YY_START_STACK_INCR
+#define YY_START_STACK_INCR 25
+#endif
+
+/* Report a fatal error. */
+#ifndef YY_FATAL_ERROR
+#define YY_FATAL_ERROR(msg) yy_fatal_error(msg)
+#endif
+
+/* end tables serialization structures and prototypes */
+
+/* Default declaration of generated scanner - a define so the user can
+ * easily add parameters.
+ */
+#ifndef YY_DECL
+#define YY_DECL_IS_OURS 1
+
+extern int yylex(void);
+
+#define YY_DECL int yylex(void)
+#endif /* !YY_DECL */
+
+/* Code executed at the beginning of each rule, after yytext and yyleng
+ * have been set up.
+ */
+#ifndef YY_USER_ACTION
+#define YY_USER_ACTION
+#endif
+
+/* Code executed at the end of each rule. */
+#ifndef YY_BREAK
+#define YY_BREAK /*LINTED*/ break;
+#endif
+
+#define YY_RULE_SETUP YY_USER_ACTION
+
+/** The main scanner function which does all the work.
+ */
+YY_DECL
+{
+    yy_state_type yy_current_state;
+    char *yy_cp, *yy_bp;
+    int yy_act;
+
+    if (!(yy_init))
+    {
+        (yy_init) = 1;
+
+#ifdef YY_USER_INIT
+        YY_USER_INIT;
+#endif
+
+        if (!(yy_start))
+            (yy_start) = 1; /* first start state */
+
+        if (!yyin)
+            yyin = stdin;
+
+        if (!yyout)
+            yyout = stdout;
+
+        if (!YY_CURRENT_BUFFER)
+        {
+            yyensure_buffer_stack();
+            YY_CURRENT_BUFFER_LVALUE = yy_create_buffer(yyin, YY_BUF_SIZE);
+        }
+
+        yy_load_buffer_state();
+    }
+
+    {
+#line 12 "lexer.l"
+
+#line 765 "lexer.cpp"
+
+        while (/*CONSTCOND*/ 1) /* loops until end-of-file is reached */
+        {
+            yy_cp = (yy_c_buf_p);
+
+            /* Support of yytext. */
+            *yy_cp = (yy_hold_char);
+
+            /* yy_bp points to the position in yy_ch_buf of the start of
+             * the current run.
+             */
+            yy_bp = yy_cp;
+
+            yy_current_state = (yy_start);
+        yy_match:
+            do
+            {
+                YY_CHAR yy_c = yy_ec[YY_SC_TO_UI(*yy_cp)];
+                if (yy_accept[yy_current_state])
+                {
+                    (yy_last_accepting_state) = yy_current_state;
+                    (yy_last_accepting_cpos) = yy_cp;
+                }
+                while (yy_chk[yy_base[yy_current_state] + yy_c] != yy_current_state)
+                {
+                    yy_current_state = (int)yy_def[yy_current_state];
+                    if (yy_current_state >= 91)
+                        yy_c = yy_meta[yy_c];
+                }
+                yy_current_state = yy_nxt[yy_base[yy_current_state] + yy_c];
+                ++yy_cp;
+            } while (yy_base[yy_current_state] != 254);
+
+        yy_find_action:
+            yy_act = yy_accept[yy_current_state];
+            if (yy_act == 0)
+            { /* have to back up */
+                yy_cp = (yy_last_accepting_cpos);
+                yy_current_state = (yy_last_accepting_state);
+                yy_act = yy_accept[yy_current_state];
+            }
+
+            YY_DO_BEFORE_ACTION;
+
+        do_action: /* This label is used only to access EOF actions. */
+
+            switch (yy_act)
+            {       /* beginning of action switch */
+            case 0: /* must back up */
+                /* undo the effects of YY_DO_BEFORE_ACTION */
+                *yy_cp = (yy_hold_char);
+                yy_cp = (yy_last_accepting_cpos);
+                yy_current_state = (yy_last_accepting_state);
+                goto yy_find_action;
+
+            case 1:
+                YY_RULE_SETUP
+#line 14 "lexer.l"
+                {
+                    yylval.sval = strdup(yytext);
+                    return OPERATOR;
+                }
+                YY_BREAK
+            case 2:
+                YY_RULE_SETUP
+#line 15 "lexer.l"
+                {
+                    yylval.sval = strdup(yytext);
+                    return OPERATOR;
+                }
+                YY_BREAK
+            case 3:
+                YY_RULE_SETUP
+#line 16 "lexer.l"
+                {
+                    yylval.sval = strdup(yytext);
+                    return OPERATOR;
+                }
+                YY_BREAK
+            case 4:
+                YY_RULE_SETUP
+#line 17 "lexer.l"
+                {
+                    yylval.sval = strdup(yytext);
+                    return OPERATOR;
+                }
+                YY_BREAK
+            case 5:
+                YY_RULE_SETUP
+#line 18 "lexer.l"
+                {
+                    yylval.sval = strdup(yytext);
+                    return OPERATOR;
+                }
+                YY_BREAK
+            case 6:
+                YY_RULE_SETUP
+#line 19 "lexer.l"
+                {
+                    return L_PAREN;
+                }
+                YY_BREAK
+            case 7:
+                YY_RULE_SETUP
+#line 20 "lexer.l"
+                {
+                    return R_PAREN;
+                }
+                YY_BREAK
+            case 8:
+                YY_RULE_SETUP
+#line 21 "lexer.l"
+                {
+                    return COMMA;
+                }
+                YY_BREAK
+            case 9:
+                YY_RULE_SETUP
+#line 22 "lexer.l"
+                {
+                    yylval.sval = strdup(yytext);
+                    return FUNCTION;
+                }
+                YY_BREAK
+            case 10:
+                YY_RULE_SETUP
+#line 23 "lexer.l"
+                {
+                    yylval.sval = strdup(yytext);
+                    return FUNCTION;
+                }
+                YY_BREAK
+            case 11:
+                YY_RULE_SETUP
+#line 24 "lexer.l"
+                {
+                    yylval.sval = strdup(yytext);
+                    return FUNCTION;
+                }
+                YY_BREAK
+            case 12:
+                YY_RULE_SETUP
+#line 25 "lexer.l"
+                {
+                    yylval.sval = strdup(yytext);
+                    return FUNCTION;
+                }
+                YY_BREAK
+            case 13:
+                YY_RULE_SETUP
+#line 26 "lexer.l"
+                {
+                    yylval.sval = strdup(yytext);
+                    return FUNCTION;
+                }
+                YY_BREAK
+            case 14:
+                YY_RULE_SETUP
+#line 27 "lexer.l"
+                {
+                    yylval.sval = strdup(yytext);
+                    return FUNCTION;
+                }
+                YY_BREAK
+            case 15:
+                YY_RULE_SETUP
+#line 28 "lexer.l"
+                {
+                    yylval.sval = strdup(yytext);
+                    return FUNCTION;
+                }
+                YY_BREAK
+            case 16:
+                YY_RULE_SETUP
+#line 31 "lexer.l"
+                {
+                    yylval.dval = atof(yytext);
+                    return NUMBER;
+                }
+                YY_BREAK
+            case 17:
+                YY_RULE_SETUP
+#line 33 "lexer.l"
+                { /* ignore spaces */
+                }
+                YY_BREAK
+            case 18:
+                /* rule 18 can match eol */
+                YY_RULE_SETUP
+#line 35 "lexer.l"
+                {
+                    return ENDL;
+                }
+                YY_BREAK
+            case 19:
+                YY_RULE_SETUP
+#line 37 "lexer.l"
+                {
+                    yylval.sval = strdup(yytext);
+                    return ALIAS;
+                }
+                YY_BREAK
+            case 20:
+                YY_RULE_SETUP
+#line 39 "lexer.l"
+                {
+                    yylval.sval = strndup(yytext + 1, strlen(yytext) - 1);
+                    return PATH;
+                }
+                YY_BREAK
+            case 21:
+                YY_RULE_SETUP
+#line 41 "lexer.l"
+                {
+                    yylval.sval = strndup(yytext + 1, strlen(yytext) - 2);
+                    return PATH;
+                }
+                YY_BREAK
+            case 22:
+                YY_RULE_SETUP
+#line 43 "lexer.l"
+                {
+                    yylval.sval = strndup(yytext + 1, strlen(yytext) - 2);
+                    return INDICES;
+                }
+                YY_BREAK
+            case 23:
+                YY_RULE_SETUP
+#line 45 "lexer.l"
+                {
+                    printf("Error at line %d: unrecognized symbol \"%s\"\n", yylloc.first_line,
+                           yytext);
+                    exit(0);
+                }
+                YY_BREAK
+            case 24:
+                YY_RULE_SETUP
+#line 47 "lexer.l"
+                ECHO;
+                YY_BREAK
+#line 943 "lexer.cpp"
+            case YY_STATE_EOF(INITIAL):
+                yyterminate();
+
+            case YY_END_OF_BUFFER: {
+                /* Amount of text matched not including the EOB char. */
+                int yy_amount_of_matched_text = (int)(yy_cp - (yytext_ptr)) - 1;
+
+                /* Undo the effects of YY_DO_BEFORE_ACTION. */
+                *yy_cp = (yy_hold_char);
+                YY_RESTORE_YY_MORE_OFFSET
+
+                if (YY_CURRENT_BUFFER_LVALUE->yy_buffer_status == YY_BUFFER_NEW)
+                {
+                    /* We're scanning a new file or input source.  It's
+                     * possible that this happened because the user
+                     * just pointed yyin at a new source and called
+                     * yylex().  If so, then we have to assure
+                     * consistency between YY_CURRENT_BUFFER and our
+                     * globals.  Here is the right place to do so, because
+                     * this is the first action (other than possibly a
+                     * back-up) that will match for the new input source.
+                     */
+                    (yy_n_chars) = YY_CURRENT_BUFFER_LVALUE->yy_n_chars;
+                    YY_CURRENT_BUFFER_LVALUE->yy_input_file = yyin;
+                    YY_CURRENT_BUFFER_LVALUE->yy_buffer_status = YY_BUFFER_NORMAL;
+                }
+
+                /* Note that here we test for yy_c_buf_p "<=" to the position
+                 * of the first EOB in the buffer, since yy_c_buf_p will
+                 * already have been incremented past the NUL character
+                 * (since all states make transitions on EOB to the
+                 * end-of-buffer state).  Contrast this with the test
+                 * in input().
+                 */
+                if ((yy_c_buf_p) <= &YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[(yy_n_chars)])
+                { /* This was really a NUL. */
+                    yy_state_type yy_next_state;
+
+                    (yy_c_buf_p) = (yytext_ptr) + yy_amount_of_matched_text;
+
+                    yy_current_state = yy_get_previous_state();
+
+                    /* Okay, we're now positioned to make the NUL
+                     * transition.  We couldn't have
+                     * yy_get_previous_state() go ahead and do it
+                     * for us because it doesn't know how to deal
+                     * with the possibility of jamming (and we don't
+                     * want to build jamming into it because then it
+                     * will run more slowly).
+                     */
+
+                    yy_next_state = yy_try_NUL_trans(yy_current_state);
+
+                    yy_bp = (yytext_ptr) + YY_MORE_ADJ;
+
+                    if (yy_next_state)
+                    {
+                        /* Consume the NUL. */
+                        yy_cp = ++(yy_c_buf_p);
+                        yy_current_state = yy_next_state;
+                        goto yy_match;
+                    }
+
+                    else
+                    {
+                        yy_cp = (yy_c_buf_p);
+                        goto yy_find_action;
+                    }
+                }
+
+                else
+                    switch (yy_get_next_buffer())
+                    {
+                    case EOB_ACT_END_OF_FILE: {
+                        (yy_did_buffer_switch_on_eof) = 0;
+
+                        if (yywrap())
+                        {
+                            /* Note: because we've taken care in
+                             * yy_get_next_buffer() to have set up
+                             * yytext, we can now set up
+                             * yy_c_buf_p so that if some total
+                             * hoser (like flex itself) wants to
+                             * call the scanner after we return the
+                             * YY_NULL, it'll still work - another
+                             * YY_NULL will get returned.
+                             */
+                            (yy_c_buf_p) = (yytext_ptr) + YY_MORE_ADJ;
+
+                            yy_act = YY_STATE_EOF(YY_START);
+                            goto do_action;
+                        }
+
+                        else
+                        {
+                            if (!(yy_did_buffer_switch_on_eof))
+                                YY_NEW_FILE;
+                        }
+                        break;
+                    }
+
+                    case EOB_ACT_CONTINUE_SCAN:
+                        (yy_c_buf_p) = (yytext_ptr) + yy_amount_of_matched_text;
+
+                        yy_current_state = yy_get_previous_state();
+
+                        yy_cp = (yy_c_buf_p);
+                        yy_bp = (yytext_ptr) + YY_MORE_ADJ;
+                        goto yy_match;
+
+                    case EOB_ACT_LAST_MATCH:
+                        (yy_c_buf_p) = &YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[(yy_n_chars)];
+
+                        yy_current_state = yy_get_previous_state();
+
+                        yy_cp = (yy_c_buf_p);
+                        yy_bp = (yytext_ptr) + YY_MORE_ADJ;
+                        goto yy_find_action;
+                    }
+                break;
+            }
+
+            default:
+                YY_FATAL_ERROR("fatal flex scanner internal error--no action found");
+            } /* end of action switch */
+        }     /* end of scanning one token */
+    }         /* end of user's declarations */
+} /* end of yylex */
+
+/* yy_get_next_buffer - try to read in a new buffer
+ *
+ * Returns a code representing an action:
+ *	EOB_ACT_LAST_MATCH -
+ *	EOB_ACT_CONTINUE_SCAN - continue scanning from current position
+ *	EOB_ACT_END_OF_FILE - end of file
+ */
+static int yy_get_next_buffer(void)
+{
+    char *dest = YY_CURRENT_BUFFER_LVALUE->yy_ch_buf;
+    char *source = (yytext_ptr);
+    int number_to_move, i;
+    int ret_val;
+
+    if ((yy_c_buf_p) > &YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[(yy_n_chars) + 1])
+        YY_FATAL_ERROR("fatal flex scanner internal error--end of buffer missed");
+
+    if (YY_CURRENT_BUFFER_LVALUE->yy_fill_buffer == 0)
+    { /* Don't try to fill the buffer, so this is an EOF. */
+        if ((yy_c_buf_p) - (yytext_ptr)-YY_MORE_ADJ == 1)
+        {
+            /* We matched a single character, the EOB, so
+             * treat this as a final EOF.
+             */
+            return EOB_ACT_END_OF_FILE;
+        }
+
+        else
+        {
+            /* We matched some text prior to the EOB, first
+             * process it.
+             */
+            return EOB_ACT_LAST_MATCH;
+        }
+    }
+
+    /* Try to read more data. */
+
+    /* First move last chars to start of buffer. */
+    number_to_move = (int)((yy_c_buf_p) - (yytext_ptr)-1);
+
+    for (i = 0; i < number_to_move; ++i)
+        *(dest++) = *(source++);
+
+    if (YY_CURRENT_BUFFER_LVALUE->yy_buffer_status == YY_BUFFER_EOF_PENDING)
+        /* don't do the read, it's not guaranteed to return an EOF,
+         * just force an EOF
+         */
+        YY_CURRENT_BUFFER_LVALUE->yy_n_chars = (yy_n_chars) = 0;
+
+    else
+    {
+        yy_size_t num_to_read = YY_CURRENT_BUFFER_LVALUE->yy_buf_size - number_to_move - 1;
+
+        while (num_to_read <= 0)
+        { /* Not enough room in the buffer - grow it. */
+
+            /* just a shorter name for the current buffer */
+            YY_BUFFER_STATE b = YY_CURRENT_BUFFER_LVALUE;
+
+            int yy_c_buf_p_offset = (int)((yy_c_buf_p)-b->yy_ch_buf);
+
+            if (b->yy_is_our_buffer)
+            {
+                yy_size_t new_size = b->yy_buf_size * 2;
+
+                if (new_size <= 0)
+                    b->yy_buf_size += b->yy_buf_size / 8;
+                else
+                    b->yy_buf_size *= 2;
+
+                b->yy_ch_buf = (char *)
+                    /* Include room in for 2 EOB chars. */
+                    yyrealloc((void *)b->yy_ch_buf, (yy_size_t)(b->yy_buf_size + 2));
+            }
+            else
+                /* Can't grow it, we don't own it. */
+                b->yy_ch_buf = NULL;
+
+            if (!b->yy_ch_buf)
+                YY_FATAL_ERROR("fatal error - scanner input buffer overflow");
+
+            (yy_c_buf_p) = &b->yy_ch_buf[yy_c_buf_p_offset];
+
+            num_to_read = YY_CURRENT_BUFFER_LVALUE->yy_buf_size - number_to_move - 1;
+        }
+
+        if (num_to_read > YY_READ_BUF_SIZE)
+            num_to_read = YY_READ_BUF_SIZE;
+
+        /* Read in more data. */
+        YY_INPUT((&YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[number_to_move]), (yy_n_chars), num_to_read);
+
+        YY_CURRENT_BUFFER_LVALUE->yy_n_chars = (yy_n_chars);
+    }
+
+    if ((yy_n_chars) == 0)
+    {
+        if (number_to_move == YY_MORE_ADJ)
+        {
+            ret_val = EOB_ACT_END_OF_FILE;
+            yyrestart(yyin);
+        }
+
+        else
+        {
+            ret_val = EOB_ACT_LAST_MATCH;
+            YY_CURRENT_BUFFER_LVALUE->yy_buffer_status = YY_BUFFER_EOF_PENDING;
+        }
+    }
+
+    else
+        ret_val = EOB_ACT_CONTINUE_SCAN;
+
+    if (((yy_n_chars) + number_to_move) > YY_CURRENT_BUFFER_LVALUE->yy_buf_size)
+    {
+        /* Extend the array by 50%, plus the number we really need. */
+        yy_size_t new_size = (yy_n_chars) + number_to_move + ((yy_n_chars) >> 1);
+        YY_CURRENT_BUFFER_LVALUE->yy_ch_buf =
+            (char *)yyrealloc((void *)YY_CURRENT_BUFFER_LVALUE->yy_ch_buf, (yy_size_t)new_size);
+        if (!YY_CURRENT_BUFFER_LVALUE->yy_ch_buf)
+            YY_FATAL_ERROR("out of dynamic memory in yy_get_next_buffer()");
+        /* "- 2" to take care of EOB's */
+        YY_CURRENT_BUFFER_LVALUE->yy_buf_size = (int)(new_size - 2);
+    }
+
+    (yy_n_chars) += number_to_move;
+    YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[(yy_n_chars)] = YY_END_OF_BUFFER_CHAR;
+    YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[(yy_n_chars) + 1] = YY_END_OF_BUFFER_CHAR;
+
+    (yytext_ptr) = &YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[0];
+
+    return ret_val;
+}
+
+/* yy_get_previous_state - get the state just before the EOB char was reached */
+
+static yy_state_type yy_get_previous_state(void)
+{
+    yy_state_type yy_current_state;
+    char *yy_cp;
+
+    yy_current_state = (yy_start);
+
+    for (yy_cp = (yytext_ptr) + YY_MORE_ADJ; yy_cp < (yy_c_buf_p); ++yy_cp)
+    {
+        YY_CHAR yy_c = (*yy_cp ? yy_ec[YY_SC_TO_UI(*yy_cp)] : 41);
+        if (yy_accept[yy_current_state])
+        {
+            (yy_last_accepting_state) = yy_current_state;
+            (yy_last_accepting_cpos) = yy_cp;
+        }
+        while (yy_chk[yy_base[yy_current_state] + yy_c] != yy_current_state)
+        {
+            yy_current_state = (int)yy_def[yy_current_state];
+            if (yy_current_state >= 91)
+                yy_c = yy_meta[yy_c];
+        }
+        yy_current_state = yy_nxt[yy_base[yy_current_state] + yy_c];
+    }
+
+    return yy_current_state;
+}
+
+/* yy_try_NUL_trans - try to make a transition on the NUL character
+ *
+ * synopsis
+ *	next_state = yy_try_NUL_trans( current_state );
+ */
+static yy_state_type yy_try_NUL_trans(yy_state_type yy_current_state)
+{
+    int yy_is_jam;
+    char *yy_cp = (yy_c_buf_p);
+
+    YY_CHAR yy_c = 41;
+    if (yy_accept[yy_current_state])
+    {
+        (yy_last_accepting_state) = yy_current_state;
+        (yy_last_accepting_cpos) = yy_cp;
+    }
+    while (yy_chk[yy_base[yy_current_state] + yy_c] != yy_current_state)
+    {
+        yy_current_state = (int)yy_def[yy_current_state];
+        if (yy_current_state >= 91)
+            yy_c = yy_meta[yy_c];
+    }
+    yy_current_state = yy_nxt[yy_base[yy_current_state] + yy_c];
+    yy_is_jam = (yy_current_state == 90);
+
+    return yy_is_jam ? 0 : yy_current_state;
+}
+
+#ifndef YY_NO_UNPUT
+
+static void yyunput(int c, char *yy_bp)
+{
+    char *yy_cp;
+
+    yy_cp = (yy_c_buf_p);
+
+    /* undo effects of setting up yytext */
+    *yy_cp = (yy_hold_char);
+
+    if (yy_cp < YY_CURRENT_BUFFER_LVALUE->yy_ch_buf + 2)
+    { /* need to shift things up to make room */
+        /* +2 for EOB chars. */
+        yy_size_t number_to_move = (yy_n_chars) + 2;
+        char *dest =
+            &YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[YY_CURRENT_BUFFER_LVALUE->yy_buf_size + 2];
+        char *source = &YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[number_to_move];
+
+        while (source > YY_CURRENT_BUFFER_LVALUE->yy_ch_buf)
+            *--dest = *--source;
+
+        yy_cp += (int)(dest - source);
+        yy_bp += (int)(dest - source);
+        YY_CURRENT_BUFFER_LVALUE->yy_n_chars = (yy_n_chars) =
+            (int)YY_CURRENT_BUFFER_LVALUE->yy_buf_size;
+
+        if (yy_cp < YY_CURRENT_BUFFER_LVALUE->yy_ch_buf + 2)
+            YY_FATAL_ERROR("flex scanner push-back overflow");
+    }
+
+    *--yy_cp = (char)c;
+
+    (yytext_ptr) = yy_bp;
+    (yy_hold_char) = *yy_cp;
+    (yy_c_buf_p) = yy_cp;
+}
+
+#endif
+
+#ifndef YY_NO_INPUT
+#ifdef __cplusplus
+static int yyinput(void)
+#else
+static int input(void)
+#endif
+
+{
+    int c;
+
+    *(yy_c_buf_p) = (yy_hold_char);
+
+    if (*(yy_c_buf_p) == YY_END_OF_BUFFER_CHAR)
+    {
+        /* yy_c_buf_p now points to the character we want to return.
+         * If this occurs *before* the EOB characters, then it's a
+         * valid NUL; if not, then we've hit the end of the buffer.
+         */
+        if ((yy_c_buf_p) < &YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[(yy_n_chars)])
+            /* This was really a NUL. */
+            *(yy_c_buf_p) = '\0';
+
+        else
+        { /* need more input */
+            yy_size_t offset = (yy_c_buf_p) - (yytext_ptr);
+            ++(yy_c_buf_p);
+
+            switch (yy_get_next_buffer())
+            {
+            case EOB_ACT_LAST_MATCH:
+                /* This happens because yy_g_n_b()
+                 * sees that we've accumulated a
+                 * token and flags that we need to
+                 * try matching the token before
+                 * proceeding.  But for input(),
+                 * there's no matching to consider.
+                 * So convert the EOB_ACT_LAST_MATCH
+                 * to EOB_ACT_END_OF_FILE.
+                 */
+
+                /* Reset buffer status. */
+                yyrestart(yyin);
+
+                /*FALLTHROUGH*/
+
+            case EOB_ACT_END_OF_FILE: {
+                if (yywrap())
+                    return 0;
+
+                if (!(yy_did_buffer_switch_on_eof))
+                    YY_NEW_FILE;
+#ifdef __cplusplus
+                return yyinput();
+#else
+                return input();
+#endif
+            }
+
+            case EOB_ACT_CONTINUE_SCAN:
+                (yy_c_buf_p) = (yytext_ptr) + offset;
+                break;
+            }
+        }
+    }
+
+    c = *(unsigned char *)(yy_c_buf_p); /* cast for 8-bit char's */
+    *(yy_c_buf_p) = '\0';               /* preserve yytext */
+    (yy_hold_char) = *++(yy_c_buf_p);
+
+    return c;
+}
+#endif /* ifndef YY_NO_INPUT */
+
+/** Immediately switch to a different input stream.
+ * @param input_file A readable stream.
+ *
+ * @note This function does not reset the start condition to @c INITIAL .
+ */
+void yyrestart(FILE *input_file)
+{
+
+    if (!YY_CURRENT_BUFFER)
+    {
+        yyensure_buffer_stack();
+        YY_CURRENT_BUFFER_LVALUE = yy_create_buffer(yyin, YY_BUF_SIZE);
+    }
+
+    yy_init_buffer(YY_CURRENT_BUFFER, input_file);
+    yy_load_buffer_state();
+}
+
+/** Switch to a different input buffer.
+ * @param new_buffer The new input buffer.
+ *
+ */
+void yy_switch_to_buffer(YY_BUFFER_STATE new_buffer)
+{
+
+    /* TODO. We should be able to replace this entire function body
+     * with
+     *		yypop_buffer_state();
+     *		yypush_buffer_state(new_buffer);
+     */
+    yyensure_buffer_stack();
+    if (YY_CURRENT_BUFFER == new_buffer)
+        return;
+
+    if (YY_CURRENT_BUFFER)
+    {
+        /* Flush out information for old buffer. */
+        *(yy_c_buf_p) = (yy_hold_char);
+        YY_CURRENT_BUFFER_LVALUE->yy_buf_pos = (yy_c_buf_p);
+        YY_CURRENT_BUFFER_LVALUE->yy_n_chars = (yy_n_chars);
+    }
+
+    YY_CURRENT_BUFFER_LVALUE = new_buffer;
+    yy_load_buffer_state();
+
+    /* We don't actually know whether we did this switch during
+     * EOF (yywrap()) processing, but the only time this flag
+     * is looked at is after yywrap() is called, so it's safe
+     * to go ahead and always set it.
+     */
+    (yy_did_buffer_switch_on_eof) = 1;
+}
+
+static void yy_load_buffer_state(void)
+{
+    (yy_n_chars) = YY_CURRENT_BUFFER_LVALUE->yy_n_chars;
+    (yytext_ptr) = (yy_c_buf_p) = YY_CURRENT_BUFFER_LVALUE->yy_buf_pos;
+    yyin = YY_CURRENT_BUFFER_LVALUE->yy_input_file;
+    (yy_hold_char) = *(yy_c_buf_p);
+}
+
+/** Allocate and initialize an input buffer state.
+ * @param file A readable stream.
+ * @param size The character buffer size in bytes. When in doubt, use @c YY_BUF_SIZE.
+ *
+ * @return the allocated buffer state.
+ */
+YY_BUFFER_STATE yy_create_buffer(FILE *file, int size)
+{
+    YY_BUFFER_STATE b;
+
+    b = (YY_BUFFER_STATE)yyalloc(sizeof(struct yy_buffer_state));
+    if (!b)
+        YY_FATAL_ERROR("out of dynamic memory in yy_create_buffer()");
+
+    b->yy_buf_size = size;
+
+    /* yy_ch_buf has to be 2 characters longer than the size given because
+     * we need to put in 2 end-of-buffer characters.
+     */
+    b->yy_ch_buf = (char *)yyalloc((yy_size_t)(b->yy_buf_size + 2));
+    if (!b->yy_ch_buf)
+        YY_FATAL_ERROR("out of dynamic memory in yy_create_buffer()");
+
+    b->yy_is_our_buffer = 1;
+
+    yy_init_buffer(b, file);
+
+    return b;
+}
+
+/** Destroy the buffer.
+ * @param b a buffer created with yy_create_buffer()
+ *
+ */
+void yy_delete_buffer(YY_BUFFER_STATE b)
+{
+
+    if (!b)
+        return;
+
+    if (b == YY_CURRENT_BUFFER) /* Not sure if we should pop here. */
+        YY_CURRENT_BUFFER_LVALUE = (YY_BUFFER_STATE)0;
+
+    if (b->yy_is_our_buffer)
+        yyfree((void *)b->yy_ch_buf);
+
+    yyfree((void *)b);
+}
+
+/* Initializes or reinitializes a buffer.
+ * This function is sometimes called more than once on the same buffer,
+ * such as during a yyrestart() or at EOF.
+ */
+static void yy_init_buffer(YY_BUFFER_STATE b, FILE *file)
+
+{
+    int oerrno = errno;
+
+    yy_flush_buffer(b);
+
+    b->yy_input_file = file;
+    b->yy_fill_buffer = 1;
+
+    /* If b is the current buffer, then yy_init_buffer was _probably_
+     * called from yyrestart() or through yy_get_next_buffer.
+     * In that case, we don't want to reset the lineno or column.
+     */
+    if (b != YY_CURRENT_BUFFER)
+    {
+        b->yy_bs_lineno = 1;
+        b->yy_bs_column = 0;
+    }
+
+    b->yy_is_interactive = file ? (isatty(fileno(file)) > 0) : 0;
+
+    errno = oerrno;
+}
+
+/** Discard all buffered characters. On the next scan, YY_INPUT will be called.
+ * @param b the buffer state to be flushed, usually @c YY_CURRENT_BUFFER.
+ *
+ */
+void yy_flush_buffer(YY_BUFFER_STATE b)
+{
+    if (!b)
+        return;
+
+    b->yy_n_chars = 0;
+
+    /* We always need two end-of-buffer characters.  The first causes
+     * a transition to the end-of-buffer state.  The second causes
+     * a jam in that state.
+     */
+    b->yy_ch_buf[0] = YY_END_OF_BUFFER_CHAR;
+    b->yy_ch_buf[1] = YY_END_OF_BUFFER_CHAR;
+
+    b->yy_buf_pos = &b->yy_ch_buf[0];
+
+    b->yy_at_bol = 1;
+    b->yy_buffer_status = YY_BUFFER_NEW;
+
+    if (b == YY_CURRENT_BUFFER)
+        yy_load_buffer_state();
+}
+
+/** Pushes the new state onto the stack. The new state becomes
+ *  the current state. This function will allocate the stack
+ *  if necessary.
+ *  @param new_buffer The new state.
+ *
+ */
+void yypush_buffer_state(YY_BUFFER_STATE new_buffer)
+{
+    if (new_buffer == NULL)
+        return;
+
+    yyensure_buffer_stack();
+
+    /* This block is copied from yy_switch_to_buffer. */
+    if (YY_CURRENT_BUFFER)
+    {
+        /* Flush out information for old buffer. */
+        *(yy_c_buf_p) = (yy_hold_char);
+        YY_CURRENT_BUFFER_LVALUE->yy_buf_pos = (yy_c_buf_p);
+        YY_CURRENT_BUFFER_LVALUE->yy_n_chars = (yy_n_chars);
+    }
+
+    /* Only push if top exists. Otherwise, replace top. */
+    if (YY_CURRENT_BUFFER)
+        (yy_buffer_stack_top)++;
+    YY_CURRENT_BUFFER_LVALUE = new_buffer;
+
+    /* copied from yy_switch_to_buffer. */
+    yy_load_buffer_state();
+    (yy_did_buffer_switch_on_eof) = 1;
+}
+
+/** Removes and deletes the top of the stack, if present.
+ *  The next element becomes the new top.
+ *
+ */
+void yypop_buffer_state(void)
+{
+    if (!YY_CURRENT_BUFFER)
+        return;
+
+    yy_delete_buffer(YY_CURRENT_BUFFER);
+    YY_CURRENT_BUFFER_LVALUE = NULL;
+    if ((yy_buffer_stack_top) > 0)
+        --(yy_buffer_stack_top);
+
+    if (YY_CURRENT_BUFFER)
+    {
+        yy_load_buffer_state();
+        (yy_did_buffer_switch_on_eof) = 1;
+    }
+}
+
+/* Allocates the stack if it does not exist.
+ *  Guarantees space for at least one push.
+ */
+static void yyensure_buffer_stack(void)
+{
+    yy_size_t num_to_alloc;
+
+    if (!(yy_buffer_stack))
+    {
+
+        /* First allocation is just for 2 elements, since we don't know if this
+         * scanner will even need a stack. We use 2 instead of 1 to avoid an
+         * immediate realloc on the next call.
+         */
+        num_to_alloc = 1; /* After all that talk, this was set to 1 anyways... */
+        (yy_buffer_stack) =
+            (struct yy_buffer_state **)yyalloc(num_to_alloc * sizeof(struct yy_buffer_state *));
+        if (!(yy_buffer_stack))
+            YY_FATAL_ERROR("out of dynamic memory in yyensure_buffer_stack()");
+
+        memset((yy_buffer_stack), 0, num_to_alloc * sizeof(struct yy_buffer_state *));
+
+        (yy_buffer_stack_max) = num_to_alloc;
+        (yy_buffer_stack_top) = 0;
+        return;
+    }
+
+    if ((yy_buffer_stack_top) >= ((yy_buffer_stack_max)) - 1)
+    {
+
+        /* Increase the buffer to prepare for a possible push. */
+        yy_size_t grow_size = 8 /* arbitrary grow size */;
+
+        num_to_alloc = (yy_buffer_stack_max) + grow_size;
+        (yy_buffer_stack) = (struct yy_buffer_state **)yyrealloc(
+            (yy_buffer_stack), num_to_alloc * sizeof(struct yy_buffer_state *));
+        if (!(yy_buffer_stack))
+            YY_FATAL_ERROR("out of dynamic memory in yyensure_buffer_stack()");
+
+        /* zero only the new slots.*/
+        memset((yy_buffer_stack) + (yy_buffer_stack_max), 0,
+               grow_size * sizeof(struct yy_buffer_state *));
+        (yy_buffer_stack_max) = num_to_alloc;
+    }
+}
+
+/** Setup the input buffer state to scan directly from a user-specified character buffer.
+ * @param base the character buffer
+ * @param size the size in bytes of the character buffer
+ *
+ * @return the newly allocated buffer state object.
+ */
+YY_BUFFER_STATE yy_scan_buffer(char *base, yy_size_t size)
+{
+    YY_BUFFER_STATE b;
+
+    if (size < 2 || base[size - 2] != YY_END_OF_BUFFER_CHAR ||
+        base[size - 1] != YY_END_OF_BUFFER_CHAR)
+        /* They forgot to leave room for the EOB's. */
+        return NULL;
+
+    b = (YY_BUFFER_STATE)yyalloc(sizeof(struct yy_buffer_state));
+    if (!b)
+        YY_FATAL_ERROR("out of dynamic memory in yy_scan_buffer()");
+
+    b->yy_buf_size = (int)(size - 2); /* "- 2" to take care of EOB's */
+    b->yy_buf_pos = b->yy_ch_buf = base;
+    b->yy_is_our_buffer = 0;
+    b->yy_input_file = NULL;
+    b->yy_n_chars = b->yy_buf_size;
+    b->yy_is_interactive = 0;
+    b->yy_at_bol = 1;
+    b->yy_fill_buffer = 0;
+    b->yy_buffer_status = YY_BUFFER_NEW;
+
+    yy_switch_to_buffer(b);
+
+    return b;
+}
+
+/** Setup the input buffer state to scan a string. The next call to yylex() will
+ * scan from a @e copy of @a str.
+ * @param yystr a NUL-terminated string to scan
+ *
+ * @return the newly allocated buffer state object.
+ * @note If you want to scan bytes that may contain NUL values, then use
+ *       yy_scan_bytes() instead.
+ */
+YY_BUFFER_STATE yy_scan_string(const char *yystr)
+{
+
+    return yy_scan_bytes(yystr, (int)strlen(yystr));
+}
+
+/** Setup the input buffer state to scan the given bytes. The next call to yylex() will
+ * scan from a @e copy of @a bytes.
+ * @param yybytes the byte buffer to scan
+ * @param _yybytes_len the number of bytes in the buffer pointed to by @a bytes.
+ *
+ * @return the newly allocated buffer state object.
+ */
+YY_BUFFER_STATE yy_scan_bytes(const char *yybytes, yy_size_t _yybytes_len)
+{
+    YY_BUFFER_STATE b;
+    char *buf;
+    yy_size_t n;
+    yy_size_t i;
+
+    /* Get memory for full buffer, including space for trailing EOB's. */
+    n = (yy_size_t)(_yybytes_len + 2);
+    buf = (char *)yyalloc(n);
+    if (!buf)
+        YY_FATAL_ERROR("out of dynamic memory in yy_scan_bytes()");
+
+    for (i = 0; i < _yybytes_len; ++i)
+        buf[i] = yybytes[i];
+
+    buf[_yybytes_len] = buf[_yybytes_len + 1] = YY_END_OF_BUFFER_CHAR;
+
+    b = yy_scan_buffer(buf, n);
+    if (!b)
+        YY_FATAL_ERROR("bad buffer in yy_scan_bytes()");
+
+    /* It's okay to grow etc. this buffer, and we should throw it
+     * away when we're done.
+     */
+    b->yy_is_our_buffer = 1;
+
+    return b;
+}
+
+#ifndef YY_EXIT_FAILURE
+#define YY_EXIT_FAILURE 2
+#endif
+
+static void yynoreturn yy_fatal_error(const char *msg)
+{
+    fprintf(stderr, "%s\n", msg);
+    exit(YY_EXIT_FAILURE);
+}
+
+/* Redefine yyless() so it works in section 3 code. */
+
+#undef yyless
+#define yyless(n)                                                                                  \
+    do                                                                                             \
+    {                                                                                              \
+        /* Undo effects of setting up yytext. */                                                   \
+        yy_size_t yyless_macro_arg = (n);                                                          \
+        YY_LESS_LINENO(yyless_macro_arg);                                                          \
+        yytext[yyleng] = (yy_hold_char);                                                           \
+        (yy_c_buf_p) = yytext + yyless_macro_arg;                                                  \
+        (yy_hold_char) = *(yy_c_buf_p);                                                            \
+        *(yy_c_buf_p) = '\0';                                                                      \
+        yyleng = yyless_macro_arg;                                                                 \
+    } while (0)
+
+/* Accessor  methods (get/set functions) to struct members. */
+
+/** Get the current line number.
+ *
+ */
+int yyget_lineno(void) { return yylineno; }
+
+/** Get the input stream.
+ *
+ */
+FILE *yyget_in(void) { return yyin; }
+
+/** Get the output stream.
+ *
+ */
+FILE *yyget_out(void) { return yyout; }
+
+/** Get the length of the current token.
+ *
+ */
+yy_size_t yyget_leng(void) { return yyleng; }
+
+/** Get the current token.
+ *
+ */
+
+char *yyget_text(void) { return yytext; }
+
+/** Set the current line number.
+ * @param _line_number line number
+ *
+ */
+void yyset_lineno(int _line_number) { yylineno = _line_number; }
+
+/** Set the input stream. This does not discard the current
+ * input buffer.
+ * @param _in_str A readable stream.
+ *
+ * @see yy_switch_to_buffer
+ */
+void yyset_in(FILE *_in_str) { yyin = _in_str; }
+
+void yyset_out(FILE *_out_str) { yyout = _out_str; }
+
+int yyget_debug(void) { return yy_flex_debug; }
+
+void yyset_debug(int _bdebug) { yy_flex_debug = _bdebug; }
+
+static int yy_init_globals(void)
+{
+    /* Initialization is the same as for the non-reentrant scanner.
+     * This function is called from yylex_destroy(), so don't allocate here.
+     */
+
+    (yy_buffer_stack) = NULL;
+    (yy_buffer_stack_top) = 0;
+    (yy_buffer_stack_max) = 0;
+    (yy_c_buf_p) = NULL;
+    (yy_init) = 0;
+    (yy_start) = 0;
+
+/* Defined in main.c */
+#ifdef YY_STDINIT
+    yyin = stdin;
+    yyout = stdout;
+#else
+    yyin = NULL;
+    yyout = NULL;
+#endif
+
+    /* For future reference: Set errno on error, since we are called by
+     * yylex_init()
+     */
+    return 0;
+}
+
+/* yylex_destroy is for both reentrant and non-reentrant scanners. */
+int yylex_destroy(void)
+{
+
+    /* Pop the buffer stack, destroying each element. */
+    while (YY_CURRENT_BUFFER)
+    {
+        yy_delete_buffer(YY_CURRENT_BUFFER);
+        YY_CURRENT_BUFFER_LVALUE = NULL;
+        yypop_buffer_state();
+    }
+
+    /* Destroy the stack itself. */
+    yyfree((yy_buffer_stack));
+    (yy_buffer_stack) = NULL;
+
+    /* Reset the globals. This is important in a non-reentrant scanner so the next time
+     * yylex() is called, initialization will occur. */
+    yy_init_globals();
+
+    return 0;
+}
+
+/*
+ * Internal utility routines.
+ */
+
+#ifndef yytext_ptr
+static void yy_flex_strncpy(char *s1, const char *s2, int n)
+{
+
+    int i;
+    for (i = 0; i < n; ++i)
+        s1[i] = s2[i];
+}
+#endif
+
+#ifdef YY_NEED_STRLEN
+static int yy_flex_strlen(const char *s)
+{
+    int n;
+    for (n = 0; s[n]; ++n)
+        ;
+
+    return n;
+}
+#endif
+
+void *yyalloc(yy_size_t size) { return malloc(size); }
+
+void *yyrealloc(void *ptr, yy_size_t size)
+{
+
+    /* The cast to (char *) in the following accommodates both
+     * implementations that use char* generic pointers, and those
+     * that use void* generic pointers.  It works with the latter
+     * because both ANSI C and C++ allow castless assignment from
+     * any pointer type to void*, and deal with argument conversions
+     * as though doing an assignment.
+     */
+    return realloc(ptr, size);
+}
+
+void yyfree(void *ptr) { free((char *)ptr); /* see yyrealloc() for (char *) cast */ }
+
+#define YYTABLES_NAME "yytables"
+
+#line 47 "lexer.l"
diff --git a/source/adios2/toolkit/derived/parser/lexer.h b/source/adios2/toolkit/derived/parser/lexer.h
new file mode 100644
index 0000000000..75c3a14657
--- /dev/null
+++ b/source/adios2/toolkit/derived/parser/lexer.h
@@ -0,0 +1,476 @@
+#ifndef yyHEADER_H
+#define yyHEADER_H 1
+#define yyIN_HEADER 1
+
+#line 5 "lexer.h"
+
+#line 7 "lexer.h"
+
+#define YY_INT_ALIGNED short int
+
+/* A lexical scanner generated by flex */
+
+#define FLEX_SCANNER
+#define YY_FLEX_MAJOR_VERSION 2
+#define YY_FLEX_MINOR_VERSION 6
+#define YY_FLEX_SUBMINOR_VERSION 4
+#if YY_FLEX_SUBMINOR_VERSION > 0
+#define FLEX_BETA
+#endif
+
+/* First, we deal with  platform-specific or compiler-specific issues. */
+
+/* begin standard C headers. */
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+/* end standard C headers. */
+
+/* flex integer type definitions */
+
+#ifndef FLEXINT_H
+#define FLEXINT_H
+
+/* C99 systems have <inttypes.h>. Non-C99 systems may or may not. */
+
+#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L
+
+/* C99 says to define __STDC_LIMIT_MACROS before including stdint.h,
+ * if you want the limit (max/min) macros for int types.
+ */
+#ifndef __STDC_LIMIT_MACROS
+#define __STDC_LIMIT_MACROS 1
+#endif
+
+#include <inttypes.h>
+typedef int8_t flex_int8_t;
+typedef uint8_t flex_uint8_t;
+typedef int16_t flex_int16_t;
+typedef uint16_t flex_uint16_t;
+typedef int32_t flex_int32_t;
+typedef uint32_t flex_uint32_t;
+typedef uint64_t flex_uint64_t;
+#else
+typedef signed char flex_int8_t;
+typedef short int flex_int16_t;
+typedef int flex_int32_t;
+typedef unsigned char flex_uint8_t;
+typedef unsigned short int flex_uint16_t;
+typedef unsigned int flex_uint32_t;
+
+/* Limits of integral types. */
+#ifndef INT8_MIN
+#define INT8_MIN (-128)
+#endif
+#ifndef INT16_MIN
+#define INT16_MIN (-32767 - 1)
+#endif
+#ifndef INT32_MIN
+#define INT32_MIN (-2147483647 - 1)
+#endif
+#ifndef INT8_MAX
+#define INT8_MAX (127)
+#endif
+#ifndef INT16_MAX
+#define INT16_MAX (32767)
+#endif
+#ifndef INT32_MAX
+#define INT32_MAX (2147483647)
+#endif
+#ifndef UINT8_MAX
+#define UINT8_MAX (255U)
+#endif
+#ifndef UINT16_MAX
+#define UINT16_MAX (65535U)
+#endif
+#ifndef UINT32_MAX
+#define UINT32_MAX (4294967295U)
+#endif
+
+#ifndef SIZE_MAX
+#define SIZE_MAX (~(size_t)0)
+#endif
+
+#endif /* ! C99 */
+
+#endif /* ! FLEXINT_H */
+
+/* begin standard C++ headers. */
+
+/* TODO: this is always defined, so inline it */
+#define yyconst const
+
+#if defined(__GNUC__) && __GNUC__ >= 3
+#define yynoreturn __attribute__((__noreturn__))
+#else
+#define yynoreturn
+#endif
+
+/* Size of default input buffer. */
+#ifndef YY_BUF_SIZE
+#ifdef __ia64__
+/* On IA-64, the buffer size is 16k, not 8k.
+ * Moreover, YY_BUF_SIZE is 2*YY_READ_BUF_SIZE in the general case.
+ * Ditto for the __ia64__ case accordingly.
+ */
+#define YY_BUF_SIZE 32768
+#else
+#define YY_BUF_SIZE 16384
+#endif /* __ia64__ */
+#endif
+
+#ifndef YY_TYPEDEF_YY_BUFFER_STATE
+#define YY_TYPEDEF_YY_BUFFER_STATE
+typedef struct yy_buffer_state *YY_BUFFER_STATE;
+#endif
+
+#ifndef YY_TYPEDEF_YY_SIZE_T
+#define YY_TYPEDEF_YY_SIZE_T
+typedef size_t yy_size_t;
+#endif
+
+extern yy_size_t yyleng;
+
+extern FILE *yyin, *yyout;
+
+#ifndef YY_STRUCT_YY_BUFFER_STATE
+#define YY_STRUCT_YY_BUFFER_STATE
+struct yy_buffer_state
+{
+    FILE *yy_input_file;
+
+    char *yy_ch_buf;  /* input buffer */
+    char *yy_buf_pos; /* current position in input buffer */
+
+    /* Size of input buffer in bytes, not including room for EOB
+     * characters.
+     */
+    int yy_buf_size;
+
+    /* Number of characters read into yy_ch_buf, not including EOB
+     * characters.
+     */
+    yy_size_t yy_n_chars;
+
+    /* Whether we "own" the buffer - i.e., we know we created it,
+     * and can realloc() it to grow it, and should free() it to
+     * delete it.
+     */
+    int yy_is_our_buffer;
+
+    /* Whether this is an "interactive" input source; if so, and
+     * if we're using stdio for input, then we want to use getc()
+     * instead of fread(), to make sure we stop fetching input after
+     * each newline.
+     */
+    int yy_is_interactive;
+
+    /* Whether we're considered to be at the beginning of a line.
+     * If so, '^' rules will be active on the next match, otherwise
+     * not.
+     */
+    int yy_at_bol;
+
+    int yy_bs_lineno; /**< The line count. */
+    int yy_bs_column; /**< The column count. */
+
+    /* Whether to try to fill the input buffer when we reach the
+     * end of it.
+     */
+    int yy_fill_buffer;
+
+    int yy_buffer_status;
+};
+#endif /* !YY_STRUCT_YY_BUFFER_STATE */
+
+void yyrestart(FILE *input_file);
+void yy_switch_to_buffer(YY_BUFFER_STATE new_buffer);
+YY_BUFFER_STATE yy_create_buffer(FILE *file, int size);
+void yy_delete_buffer(YY_BUFFER_STATE b);
+void yy_flush_buffer(YY_BUFFER_STATE b);
+void yypush_buffer_state(YY_BUFFER_STATE new_buffer);
+void yypop_buffer_state(void);
+
+YY_BUFFER_STATE yy_scan_buffer(char *base, yy_size_t size);
+YY_BUFFER_STATE yy_scan_string(const char *yy_str);
+YY_BUFFER_STATE yy_scan_bytes(const char *bytes, yy_size_t len);
+
+void *yyalloc(yy_size_t);
+void *yyrealloc(void *, yy_size_t);
+void yyfree(void *);
+
+/* Begin user sect3 */
+
+#define yywrap() (/*CONSTCOND*/ 1)
+#define YY_SKIP_YYWRAP
+
+extern int yylineno;
+
+extern char *yytext;
+#ifdef yytext_ptr
+#undef yytext_ptr
+#endif
+#define yytext_ptr yytext
+
+#ifdef YY_HEADER_EXPORT_START_CONDITIONS
+#define INITIAL 0
+
+#endif
+
+#ifndef YY_NO_UNISTD_H
+/* Special case for "unistd.h", since it is non-ANSI. We include it way
+ * down here because we want the user's section 1 to have been scanned first.
+ * The user has a chance to override it with an option.
+ */
+#include <unistd.h>
+#endif
+
+#ifndef YY_EXTRA_TYPE
+#define YY_EXTRA_TYPE void *
+#endif
+
+/* Accessor methods to globals.
+   These are made visible to non-reentrant scanners for convenience. */
+
+int yylex_destroy(void);
+
+int yyget_debug(void);
+
+void yyset_debug(int debug_flag);
+
+YY_EXTRA_TYPE yyget_extra(void);
+
+void yyset_extra(YY_EXTRA_TYPE user_defined);
+
+FILE *yyget_in(void);
+
+void yyset_in(FILE *_in_str);
+
+FILE *yyget_out(void);
+
+void yyset_out(FILE *_out_str);
+
+yy_size_t yyget_leng(void);
+
+char *yyget_text(void);
+
+int yyget_lineno(void);
+
+void yyset_lineno(int _line_number);
+
+/* Macros after this point can all be overridden by user definitions in
+ * section 1.
+ */
+
+#ifndef YY_SKIP_YYWRAP
+#ifdef __cplusplus
+extern "C" int yywrap(void);
+#else
+extern int yywrap(void);
+#endif
+#endif
+
+#ifndef yytext_ptr
+static void yy_flex_strncpy(char *, const char *, int);
+#endif
+
+#ifdef YY_NEED_STRLEN
+static int yy_flex_strlen(const char *);
+#endif
+
+#ifndef YY_NO_INPUT
+
+#endif
+
+/* Amount of stuff to slurp up with each read. */
+#ifndef YY_READ_BUF_SIZE
+#ifdef __ia64__
+/* On IA-64, the buffer size is 16k, not 8k */
+#define YY_READ_BUF_SIZE 16384
+#else
+#define YY_READ_BUF_SIZE 8192
+#endif /* __ia64__ */
+#endif
+
+/* Number of entries by which start-condition stack grows. */
+#ifndef YY_START_STACK_INCR
+#define YY_START_STACK_INCR 25
+#endif
+
+/* Default declaration of generated scanner - a define so the user can
+ * easily add parameters.
+ */
+#ifndef YY_DECL
+#define YY_DECL_IS_OURS 1
+
+extern int yylex(void);
+
+#define YY_DECL int yylex(void)
+#endif /* !YY_DECL */
+
+/* yy_get_previous_state - get the state just before the EOB char was reached */
+
+#undef YY_NEW_FILE
+#undef YY_FLUSH_BUFFER
+#undef yy_set_bol
+#undef yy_new_buffer
+#undef yy_set_interactive
+#undef YY_DO_BEFORE_ACTION
+
+#ifdef YY_DECL_IS_OURS
+#undef YY_DECL_IS_OURS
+#undef YY_DECL
+#endif
+
+#ifndef yy_create_buffer_ALREADY_DEFINED
+#undef yy_create_buffer
+#endif
+#ifndef yy_delete_buffer_ALREADY_DEFINED
+#undef yy_delete_buffer
+#endif
+#ifndef yy_scan_buffer_ALREADY_DEFINED
+#undef yy_scan_buffer
+#endif
+#ifndef yy_scan_string_ALREADY_DEFINED
+#undef yy_scan_string
+#endif
+#ifndef yy_scan_bytes_ALREADY_DEFINED
+#undef yy_scan_bytes
+#endif
+#ifndef yy_init_buffer_ALREADY_DEFINED
+#undef yy_init_buffer
+#endif
+#ifndef yy_flush_buffer_ALREADY_DEFINED
+#undef yy_flush_buffer
+#endif
+#ifndef yy_load_buffer_state_ALREADY_DEFINED
+#undef yy_load_buffer_state
+#endif
+#ifndef yy_switch_to_buffer_ALREADY_DEFINED
+#undef yy_switch_to_buffer
+#endif
+#ifndef yypush_buffer_state_ALREADY_DEFINED
+#undef yypush_buffer_state
+#endif
+#ifndef yypop_buffer_state_ALREADY_DEFINED
+#undef yypop_buffer_state
+#endif
+#ifndef yyensure_buffer_stack_ALREADY_DEFINED
+#undef yyensure_buffer_stack
+#endif
+#ifndef yylex_ALREADY_DEFINED
+#undef yylex
+#endif
+#ifndef yyrestart_ALREADY_DEFINED
+#undef yyrestart
+#endif
+#ifndef yylex_init_ALREADY_DEFINED
+#undef yylex_init
+#endif
+#ifndef yylex_init_extra_ALREADY_DEFINED
+#undef yylex_init_extra
+#endif
+#ifndef yylex_destroy_ALREADY_DEFINED
+#undef yylex_destroy
+#endif
+#ifndef yyget_debug_ALREADY_DEFINED
+#undef yyget_debug
+#endif
+#ifndef yyset_debug_ALREADY_DEFINED
+#undef yyset_debug
+#endif
+#ifndef yyget_extra_ALREADY_DEFINED
+#undef yyget_extra
+#endif
+#ifndef yyset_extra_ALREADY_DEFINED
+#undef yyset_extra
+#endif
+#ifndef yyget_in_ALREADY_DEFINED
+#undef yyget_in
+#endif
+#ifndef yyset_in_ALREADY_DEFINED
+#undef yyset_in
+#endif
+#ifndef yyget_out_ALREADY_DEFINED
+#undef yyget_out
+#endif
+#ifndef yyset_out_ALREADY_DEFINED
+#undef yyset_out
+#endif
+#ifndef yyget_leng_ALREADY_DEFINED
+#undef yyget_leng
+#endif
+#ifndef yyget_text_ALREADY_DEFINED
+#undef yyget_text
+#endif
+#ifndef yyget_lineno_ALREADY_DEFINED
+#undef yyget_lineno
+#endif
+#ifndef yyset_lineno_ALREADY_DEFINED
+#undef yyset_lineno
+#endif
+#ifndef yyget_column_ALREADY_DEFINED
+#undef yyget_column
+#endif
+#ifndef yyset_column_ALREADY_DEFINED
+#undef yyset_column
+#endif
+#ifndef yywrap_ALREADY_DEFINED
+#undef yywrap
+#endif
+#ifndef yyget_lval_ALREADY_DEFINED
+#undef yyget_lval
+#endif
+#ifndef yyset_lval_ALREADY_DEFINED
+#undef yyset_lval
+#endif
+#ifndef yyget_lloc_ALREADY_DEFINED
+#undef yyget_lloc
+#endif
+#ifndef yyset_lloc_ALREADY_DEFINED
+#undef yyset_lloc
+#endif
+#ifndef yyalloc_ALREADY_DEFINED
+#undef yyalloc
+#endif
+#ifndef yyrealloc_ALREADY_DEFINED
+#undef yyrealloc
+#endif
+#ifndef yyfree_ALREADY_DEFINED
+#undef yyfree
+#endif
+#ifndef yytext_ALREADY_DEFINED
+#undef yytext
+#endif
+#ifndef yyleng_ALREADY_DEFINED
+#undef yyleng
+#endif
+#ifndef yyin_ALREADY_DEFINED
+#undef yyin
+#endif
+#ifndef yyout_ALREADY_DEFINED
+#undef yyout
+#endif
+#ifndef yy_flex_debug_ALREADY_DEFINED
+#undef yy_flex_debug
+#endif
+#ifndef yylineno_ALREADY_DEFINED
+#undef yylineno
+#endif
+#ifndef yytables_fload_ALREADY_DEFINED
+#undef yytables_fload
+#endif
+#ifndef yytables_destroy_ALREADY_DEFINED
+#undef yytables_destroy
+#endif
+#ifndef yyTABLES_NAME_ALREADY_DEFINED
+#undef yyTABLES_NAME
+#endif
+
+#line 47 "lexer.l"
+
+#line 476 "lexer.h"
+#undef yyIN_HEADER
+#endif /* yyHEADER_H */
diff --git a/source/adios2/toolkit/derived/parser/lexer.l b/source/adios2/toolkit/derived/parser/lexer.l
new file mode 100644
index 0000000000..f81a98bcf9
--- /dev/null
+++ b/source/adios2/toolkit/derived/parser/lexer.l
@@ -0,0 +1,47 @@
+%{   
+#include "parser.hpp"
+#define YY_USER_ACTION yylloc.first_line = yylloc.last_line = yylineno;
+%}
+
+%option noyywrap
+
+
+DIGIT    [0-9]
+CHAR     ([a-z]|[A-Z])
+   
+%%
+
+"+"            {yylval.sval=strdup(yytext); return OPERATOR;}
+"-"            {yylval.sval=strdup(yytext); return OPERATOR;}
+"*"            {yylval.sval=strdup(yytext); return OPERATOR;}
+"/"            {yylval.sval=strdup(yytext); return OPERATOR;}
+"^"            {yylval.sval=strdup(yytext); return OPERATOR;}
+"("            {return L_PAREN;}
+")"            {return R_PAREN;}
+","            {return COMMA;}
+"add"          {yylval.sval=strdup(yytext); return FUNCTION;}
+"sqrt"         {yylval.sval=strdup(yytext); return FUNCTION;}
+"sin"          {yylval.sval=strdup(yytext); return FUNCTION;}
+"cos"          {yylval.sval=strdup(yytext); return FUNCTION;}
+"tan"          {yylval.sval=strdup(yytext); return FUNCTION;}
+"magnitude"    {yylval.sval=strdup(yytext); return FUNCTION;}
+"curl"         {yylval.sval=strdup(yytext); return FUNCTION;}
+
+
+(\.{DIGIT}+)|({DIGIT}+(\.{DIGIT}*)?([eE][+-]?[0-9]+)?)   {yylval.dval = atof(yytext); return NUMBER;}
+
+[ \t]+         {/* ignore spaces */}
+
+(\n|\0|EOF|$end)           {return ENDL;}
+
+({CHAR}|{DIGIT}|_)+        {yylval.sval=strdup(yytext); return ALIAS;}
+
+:(\\|\/|_|{DIGIT})*{CHAR}+(\\|\/|-|_|{DIGIT}|{CHAR})*      {yylval.sval=strndup(yytext + 1,strlen(yytext)-1); return PATH;}
+
+'(\\|\/|_|{DIGIT})*{CHAR}+(\\|\/|-|_|{DIGIT}|{CHAR})*'     {yylval.sval=strndup(yytext + 1,strlen(yytext)-2); return PATH;}
+
+"["({DIGIT}+|{DIGIT}*":"{DIGIT}*":"{DIGIT}*)(,({DIGIT}+|{DIGIT}*":"{DIGIT}*":"{DIGIT}*))*"]"                            {yylval.sval=strndup(yytext + 1,strlen(yytext)-2); return INDICES;}
+
+.              {printf("Error at line %d: unrecognized symbol \"%s\"\n", yylloc.first_line, yytext); exit(0);}
+
+%%
\ No newline at end of file
diff --git a/source/adios2/toolkit/derived/parser/parser.cpp b/source/adios2/toolkit/derived/parser/parser.cpp
new file mode 100644
index 0000000000..03938fc8d6
--- /dev/null
+++ b/source/adios2/toolkit/derived/parser/parser.cpp
@@ -0,0 +1,1666 @@
+/* A Bison parser, made by GNU Bison 2.3.  */
+
+/* Skeleton implementation for Bison's Yacc-like parsers in C
+
+   Copyright (C) 1984, 1989, 1990, 2000, 2001, 2002, 2003, 2004, 2005, 2006
+   Free Software Foundation, Inc.
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 2, or (at your option)
+   any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 51 Franklin Street, Fifth Floor,
+   Boston, MA 02110-1301, USA.  */
+
+/* As a special exception, you may create a larger work that contains
+   part or all of the Bison parser skeleton and distribute that work
+   under terms of your choice, so long as that work isn't itself a
+   parser generator using the skeleton or a modified version thereof
+   as a parser skeleton.  Alternatively, if you modify or redistribute
+   the parser skeleton itself, you may (at your option) remove this
+   special exception, which will cause the skeleton and the resulting
+   Bison output files to be licensed under the GNU General Public
+   License without this special exception.
+
+   This special exception was added by the Free Software Foundation in
+   version 2.2 of Bison.  */
+
+/* C LALR(1) parser skeleton written by Richard Stallman, by
+   simplifying the original so-called "semantic" parser.  */
+
+/* All symbols defined below should begin with yy or YY, to avoid
+   infringing on user name space.  This should be done even for local
+   variables, as they might otherwise be expanded by user macros.
+   There are some unavoidable exceptions within include files to
+   define necessary library symbols; they are noted "INFRINGES ON
+   USER NAME SPACE" below.  */
+
+/* Identify Bison output.  */
+#define YYBISON 1
+
+/* Bison version.  */
+#define YYBISON_VERSION "2.3"
+
+/* Skeleton name.  */
+#define YYSKELETON_NAME "yacc.c"
+
+/* Pure parsers.  */
+#define YYPURE 0
+
+/* Using locations.  */
+#define YYLSP_NEEDED 1
+
+/* Tokens.  */
+#ifndef YYTOKENTYPE
+#define YYTOKENTYPE
+/* Put the tokens into the symbol table, so that GDB and other debuggers
+   know about them.  */
+enum yytokentype
+{
+    COMMA = 258,
+    L_PAREN = 259,
+    R_PAREN = 260,
+    ENDL = 261,
+    FUNCTION = 262,
+    OPERATOR = 263,
+    INDICES = 264,
+    NUMBER = 265,
+    ALIAS = 266,
+    PATH = 267
+};
+#endif
+/* Tokens.  */
+#define COMMA 258
+#define L_PAREN 259
+#define R_PAREN 260
+#define ENDL 261
+#define FUNCTION 262
+#define OPERATOR 263
+#define INDICES 264
+#define NUMBER 265
+#define ALIAS 266
+#define PATH 267
+
+/* Copy the first part of user declarations.  */
+#line 2 "parser.y"
+
+#include "parser.h"
+#include "lexer.h"
+#include <iostream>
+#include <math.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string>
+#include <vector>
+
+extern int yyparse(std::stack<adios2::detail::ASTNode *> *expr_stack);
+
+static void *yyparse_value;
+
+void yyerror(std::stack<adios2::detail::ASTNode *> *expr_stack, const char *msg);
+
+namespace adios2
+{
+namespace detail
+{
+void *createExpr(std::stack<ASTNode *> *, std::string, const char *, double, size_t);
+}
+}
+
+/* Enabling traces.  */
+#ifndef YYDEBUG
+#define YYDEBUG 0
+#endif
+
+/* Enabling verbose error messages.  */
+#ifdef YYERROR_VERBOSE
+#undef YYERROR_VERBOSE
+#define YYERROR_VERBOSE 1
+#else
+#define YYERROR_VERBOSE 1
+#endif
+
+/* Enabling the token table.  */
+#ifndef YYTOKEN_TABLE
+#define YYTOKEN_TABLE 0
+#endif
+
+#if !defined YYSTYPE && !defined YYSTYPE_IS_DECLARED
+typedef union YYSTYPE
+#line 25 "parser.y"
+{
+    double dval;
+    int ival;
+    char *sval;
+    void *expr_ptr;
+}
+/* Line 193 of yacc.c.  */
+#line 148 "parser.cpp"
+YYSTYPE;
+#define yystype YYSTYPE /* obsolescent; will be withdrawn */
+#define YYSTYPE_IS_DECLARED 1
+#define YYSTYPE_IS_TRIVIAL 1
+#endif
+
+#if !defined YYLTYPE && !defined YYLTYPE_IS_DECLARED
+typedef struct YYLTYPE
+{
+    int first_line;
+    int first_column;
+    int last_line;
+    int last_column;
+} YYLTYPE;
+#define yyltype YYLTYPE /* obsolescent; will be withdrawn */
+#define YYLTYPE_IS_DECLARED 1
+#define YYLTYPE_IS_TRIVIAL 1
+#endif
+
+/* Copy the second part of user declarations.  */
+
+/* Line 216 of yacc.c.  */
+#line 173 "parser.cpp"
+
+#ifdef short
+#undef short
+#endif
+
+#ifdef YYTYPE_UINT8
+typedef YYTYPE_UINT8 yytype_uint8;
+#else
+typedef unsigned char yytype_uint8;
+#endif
+
+#ifdef YYTYPE_INT8
+typedef YYTYPE_INT8 yytype_int8;
+#elif (defined __STDC__ || defined __C99__FUNC__ || defined __cplusplus || defined _MSC_VER)
+typedef signed char yytype_int8;
+#else
+typedef short int yytype_int8;
+#endif
+
+#ifdef YYTYPE_UINT16
+typedef YYTYPE_UINT16 yytype_uint16;
+#else
+typedef unsigned short int yytype_uint16;
+#endif
+
+#ifdef YYTYPE_INT16
+typedef YYTYPE_INT16 yytype_int16;
+#else
+typedef short int yytype_int16;
+#endif
+
+#ifndef YYSIZE_T
+#ifdef __SIZE_TYPE__
+#define YYSIZE_T __SIZE_TYPE__
+#elif defined size_t
+#define YYSIZE_T size_t
+#elif !defined YYSIZE_T &&                                                                         \
+    (defined __STDC__ || defined __C99__FUNC__ || defined __cplusplus || defined _MSC_VER)
+#include <stddef.h> /* INFRINGES ON USER NAME SPACE */
+#define YYSIZE_T size_t
+#else
+#define YYSIZE_T unsigned int
+#endif
+#endif
+
+#define YYSIZE_MAXIMUM ((YYSIZE_T)-1)
+
+#ifndef YY_
+#if defined YYENABLE_NLS && YYENABLE_NLS
+#if ENABLE_NLS
+#include <libintl.h> /* INFRINGES ON USER NAME SPACE */
+#define YY_(msgid) dgettext("bison-runtime", msgid)
+#endif
+#endif
+#ifndef YY_
+#define YY_(msgid) msgid
+#endif
+#endif
+
+/* Suppress unused-variable warnings by "using" E.  */
+#if !defined lint || defined __GNUC__
+#define YYUSE(e) ((void)(e))
+#else
+#define YYUSE(e) /* empty */
+#endif
+
+/* Identity function, used to suppress warnings about constant conditions.  */
+#ifndef lint
+#define YYID(n) (n)
+#else
+#if (defined __STDC__ || defined __C99__FUNC__ || defined __cplusplus || defined _MSC_VER)
+static int YYID(int i)
+#else
+static int YYID(i)
+int i;
+#endif
+{
+    return i;
+}
+#endif
+
+#if !defined yyoverflow || YYERROR_VERBOSE
+
+/* The parser invokes alloca or malloc; define the necessary symbols.  */
+
+#ifdef YYSTACK_USE_ALLOCA
+#if YYSTACK_USE_ALLOCA
+#ifdef __GNUC__
+#define YYSTACK_ALLOC __builtin_alloca
+#elif defined __BUILTIN_VA_ARG_INCR
+#include <alloca.h> /* INFRINGES ON USER NAME SPACE */
+#elif defined _AIX
+#define YYSTACK_ALLOC __alloca
+#elif defined _MSC_VER
+#include <malloc.h> /* INFRINGES ON USER NAME SPACE */
+#define alloca _alloca
+#else
+#define YYSTACK_ALLOC alloca
+#if !defined _ALLOCA_H && !defined _STDLIB_H &&                                                    \
+    (defined __STDC__ || defined __C99__FUNC__ || defined __cplusplus || defined _MSC_VER)
+#include <stdlib.h> /* INFRINGES ON USER NAME SPACE */
+#ifndef _STDLIB_H
+#define _STDLIB_H 1
+#endif
+#endif
+#endif
+#endif
+#endif
+
+#ifdef YYSTACK_ALLOC
+/* Pacify GCC's `empty if-body' warning.  */
+#define YYSTACK_FREE(Ptr)                                                                          \
+    do                                                                                             \
+    { /* empty */                                                                                  \
+        ;                                                                                          \
+    } while (YYID(0))
+#ifndef YYSTACK_ALLOC_MAXIMUM
+/* The OS might guarantee only one guard page at the bottom of the stack,
+   and a page size can be as small as 4096 bytes.  So we cannot safely
+   invoke alloca (N) if N exceeds 4096.  Use a slightly smaller number
+   to allow for a few compiler-allocated temporary stack slots.  */
+#define YYSTACK_ALLOC_MAXIMUM 4032 /* reasonable circa 2006 */
+#endif
+#else
+#define YYSTACK_ALLOC YYMALLOC
+#define YYSTACK_FREE YYFREE
+#ifndef YYSTACK_ALLOC_MAXIMUM
+#define YYSTACK_ALLOC_MAXIMUM YYSIZE_MAXIMUM
+#endif
+#if (defined __cplusplus && !defined _STDLIB_H &&                                                  \
+     !((defined YYMALLOC || defined malloc) && (defined YYFREE || defined free)))
+#include <stdlib.h> /* INFRINGES ON USER NAME SPACE */
+#ifndef _STDLIB_H
+#define _STDLIB_H 1
+#endif
+#endif
+#ifndef YYMALLOC
+#define YYMALLOC malloc
+#if !defined malloc && !defined _STDLIB_H &&                                                       \
+    (defined __STDC__ || defined __C99__FUNC__ || defined __cplusplus || defined _MSC_VER)
+void *malloc(YYSIZE_T); /* INFRINGES ON USER NAME SPACE */
+#endif
+#endif
+#ifndef YYFREE
+#define YYFREE free
+#if !defined free && !defined _STDLIB_H &&                                                         \
+    (defined __STDC__ || defined __C99__FUNC__ || defined __cplusplus || defined _MSC_VER)
+void free(void *);      /* INFRINGES ON USER NAME SPACE */
+#endif
+#endif
+#endif
+#endif /* ! defined yyoverflow || YYERROR_VERBOSE */
+
+#if (!defined yyoverflow &&                                                                        \
+     (!defined __cplusplus || (defined YYLTYPE_IS_TRIVIAL && YYLTYPE_IS_TRIVIAL &&                 \
+                               defined YYSTYPE_IS_TRIVIAL && YYSTYPE_IS_TRIVIAL)))
+
+/* A type that is properly aligned for any stack member.  */
+union yyalloc
+{
+    yytype_int16 yyss;
+    YYSTYPE yyvs;
+    YYLTYPE yyls;
+};
+
+/* The size of the maximum gap between one aligned stack and the next.  */
+#define YYSTACK_GAP_MAXIMUM (sizeof(union yyalloc) - 1)
+
+/* The size of an array large to enough to hold all stacks, each with
+   N elements.  */
+#define YYSTACK_BYTES(N)                                                                           \
+    ((N) * (sizeof(yytype_int16) + sizeof(YYSTYPE) + sizeof(YYLTYPE)) + 2 * YYSTACK_GAP_MAXIMUM)
+
+/* Copy COUNT objects from FROM to TO.  The source and destination do
+   not overlap.  */
+#ifndef YYCOPY
+#if defined __GNUC__ && 1 < __GNUC__
+#define YYCOPY(To, From, Count) __builtin_memcpy(To, From, (Count) * sizeof(*(From)))
+#else
+#define YYCOPY(To, From, Count)                                                                    \
+    do                                                                                             \
+    {                                                                                              \
+        YYSIZE_T yyi;                                                                              \
+        for (yyi = 0; yyi < (Count); yyi++)                                                        \
+            (To)[yyi] = (From)[yyi];                                                               \
+    } while (YYID(0))
+#endif
+#endif
+
+/* Relocate STACK from its old location to the new one.  The
+   local variables YYSIZE and YYSTACKSIZE give the old and new number of
+   elements in the stack, and YYPTR gives the new location of the
+   stack.  Advance YYPTR to a properly aligned location for the next
+   stack.  */
+#define YYSTACK_RELOCATE(Stack)                                                                    \
+    do                                                                                             \
+    {                                                                                              \
+        YYSIZE_T yynewbytes;                                                                       \
+        YYCOPY(&yyptr->Stack, Stack, yysize);                                                      \
+        Stack = &yyptr->Stack;                                                                     \
+        yynewbytes = yystacksize * sizeof(*Stack) + YYSTACK_GAP_MAXIMUM;                           \
+        yyptr += yynewbytes / sizeof(*yyptr);                                                      \
+    } while (YYID(0))
+
+#endif
+
+/* YYFINAL -- State number of the termination state.  */
+#define YYFINAL 16
+/* YYLAST -- Last index in YYTABLE.  */
+#define YYLAST 37
+
+/* YYNTOKENS -- Number of terminals.  */
+#define YYNTOKENS 13
+/* YYNNTS -- Number of nonterminals.  */
+#define YYNNTS 5
+/* YYNRULES -- Number of rules.  */
+#define YYNRULES 16
+/* YYNRULES -- Number of states.  */
+#define YYNSTATES 28
+
+/* YYTRANSLATE(YYLEX) -- Bison symbol number corresponding to YYLEX.  */
+#define YYUNDEFTOK 2
+#define YYMAXUTOK 267
+
+#define YYTRANSLATE(YYX) ((unsigned int)(YYX) <= YYMAXUTOK ? yytranslate[YYX] : YYUNDEFTOK)
+
+/* YYTRANSLATE[YYLEX] -- Bison symbol number corresponding to YYLEX.  */
+static const yytype_uint8 yytranslate[] = {
+    0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,  2,  2, 2, 2,
+    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,  2,  2, 2, 2,
+    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,  2,  2, 2, 2,
+    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,  2,  2, 2, 2,
+    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,  2,  2, 2, 2,
+    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,  2,  2, 2, 2,
+    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,  2,  2, 2, 2,
+    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,  2,  2, 2, 2,
+    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12};
+
+#if YYDEBUG
+/* YYPRHS[YYN] -- Index of the first RHS symbol of rule number YYN in
+   YYRHS.  */
+static const yytype_uint8 yyprhs[] = {0,  0,  3,  4,  7,  10, 13, 16, 20,
+                                      24, 26, 28, 31, 33, 35, 39, 43};
+
+/* YYRHS -- A `-1'-separated list of the rules' RHS.  */
+static const yytype_int8 yyrhs[] = {14, 0,  -1, -1, 6,  14, -1, 15, 14, -1, 17, 14, -1, 11, 12, -1,
+                                    11, 12, 9,  -1, 16, 3,  17, -1, 17, -1, 11, -1, 11, 9,  -1, 12,
+                                    -1, 10, -1, 4,  17, 5,  -1, 17, 8,  17, -1, 7,  4,  16, 5,  -1};
+
+/* YYRLINE[YYN] -- source line where rule number YYN was defined.  */
+static const yytype_uint8 yyrline[] = {0,  51, 51, 52, 53, 54, 57, 58, 65,
+                                       66, 69, 70, 71, 72, 73, 74, 75};
+#endif
+
+#if YYDEBUG || YYERROR_VERBOSE || YYTOKEN_TABLE
+/* YYTNAME[SYMBOL-NUM] -- String name of the symbol SYMBOL-NUM.
+   First, the terminals, then, starting at YYNTOKENS, nonterminals.  */
+static const char *const yytname[] = {"$end",    "error", "$undefined", "COMMA",    "L_PAREN",
+                                      "R_PAREN", "ENDL",  "FUNCTION",   "OPERATOR", "INDICES",
+                                      "NUMBER",  "ALIAS", "PATH",       "$accept",  "input",
+                                      "decl",    "list",  "exp",        0};
+#endif
+
+#ifdef YYPRINT
+/* YYTOKNUM[YYLEX-NUM] -- Internal token number corresponding to
+   token YYLEX-NUM.  */
+static const yytype_uint16 yytoknum[] = {0,   256, 257, 258, 259, 260, 261,
+                                         262, 263, 264, 265, 266, 267};
+#endif
+
+/* YYR1[YYN] -- Symbol number of symbol that rule YYN derives.  */
+static const yytype_uint8 yyr1[] = {0,  13, 14, 14, 14, 14, 15, 15, 16,
+                                    16, 17, 17, 17, 17, 17, 17, 17};
+
+/* YYR2[YYN] -- Number of symbols composing right hand side of rule YYN.  */
+static const yytype_uint8 yyr2[] = {0, 2, 0, 2, 2, 2, 2, 3, 3, 1, 1, 2, 1, 1, 3, 3, 4};
+
+/* YYDEFACT[STATE-NAME] -- Default rule to reduce with in state
+   STATE-NUM when YYTABLE doesn't specify something else to do.  Zero
+   means the default is an error.  */
+static const yytype_uint8 yydefact[] = {2,  0, 2, 0, 13, 10, 12, 0, 2, 2, 10, 0, 3,  0,
+                                        11, 6, 1, 4, 0,  5,  14, 0, 9, 7, 15, 0, 16, 8};
+
+/* YYDEFGOTO[NTERM-NUM].  */
+static const yytype_int8 yydefgoto[] = {-1, 7, 8, 21, 9};
+
+/* YYPACT[STATE-NUM] -- Index in YYTABLE of the portion describing
+   STATE-NUM.  */
+#define YYPACT_NINF -4
+static const yytype_int8 yypact[] = {9,  18, 9,  -2, -4, 2,  -4, 6,  9,  -3, 1,  26, -4, 18,
+                                     -4, 14, -4, -4, 18, -4, -4, 32, 10, -4, 10, 18, -4, 10};
+
+/* YYPGOTO[NTERM-NUM].  */
+static const yytype_int8 yypgoto[] = {-4, 24, -4, -4, -1};
+
+/* YYTABLE[YYPACT[STATE-NUM]].  What to do in state STATE-NUM.  If
+   positive, shift that token.  If negative, reduce the rule which
+   number is the opposite.  If zero, do what YYDEFACT says.
+   If YYTABLE_NINF, syntax error.  */
+#define YYTABLE_NINF -1
+static const yytype_uint8 yytable[] = {11, 1,  13, 2,  3,  18, 16, 4,  5,  6,  14, 14, 22,
+                                       1,  15, 2,  3,  24, 18, 4,  5,  6,  1,  23, 27, 3,
+                                       12, 0,  4,  10, 6,  20, 17, 19, 18, 25, 0,  26};
+
+static const yytype_int8 yycheck[] = {1, 4,  4,  6,  7,  8, 0,  10, 11, 12, 9,  9,  13,
+                                      4, 12, 6,  7,  18, 8, 10, 11, 12, 4,  9,  25, 7,
+                                      2, -1, 10, 11, 12, 5, 8,  9,  8,  3,  -1, 5};
+
+/* YYSTOS[STATE-NUM] -- The (internal number of the) accessing
+   symbol of state STATE-NUM.  */
+static const yytype_uint8 yystos[] = {0, 4,  6, 7,  10, 11, 12, 14, 15, 17, 11, 17, 14, 4,
+                                      9, 12, 0, 14, 8,  14, 5,  16, 17, 9,  17, 3,  5,  17};
+
+#define yyerrok (yyerrstatus = 0)
+#define yyclearin (yychar = YYEMPTY)
+#define YYEMPTY (-2)
+#define YYEOF 0
+
+#define YYACCEPT goto yyacceptlab
+#define YYABORT goto yyabortlab
+#define YYERROR goto yyerrorlab
+
+/* Like YYERROR except do call yyerror.  This remains here temporarily
+   to ease the transition to the new meaning of YYERROR, for GCC.
+   Once GCC version 2 has supplanted version 1, this can go.  */
+
+#define YYFAIL goto yyerrlab
+
+#define YYRECOVERING() (!!yyerrstatus)
+
+#define YYBACKUP(Token, Value)                                                                     \
+    do                                                                                             \
+        if (yychar == YYEMPTY && yylen == 1)                                                       \
+        {                                                                                          \
+            yychar = (Token);                                                                      \
+            yylval = (Value);                                                                      \
+            yytoken = YYTRANSLATE(yychar);                                                         \
+            YYPOPSTACK(1);                                                                         \
+            goto yybackup;                                                                         \
+        }                                                                                          \
+        else                                                                                       \
+        {                                                                                          \
+            yyerror(expr_stack, YY_("syntax error: cannot back up"));                              \
+            YYERROR;                                                                               \
+        }                                                                                          \
+    while (YYID(0))
+
+#define YYTERROR 1
+#define YYERRCODE 256
+
+/* YYLLOC_DEFAULT -- Set CURRENT to span from RHS[1] to RHS[N].
+   If N is 0, then set CURRENT to the empty location which ends
+   the previous symbol: RHS[0] (always defined).  */
+
+#define YYRHSLOC(Rhs, K) ((Rhs)[K])
+#ifndef YYLLOC_DEFAULT
+#define YYLLOC_DEFAULT(Current, Rhs, N)                                                            \
+    do                                                                                             \
+        if (YYID(N))                                                                               \
+        {                                                                                          \
+            (Current).first_line = YYRHSLOC(Rhs, 1).first_line;                                    \
+            (Current).first_column = YYRHSLOC(Rhs, 1).first_column;                                \
+            (Current).last_line = YYRHSLOC(Rhs, N).last_line;                                      \
+            (Current).last_column = YYRHSLOC(Rhs, N).last_column;                                  \
+        }                                                                                          \
+        else                                                                                       \
+        {                                                                                          \
+            (Current).first_line = (Current).last_line = YYRHSLOC(Rhs, 0).last_line;               \
+            (Current).first_column = (Current).last_column = YYRHSLOC(Rhs, 0).last_column;         \
+        }                                                                                          \
+    while (YYID(0))
+#endif
+
+/* YY_LOCATION_PRINT -- Print the location on the stream.
+   This macro was not mandated originally: define only if we know
+   we won't break user code: when these are the locations we know.  */
+
+#ifndef YY_LOCATION_PRINT
+#if defined YYLTYPE_IS_TRIVIAL && YYLTYPE_IS_TRIVIAL
+#define YY_LOCATION_PRINT(File, Loc)                                                               \
+    fprintf(File, "%d.%d-%d.%d", (Loc).first_line, (Loc).first_column, (Loc).last_line,            \
+            (Loc).last_column)
+#else
+#define YY_LOCATION_PRINT(File, Loc) ((void)0)
+#endif
+#endif
+
+/* YYLEX -- calling `yylex' with the right arguments.  */
+
+#ifdef YYLEX_PARAM
+#define YYLEX yylex(YYLEX_PARAM)
+#else
+#define YYLEX yylex()
+#endif
+
+/* Enable debugging if requested.  */
+#if YYDEBUG
+
+#ifndef YYFPRINTF
+#include <stdio.h> /* INFRINGES ON USER NAME SPACE */
+#define YYFPRINTF fprintf
+#endif
+
+#define YYDPRINTF(Args)                                                                            \
+    do                                                                                             \
+    {                                                                                              \
+        if (yydebug)                                                                               \
+            YYFPRINTF Args;                                                                        \
+    } while (YYID(0))
+
+#define YY_SYMBOL_PRINT(Title, Type, Value, Location)                                              \
+    do                                                                                             \
+    {                                                                                              \
+        if (yydebug)                                                                               \
+        {                                                                                          \
+            YYFPRINTF(stderr, "%s ", Title);                                                       \
+            yy_symbol_print(stderr, Type, Value, Location, expr_stack);                            \
+            YYFPRINTF(stderr, "\n");                                                               \
+        }                                                                                          \
+    } while (YYID(0))
+
+/*--------------------------------.
+| Print this symbol on YYOUTPUT.  |
+`--------------------------------*/
+
+/*ARGSUSED*/
+#if (defined __STDC__ || defined __C99__FUNC__ || defined __cplusplus || defined _MSC_VER)
+static void yy_symbol_value_print(FILE *yyoutput, int yytype, YYSTYPE const *const yyvaluep,
+                                  YYLTYPE const *const yylocationp,
+                                  std::stack<adios2::detail::ASTNode *> *expr_stack)
+#else
+static void yy_symbol_value_print(yyoutput, yytype, yyvaluep, yylocationp,
+                                  expr_stack) FILE *yyoutput;
+int yytype;
+YYSTYPE const *const yyvaluep;
+YYLTYPE const *const yylocationp;
+std::stack<adios2::detail::ASTNode *> *expr_stack;
+#endif
+{
+    if (!yyvaluep)
+        return;
+    YYUSE(yylocationp);
+    YYUSE(expr_stack);
+#ifdef YYPRINT
+    if (yytype < YYNTOKENS)
+        YYPRINT(yyoutput, yytoknum[yytype], *yyvaluep);
+#else
+    YYUSE(yyoutput);
+#endif
+    switch (yytype)
+    {
+    default:
+        break;
+    }
+}
+
+/*--------------------------------.
+| Print this symbol on YYOUTPUT.  |
+`--------------------------------*/
+
+#if (defined __STDC__ || defined __C99__FUNC__ || defined __cplusplus || defined _MSC_VER)
+static void yy_symbol_print(FILE *yyoutput, int yytype, YYSTYPE const *const yyvaluep,
+                            YYLTYPE const *const yylocationp,
+                            std::stack<adios2::detail::ASTNode *> *expr_stack)
+#else
+static void yy_symbol_print(yyoutput, yytype, yyvaluep, yylocationp, expr_stack) FILE *yyoutput;
+int yytype;
+YYSTYPE const *const yyvaluep;
+YYLTYPE const *const yylocationp;
+std::stack<adios2::detail::ASTNode *> *expr_stack;
+#endif
+{
+    if (yytype < YYNTOKENS)
+        YYFPRINTF(yyoutput, "token %s (", yytname[yytype]);
+    else
+        YYFPRINTF(yyoutput, "nterm %s (", yytname[yytype]);
+
+    YY_LOCATION_PRINT(yyoutput, *yylocationp);
+    YYFPRINTF(yyoutput, ": ");
+    yy_symbol_value_print(yyoutput, yytype, yyvaluep, yylocationp, expr_stack);
+    YYFPRINTF(yyoutput, ")");
+}
+
+/*------------------------------------------------------------------.
+| yy_stack_print -- Print the state stack from its BOTTOM up to its |
+| TOP (included).                                                   |
+`------------------------------------------------------------------*/
+
+#if (defined __STDC__ || defined __C99__FUNC__ || defined __cplusplus || defined _MSC_VER)
+static void yy_stack_print(yytype_int16 *bottom, yytype_int16 *top)
+#else
+static void yy_stack_print(bottom, top) yytype_int16 *bottom;
+yytype_int16 *top;
+#endif
+{
+    YYFPRINTF(stderr, "Stack now");
+    for (; bottom <= top; ++bottom)
+        YYFPRINTF(stderr, " %d", *bottom);
+    YYFPRINTF(stderr, "\n");
+}
+
+#define YY_STACK_PRINT(Bottom, Top)                                                                \
+    do                                                                                             \
+    {                                                                                              \
+        if (yydebug)                                                                               \
+            yy_stack_print((Bottom), (Top));                                                       \
+    } while (YYID(0))
+
+/*------------------------------------------------.
+| Report that the YYRULE is going to be reduced.  |
+`------------------------------------------------*/
+
+#if (defined __STDC__ || defined __C99__FUNC__ || defined __cplusplus || defined _MSC_VER)
+static void yy_reduce_print(YYSTYPE *yyvsp, YYLTYPE *yylsp, int yyrule,
+                            std::stack<adios2::detail::ASTNode *> *expr_stack)
+#else
+static void yy_reduce_print(yyvsp, yylsp, yyrule, expr_stack) YYSTYPE *yyvsp;
+YYLTYPE *yylsp;
+int yyrule;
+std::stack<adios2::detail::ASTNode *> *expr_stack;
+#endif
+{
+    int yynrhs = yyr2[yyrule];
+    int yyi;
+    unsigned long int yylno = yyrline[yyrule];
+    YYFPRINTF(stderr, "Reducing stack by rule %d (line %lu):\n", yyrule - 1, yylno);
+    /* The symbols being reduced.  */
+    for (yyi = 0; yyi < yynrhs; yyi++)
+    {
+        fprintf(stderr, "   $%d = ", yyi + 1);
+        yy_symbol_print(stderr, yyrhs[yyprhs[yyrule] + yyi], &(yyvsp[(yyi + 1) - (yynrhs)]),
+                        &(yylsp[(yyi + 1) - (yynrhs)]), expr_stack);
+        fprintf(stderr, "\n");
+    }
+}
+
+#define YY_REDUCE_PRINT(Rule)                                                                      \
+    do                                                                                             \
+    {                                                                                              \
+        if (yydebug)                                                                               \
+            yy_reduce_print(yyvsp, yylsp, Rule, expr_stack);                                       \
+    } while (YYID(0))
+
+/* Nonzero means print parse trace.  It is left uninitialized so that
+   multiple parsers can coexist.  */
+int yydebug;
+#else /* !YYDEBUG */
+#define YYDPRINTF(Args)
+#define YY_SYMBOL_PRINT(Title, Type, Value, Location)
+#define YY_STACK_PRINT(Bottom, Top)
+#define YY_REDUCE_PRINT(Rule)
+#endif /* !YYDEBUG */
+
+/* YYINITDEPTH -- initial size of the parser's stacks.  */
+#ifndef YYINITDEPTH
+#define YYINITDEPTH 200
+#endif
+
+/* YYMAXDEPTH -- maximum size the stacks can grow to (effective only
+   if the built-in stack extension method is used).
+
+   Do not make this value too large; the results are undefined if
+   YYSTACK_ALLOC_MAXIMUM < YYSTACK_BYTES (YYMAXDEPTH)
+   evaluated with infinite-precision integer arithmetic.  */
+
+#ifndef YYMAXDEPTH
+#define YYMAXDEPTH 10000
+#endif
+
+#if YYERROR_VERBOSE
+
+#ifndef yystrlen
+#if defined __GLIBC__ && defined _STRING_H
+#define yystrlen strlen
+#else
+/* Return the length of YYSTR.  */
+#if (defined __STDC__ || defined __C99__FUNC__ || defined __cplusplus || defined _MSC_VER)
+static YYSIZE_T yystrlen(const char *yystr)
+#else
+static YYSIZE_T yystrlen(yystr) const char *yystr;
+#endif
+{
+    YYSIZE_T yylen;
+    for (yylen = 0; yystr[yylen]; yylen++)
+        continue;
+    return yylen;
+}
+#endif
+#endif
+
+#ifndef yystpcpy
+#if defined __GLIBC__ && defined _STRING_H && defined _GNU_SOURCE
+#define yystpcpy stpcpy
+#else
+/* Copy YYSRC to YYDEST, returning the address of the terminating '\0' in
+   YYDEST.  */
+#if (defined __STDC__ || defined __C99__FUNC__ || defined __cplusplus || defined _MSC_VER)
+static char *yystpcpy(char *yydest, const char *yysrc)
+#else
+static char *yystpcpy(yydest, yysrc)
+char *yydest;
+const char *yysrc;
+#endif
+{
+    char *yyd = yydest;
+    const char *yys = yysrc;
+
+    while ((*yyd++ = *yys++) != '\0')
+        continue;
+
+    return yyd - 1;
+}
+#endif
+#endif
+
+#ifndef yytnamerr
+/* Copy to YYRES the contents of YYSTR after stripping away unnecessary
+   quotes and backslashes, so that it's suitable for yyerror.  The
+   heuristic is that double-quoting is unnecessary unless the string
+   contains an apostrophe, a comma, or backslash (other than
+   backslash-backslash).  YYSTR is taken from yytname.  If YYRES is
+   null, do not copy; instead, return the length of what the result
+   would have been.  */
+static YYSIZE_T yytnamerr(char *yyres, const char *yystr)
+{
+    if (*yystr == '"')
+    {
+        YYSIZE_T yyn = 0;
+        char const *yyp = yystr;
+
+        for (;;)
+            switch (*++yyp)
+            {
+            case '\'':
+            case ',':
+                goto do_not_strip_quotes;
+
+            case '\\':
+                if (*++yyp != '\\')
+                    goto do_not_strip_quotes;
+                /* Fall through.  */
+            default:
+                if (yyres)
+                    yyres[yyn] = *yyp;
+                yyn++;
+                break;
+
+            case '"':
+                if (yyres)
+                    yyres[yyn] = '\0';
+                return yyn;
+            }
+    do_not_strip_quotes:;
+    }
+
+    if (!yyres)
+        return yystrlen(yystr);
+
+    return yystpcpy(yyres, yystr) - yyres;
+}
+#endif
+
+/* Copy into YYRESULT an error message about the unexpected token
+   YYCHAR while in state YYSTATE.  Return the number of bytes copied,
+   including the terminating null byte.  If YYRESULT is null, do not
+   copy anything; just return the number of bytes that would be
+   copied.  As a special case, return 0 if an ordinary "syntax error"
+   message will do.  Return YYSIZE_MAXIMUM if overflow occurs during
+   size calculation.  */
+static YYSIZE_T yysyntax_error(char *yyresult, int yystate, int yychar)
+{
+    int yyn = yypact[yystate];
+
+    if (!(YYPACT_NINF < yyn && yyn <= YYLAST))
+        return 0;
+    else
+    {
+        int yytype = YYTRANSLATE(yychar);
+        YYSIZE_T yysize0 = yytnamerr(0, yytname[yytype]);
+        YYSIZE_T yysize = yysize0;
+        YYSIZE_T yysize1;
+        int yysize_overflow = 0;
+        enum
+        {
+            YYERROR_VERBOSE_ARGS_MAXIMUM = 5
+        };
+        char const *yyarg[YYERROR_VERBOSE_ARGS_MAXIMUM];
+        int yyx;
+
+#if 0
+      /* This is so xgettext sees the translatable formats that are
+	 constructed on the fly.  */
+      YY_("syntax error, unexpected %s");
+      YY_("syntax error, unexpected %s, expecting %s");
+      YY_("syntax error, unexpected %s, expecting %s or %s");
+      YY_("syntax error, unexpected %s, expecting %s or %s or %s");
+      YY_("syntax error, unexpected %s, expecting %s or %s or %s or %s");
+#endif
+        char *yyfmt;
+        char const *yyf;
+        static char const yyunexpected[] = "syntax error, unexpected %s";
+        static char const yyexpecting[] = ", expecting %s";
+        static char const yyor[] = " or %s";
+        char yyformat[sizeof yyunexpected + sizeof yyexpecting - 1 +
+                      ((YYERROR_VERBOSE_ARGS_MAXIMUM - 2) * (sizeof yyor - 1))];
+        char const *yyprefix = yyexpecting;
+
+        /* Start YYX at -YYN if negative to avoid negative indexes in
+           YYCHECK.  */
+        int yyxbegin = yyn < 0 ? -yyn : 0;
+
+        /* Stay within bounds of both yycheck and yytname.  */
+        int yychecklim = YYLAST - yyn + 1;
+        int yyxend = yychecklim < YYNTOKENS ? yychecklim : YYNTOKENS;
+        int yycount = 1;
+
+        yyarg[0] = yytname[yytype];
+        yyfmt = yystpcpy(yyformat, yyunexpected);
+
+        for (yyx = yyxbegin; yyx < yyxend; ++yyx)
+            if (yycheck[yyx + yyn] == yyx && yyx != YYTERROR)
+            {
+                if (yycount == YYERROR_VERBOSE_ARGS_MAXIMUM)
+                {
+                    yycount = 1;
+                    yysize = yysize0;
+                    yyformat[sizeof yyunexpected - 1] = '\0';
+                    break;
+                }
+                yyarg[yycount++] = yytname[yyx];
+                yysize1 = yysize + yytnamerr(0, yytname[yyx]);
+                yysize_overflow |= (yysize1 < yysize);
+                yysize = yysize1;
+                yyfmt = yystpcpy(yyfmt, yyprefix);
+                yyprefix = yyor;
+            }
+
+        yyf = YY_(yyformat);
+        yysize1 = yysize + yystrlen(yyf);
+        yysize_overflow |= (yysize1 < yysize);
+        yysize = yysize1;
+
+        if (yysize_overflow)
+            return YYSIZE_MAXIMUM;
+
+        if (yyresult)
+        {
+            /* Avoid sprintf, as that infringes on the user's name space.
+               Don't have undefined behavior even if the translation
+               produced a string with the wrong number of "%s"s.  */
+            char *yyp = yyresult;
+            int yyi = 0;
+            while ((*yyp = *yyf) != '\0')
+            {
+                if (*yyp == '%' && yyf[1] == 's' && yyi < yycount)
+                {
+                    yyp += yytnamerr(yyp, yyarg[yyi++]);
+                    yyf += 2;
+                }
+                else
+                {
+                    yyp++;
+                    yyf++;
+                }
+            }
+        }
+        return yysize;
+    }
+}
+#endif /* YYERROR_VERBOSE */
+
+/*-----------------------------------------------.
+| Release the memory associated to this symbol.  |
+`-----------------------------------------------*/
+
+/*ARGSUSED*/
+#if (defined __STDC__ || defined __C99__FUNC__ || defined __cplusplus || defined _MSC_VER)
+static void yydestruct(const char *yymsg, int yytype, YYSTYPE *yyvaluep, YYLTYPE *yylocationp,
+                       std::stack<adios2::detail::ASTNode *> *expr_stack)
+#else
+static void yydestruct(yymsg, yytype, yyvaluep, yylocationp, expr_stack) const char *yymsg;
+int yytype;
+YYSTYPE *yyvaluep;
+YYLTYPE *yylocationp;
+std::stack<adios2::detail::ASTNode *> *expr_stack;
+#endif
+{
+    YYUSE(yyvaluep);
+    YYUSE(yylocationp);
+    YYUSE(expr_stack);
+
+    if (!yymsg)
+        yymsg = "Deleting";
+    YY_SYMBOL_PRINT(yymsg, yytype, yyvaluep, yylocationp);
+
+    switch (yytype)
+    {
+
+    default:
+        break;
+    }
+}
+
+/* Prevent warnings from -Wmissing-prototypes.  */
+
+#ifdef YYPARSE_PARAM
+#if defined __STDC__ || defined __cplusplus
+int yyparse(void *YYPARSE_PARAM);
+#else
+int yyparse();
+#endif
+#else /* ! YYPARSE_PARAM */
+#if defined __STDC__ || defined __cplusplus
+int yyparse(std::stack<adios2::detail::ASTNode *> *expr_stack);
+#else
+int yyparse();
+#endif
+#endif /* ! YYPARSE_PARAM */
+
+/* The look-ahead symbol.  */
+int yychar;
+
+/* The semantic value of the look-ahead symbol.  */
+YYSTYPE yylval;
+
+/* Number of syntax errors so far.  */
+int yynerrs;
+/* Location data for the look-ahead symbol.  */
+YYLTYPE yylloc;
+
+/*----------.
+| yyparse.  |
+`----------*/
+
+#ifdef YYPARSE_PARAM
+#if (defined __STDC__ || defined __C99__FUNC__ || defined __cplusplus || defined _MSC_VER)
+int yyparse(void *YYPARSE_PARAM)
+#else
+int yyparse(YYPARSE_PARAM) void *YYPARSE_PARAM;
+#endif
+#else /* ! YYPARSE_PARAM */
+#if (defined __STDC__ || defined __C99__FUNC__ || defined __cplusplus || defined _MSC_VER)
+int yyparse(std::stack<adios2::detail::ASTNode *> *expr_stack)
+#else
+int yyparse(expr_stack) std::stack<adios2::detail::ASTNode *> *expr_stack;
+#endif
+#endif
+{
+
+    int yystate;
+    int yyn;
+    int yyresult;
+    /* Number of tokens to shift before error messages enabled.  */
+    int yyerrstatus;
+    /* Look-ahead token as an internal (translated) token number.  */
+    int yytoken = 0;
+#if YYERROR_VERBOSE
+    /* Buffer for error messages, and its allocated size.  */
+    char yymsgbuf[128];
+    char *yymsg = yymsgbuf;
+    YYSIZE_T yymsg_alloc = sizeof yymsgbuf;
+#endif
+
+    /* Three stacks and their tools:
+       `yyss': related to states,
+       `yyvs': related to semantic values,
+       `yyls': related to locations.
+
+       Refer to the stacks thru separate pointers, to allow yyoverflow
+       to reallocate them elsewhere.  */
+
+    /* The state stack.  */
+    yytype_int16 yyssa[YYINITDEPTH];
+    yytype_int16 *yyss = yyssa;
+    yytype_int16 *yyssp;
+
+    /* The semantic value stack.  */
+    YYSTYPE yyvsa[YYINITDEPTH];
+    YYSTYPE *yyvs = yyvsa;
+    YYSTYPE *yyvsp;
+
+    /* The location stack.  */
+    YYLTYPE yylsa[YYINITDEPTH];
+    YYLTYPE *yyls = yylsa;
+    YYLTYPE *yylsp;
+    /* The locations where the error started and ended.  */
+    YYLTYPE yyerror_range[2];
+
+#define YYPOPSTACK(N) (yyvsp -= (N), yyssp -= (N), yylsp -= (N))
+
+    YYSIZE_T yystacksize = YYINITDEPTH;
+
+    /* The variables used to return semantic value and location from the
+       action routines.  */
+    YYSTYPE yyval;
+    YYLTYPE yyloc;
+
+    /* The number of symbols on the RHS of the reduced rule.
+       Keep to zero when no symbol should be popped.  */
+    int yylen = 0;
+
+    YYDPRINTF((stderr, "Starting parse\n"));
+
+    yystate = 0;
+    yyerrstatus = 0;
+    yynerrs = 0;
+    yychar = YYEMPTY; /* Cause a token to be read.  */
+
+    /* Initialize stack pointers.
+       Waste one element of value and location stack
+       so that they stay on the same level as the state stack.
+       The wasted elements are never initialized.  */
+
+    yyssp = yyss;
+    yyvsp = yyvs;
+    yylsp = yyls;
+#if defined YYLTYPE_IS_TRIVIAL && YYLTYPE_IS_TRIVIAL
+    /* Initialize the default location before parsing starts.  */
+    yylloc.first_line = yylloc.last_line = 1;
+    yylloc.first_column = yylloc.last_column = 0;
+#endif
+
+    goto yysetstate;
+
+    /*------------------------------------------------------------.
+    | yynewstate -- Push a new state, which is found in yystate.  |
+    `------------------------------------------------------------*/
+yynewstate:
+    /* In all cases, when you get here, the value and location stacks
+       have just been pushed.  So pushing a state here evens the stacks.  */
+    yyssp++;
+
+yysetstate:
+    *yyssp = yystate;
+
+    if (yyss + yystacksize - 1 <= yyssp)
+    {
+        /* Get the current used size of the three stacks, in elements.  */
+        YYSIZE_T yysize = yyssp - yyss + 1;
+
+#ifdef yyoverflow
+        {
+            /* Give user a chance to reallocate the stack.  Use copies of
+               these so that the &'s don't force the real ones into
+               memory.  */
+            YYSTYPE *yyvs1 = yyvs;
+            yytype_int16 *yyss1 = yyss;
+            YYLTYPE *yyls1 = yyls;
+
+            /* Each stack pointer address is followed by the size of the
+               data in use in that stack, in bytes.  This used to be a
+               conditional around just the two extra args, but that might
+               be undefined if yyoverflow is a macro.  */
+            yyoverflow(YY_("memory exhausted"), &yyss1, yysize * sizeof(*yyssp), &yyvs1,
+                       yysize * sizeof(*yyvsp), &yyls1, yysize * sizeof(*yylsp), &yystacksize);
+            yyls = yyls1;
+            yyss = yyss1;
+            yyvs = yyvs1;
+        }
+#else /* no yyoverflow */
+#ifndef YYSTACK_RELOCATE
+        goto yyexhaustedlab;
+#else
+        /* Extend the stack our own way.  */
+        if (YYMAXDEPTH <= yystacksize)
+            goto yyexhaustedlab;
+        yystacksize *= 2;
+        if (YYMAXDEPTH < yystacksize)
+            yystacksize = YYMAXDEPTH;
+
+        {
+            yytype_int16 *yyss1 = yyss;
+            union yyalloc *yyptr = (union yyalloc *)YYSTACK_ALLOC(YYSTACK_BYTES(yystacksize));
+            if (!yyptr)
+                goto yyexhaustedlab;
+            YYSTACK_RELOCATE(yyss);
+            YYSTACK_RELOCATE(yyvs);
+            YYSTACK_RELOCATE(yyls);
+#undef YYSTACK_RELOCATE
+            if (yyss1 != yyssa)
+                YYSTACK_FREE(yyss1);
+        }
+#endif
+#endif /* no yyoverflow */
+
+        yyssp = yyss + yysize - 1;
+        yyvsp = yyvs + yysize - 1;
+        yylsp = yyls + yysize - 1;
+
+        YYDPRINTF((stderr, "Stack size increased to %lu\n", (unsigned long int)yystacksize));
+
+        if (yyss + yystacksize - 1 <= yyssp)
+            YYABORT;
+    }
+
+    YYDPRINTF((stderr, "Entering state %d\n", yystate));
+
+    goto yybackup;
+
+/*-----------.
+| yybackup.  |
+`-----------*/
+yybackup:
+
+    /* Do appropriate processing given the current state.  Read a
+       look-ahead token if we need one and don't already have one.  */
+
+    /* First try to decide what to do without reference to look-ahead token.  */
+    yyn = yypact[yystate];
+    if (yyn == YYPACT_NINF)
+        goto yydefault;
+
+    /* Not known => get a look-ahead token if don't already have one.  */
+
+    /* YYCHAR is either YYEMPTY or YYEOF or a valid look-ahead symbol.  */
+    if (yychar == YYEMPTY)
+    {
+        YYDPRINTF((stderr, "Reading a token: "));
+        yychar = YYLEX;
+    }
+
+    if (yychar <= YYEOF)
+    {
+        yychar = yytoken = YYEOF;
+        YYDPRINTF((stderr, "Now at end of input.\n"));
+    }
+    else
+    {
+        yytoken = YYTRANSLATE(yychar);
+        YY_SYMBOL_PRINT("Next token is", yytoken, &yylval, &yylloc);
+    }
+
+    /* If the proper action on seeing token YYTOKEN is to reduce or to
+       detect an error, take that action.  */
+    yyn += yytoken;
+    if (yyn < 0 || YYLAST < yyn || yycheck[yyn] != yytoken)
+        goto yydefault;
+    yyn = yytable[yyn];
+    if (yyn <= 0)
+    {
+        if (yyn == 0 || yyn == YYTABLE_NINF)
+            goto yyerrlab;
+        yyn = -yyn;
+        goto yyreduce;
+    }
+
+    if (yyn == YYFINAL)
+        YYACCEPT;
+
+    /* Count tokens shifted since error; after three, turn off error
+       status.  */
+    if (yyerrstatus)
+        yyerrstatus--;
+
+    /* Shift the look-ahead token.  */
+    YY_SYMBOL_PRINT("Shifting", yytoken, &yylval, &yylloc);
+
+    /* Discard the shifted token unless it is eof.  */
+    if (yychar != YYEOF)
+        yychar = YYEMPTY;
+
+    yystate = yyn;
+    *++yyvsp = yylval;
+    *++yylsp = yylloc;
+    goto yynewstate;
+
+/*-----------------------------------------------------------.
+| yydefault -- do the default action for the current state.  |
+`-----------------------------------------------------------*/
+yydefault:
+    yyn = yydefact[yystate];
+    if (yyn == 0)
+        goto yyerrlab;
+    goto yyreduce;
+
+/*-----------------------------.
+| yyreduce -- Do a reduction.  |
+`-----------------------------*/
+yyreduce:
+    /* yyn is the number of a rule to reduce with.  */
+    yylen = yyr2[yyn];
+
+    /* If YYLEN is nonzero, implement the default value of the action:
+       `$$ = $1'.
+
+       Otherwise, the following line sets YYVAL to garbage.
+       This behavior is undocumented and Bison
+       users should not rely upon it.  Assigning to YYVAL
+       unconditionally makes the parser a bit smaller, and it avoids a
+       GCC warning that YYVAL may be used uninitialized.  */
+    yyval = yyvsp[1 - yylen];
+
+    /* Default location.  */
+    YYLLOC_DEFAULT(yyloc, (yylsp - yylen), yylen);
+    YY_REDUCE_PRINT(yyn);
+    switch (yyn)
+    {
+    case 2:
+#line 51 "parser.y"
+    {
+        ;
+    }
+    break;
+
+    case 3:
+#line 52 "parser.y"
+    {
+        ;
+    }
+    break;
+
+    case 4:
+#line 53 "parser.y"
+    {
+        ;
+    }
+    break;
+
+    case 5:
+#line 54 "parser.y"
+    { /*yyparse_value = $1->expression;*/
+        ;
+    }
+    break;
+
+    case 6:
+#line 57 "parser.y"
+    {
+        adios2::detail::ASTNode::add_lookup_entry((yyvsp[(1) - (2)].sval), (yyvsp[(2) - (2)].sval),
+                                                  "");
+        ;
+    }
+    break;
+
+    case 7:
+#line 58 "parser.y"
+    {
+        adios2::detail::ASTNode::add_lookup_entry((yyvsp[(1) - (3)].sval), (yyvsp[(2) - (3)].sval),
+                                                  (yyvsp[(3) - (3)].sval));
+        ;
+    }
+    break;
+
+    case 8:
+#line 65 "parser.y"
+    {
+        (yyval.ival) = (yyvsp[(1) - (3)].ival) + 1;
+        ;
+    }
+    break;
+
+    case 9:
+#line 66 "parser.y"
+    {
+        (yyval.ival) = 1;
+        ;
+    }
+    break;
+
+    case 10:
+#line 69 "parser.y"
+    {
+        (yyval.expr_ptr) = createExpr(expr_stack, "ALIAS", (yyvsp[(1) - (1)].sval), 0, 0);
+        ;
+    }
+    break;
+
+    case 11:
+#line 70 "parser.y"
+    {
+        createExpr(expr_stack, "ALIAS", (yyvsp[(1) - (2)].sval), 0, 0);
+        (yyval.expr_ptr) = createExpr(expr_stack, "INDEX", (yyvsp[(2) - (2)].sval), 0, 1);
+        ;
+    }
+    break;
+
+    case 12:
+#line 71 "parser.y"
+    {
+        (yyval.expr_ptr) = createExpr(expr_stack, "PATH", (yyvsp[(1) - (1)].sval), 0, 0);
+        ;
+    }
+    break;
+
+    case 13:
+#line 72 "parser.y"
+    {
+        (yyval.expr_ptr) = createExpr(expr_stack, "NUM", "", (yyvsp[(1) - (1)].dval), 0);
+        ;
+    }
+    break;
+
+    case 14:
+#line 73 "parser.y"
+    {
+        (yyval.expr_ptr) = (yyvsp[(2) - (3)].expr_ptr);
+        ;
+    }
+    break;
+
+    case 15:
+#line 74 "parser.y"
+    {
+        (yyval.expr_ptr) = createExpr(expr_stack, (yyvsp[(2) - (3)].sval), "", 0, 2);
+        ;
+    }
+    break;
+
+    case 16:
+#line 75 "parser.y"
+    {
+        (yyval.expr_ptr) =
+            createExpr(expr_stack, (yyvsp[(1) - (4)].sval), "", 0, (yyvsp[(3) - (4)].ival));
+        ;
+    }
+    break;
+
+/* Line 1267 of yacc.c.  */
+#line 1480 "parser.cpp"
+    default:
+        break;
+    }
+    YY_SYMBOL_PRINT("-> $$ =", yyr1[yyn], &yyval, &yyloc);
+
+    YYPOPSTACK(yylen);
+    yylen = 0;
+    YY_STACK_PRINT(yyss, yyssp);
+
+    *++yyvsp = yyval;
+    *++yylsp = yyloc;
+
+    /* Now `shift' the result of the reduction.  Determine what state
+       that goes to, based on the state we popped back to and the rule
+       number reduced by.  */
+
+    yyn = yyr1[yyn];
+
+    yystate = yypgoto[yyn - YYNTOKENS] + *yyssp;
+    if (0 <= yystate && yystate <= YYLAST && yycheck[yystate] == *yyssp)
+        yystate = yytable[yystate];
+    else
+        yystate = yydefgoto[yyn - YYNTOKENS];
+
+    goto yynewstate;
+
+/*------------------------------------.
+| yyerrlab -- here on detecting error |
+`------------------------------------*/
+yyerrlab:
+    /* If not already recovering from an error, report this error.  */
+    if (!yyerrstatus)
+    {
+        ++yynerrs;
+#if !YYERROR_VERBOSE
+        yyerror(expr_stack, YY_("syntax error"));
+#else
+        {
+            YYSIZE_T yysize = yysyntax_error(0, yystate, yychar);
+            if (yymsg_alloc < yysize && yymsg_alloc < YYSTACK_ALLOC_MAXIMUM)
+            {
+                YYSIZE_T yyalloc = 2 * yysize;
+                if (!(yysize <= yyalloc && yyalloc <= YYSTACK_ALLOC_MAXIMUM))
+                    yyalloc = YYSTACK_ALLOC_MAXIMUM;
+                if (yymsg != yymsgbuf)
+                    YYSTACK_FREE(yymsg);
+                yymsg = (char *)YYSTACK_ALLOC(yyalloc);
+                if (yymsg)
+                    yymsg_alloc = yyalloc;
+                else
+                {
+                    yymsg = yymsgbuf;
+                    yymsg_alloc = sizeof yymsgbuf;
+                }
+            }
+
+            if (0 < yysize && yysize <= yymsg_alloc)
+            {
+                (void)yysyntax_error(yymsg, yystate, yychar);
+                yyerror(expr_stack, yymsg);
+            }
+            else
+            {
+                yyerror(expr_stack, YY_("syntax error"));
+                if (yysize != 0)
+                    goto yyexhaustedlab;
+            }
+        }
+#endif
+    }
+
+    yyerror_range[0] = yylloc;
+
+    if (yyerrstatus == 3)
+    {
+        /* If just tried and failed to reuse look-ahead token after an
+           error, discard it.  */
+
+        if (yychar <= YYEOF)
+        {
+            /* Return failure if at end of input.  */
+            if (yychar == YYEOF)
+                YYABORT;
+        }
+        else
+        {
+            yydestruct("Error: discarding", yytoken, &yylval, &yylloc, expr_stack);
+            yychar = YYEMPTY;
+        }
+    }
+
+    /* Else will try to reuse look-ahead token after shifting the error
+       token.  */
+    goto yyerrlab1;
+
+/*---------------------------------------------------.
+| yyerrorlab -- error raised explicitly by YYERROR.  |
+`---------------------------------------------------*/
+yyerrorlab:
+
+    /* Pacify compilers like GCC when the user code never invokes
+       YYERROR and the label yyerrorlab therefore never appears in user
+       code.  */
+    if (/*CONSTCOND*/ 0)
+        goto yyerrorlab;
+
+    yyerror_range[0] = yylsp[1 - yylen];
+    /* Do not reclaim the symbols of the rule which action triggered
+       this YYERROR.  */
+    YYPOPSTACK(yylen);
+    yylen = 0;
+    YY_STACK_PRINT(yyss, yyssp);
+    yystate = *yyssp;
+    goto yyerrlab1;
+
+/*-------------------------------------------------------------.
+| yyerrlab1 -- common code for both syntax error and YYERROR.  |
+`-------------------------------------------------------------*/
+yyerrlab1:
+    yyerrstatus = 3; /* Each real token shifted decrements this.  */
+
+    for (;;)
+    {
+        yyn = yypact[yystate];
+        if (yyn != YYPACT_NINF)
+        {
+            yyn += YYTERROR;
+            if (0 <= yyn && yyn <= YYLAST && yycheck[yyn] == YYTERROR)
+            {
+                yyn = yytable[yyn];
+                if (0 < yyn)
+                    break;
+            }
+        }
+
+        /* Pop the current state because it cannot handle the error token.  */
+        if (yyssp == yyss)
+            YYABORT;
+
+        yyerror_range[0] = *yylsp;
+        yydestruct("Error: popping", yystos[yystate], yyvsp, yylsp, expr_stack);
+        YYPOPSTACK(1);
+        yystate = *yyssp;
+        YY_STACK_PRINT(yyss, yyssp);
+    }
+
+    if (yyn == YYFINAL)
+        YYACCEPT;
+
+    *++yyvsp = yylval;
+
+    yyerror_range[1] = yylloc;
+    /* Using YYLLOC is tempting, but would change the location of
+       the look-ahead.  YYLOC is available though.  */
+    YYLLOC_DEFAULT(yyloc, (yyerror_range - 1), 2);
+    *++yylsp = yyloc;
+
+    /* Shift the error token.  */
+    YY_SYMBOL_PRINT("Shifting", yystos[yyn], yyvsp, yylsp);
+
+    yystate = yyn;
+    goto yynewstate;
+
+/*-------------------------------------.
+| yyacceptlab -- YYACCEPT comes here.  |
+`-------------------------------------*/
+yyacceptlab:
+    yyresult = 0;
+    goto yyreturn;
+
+/*-----------------------------------.
+| yyabortlab -- YYABORT comes here.  |
+`-----------------------------------*/
+yyabortlab:
+    yyresult = 1;
+    goto yyreturn;
+
+#ifndef yyoverflow
+/*-------------------------------------------------.
+| yyexhaustedlab -- memory exhaustion comes here.  |
+`-------------------------------------------------*/
+yyexhaustedlab:
+    yyerror(expr_stack, YY_("memory exhausted"));
+    yyresult = 2;
+    /* Fall through.  */
+#endif
+
+yyreturn:
+    if (yychar != YYEOF && yychar != YYEMPTY)
+        yydestruct("Cleanup: discarding lookahead", yytoken, &yylval, &yylloc, expr_stack);
+    /* Do not reclaim the symbols of the rule which action triggered
+       this YYABORT or YYACCEPT.  */
+    YYPOPSTACK(yylen);
+    YY_STACK_PRINT(yyss, yyssp);
+    while (yyssp != yyss)
+    {
+        yydestruct("Cleanup: popping", yystos[*yyssp], yyvsp, yylsp, expr_stack);
+        YYPOPSTACK(1);
+    }
+#ifndef yyoverflow
+    if (yyss != yyssa)
+        YYSTACK_FREE(yyss);
+#endif
+#if YYERROR_VERBOSE
+    if (yymsg != yymsgbuf)
+        YYSTACK_FREE(yymsg);
+#endif
+    /* Make sure YYID is used.  */
+    return YYID(yyresult);
+}
+
+#line 77 "parser.y"
+
+namespace adios2
+{
+namespace detail
+{
+
+void *createExpr(std::stack<ASTNode *> *expr_stack, std::string str_op, const char *name,
+                 double value, size_t numsubexprs)
+{
+    // std::cout << "Creating ASTNode in function createExpr" << std::endl;
+    // std::cout << "\tstack size: " << expr_stack->size() << "\n\top: " << str_op << "\n\tname: "
+    // << name << "\n\tvalue: " << value << "\n\tnumsubexprs: " << numsubexprs << std::endl;
+
+    ExpressionOperator op = get_op(str_op);
+
+    ASTNode *node = new ASTNode(op);
+    switch (op)
+    {
+    case ExpressionOperator::OP_ALIAS:
+        node = new ASTNode(op, name);
+        break;
+    case ExpressionOperator::OP_PATH:
+        node = new ASTNode(op, name);
+        break;
+    case ExpressionOperator::OP_NUM:
+        node = new ASTNode(op, value);
+        break;
+    case ExpressionOperator::OP_INDEX:
+        // TODO: translate indices
+        node = new ASTNode(op, name);
+        break;
+    default:
+        node = new ASTNode(op);
+    };
+    node->extend_subexprs(numsubexprs);
+    for (size_t i = 1; i <= numsubexprs; ++i)
+    {
+        ASTNode *subexpr = expr_stack->top();
+        node->add_back_subexpr(subexpr, i);
+        expr_stack->pop();
+    }
+    expr_stack->push(node);
+
+    return &expr_stack->top();
+}
+
+ASTNode *parse_expression(std::string input)
+{
+    yy_scan_string(input.c_str());
+    std::stack<ASTNode *> expr_stack;
+    yyparse(&expr_stack);
+
+    // DEBUGGING
+    // std::cout << "yyparse complete. Stack size: " << expr_stack.size() << std::endl;
+    // std::cout << "parser prettyprint:" << std::endl;
+    // expr_stack.top()->printpretty("");
+    return expr_stack.top();
+}
+
+}
+}
+
+void yyerror(std::stack<adios2::detail::ASTNode *> *expr_stack, const char *msg)
+{
+    printf("** Line %d: %s\n", yylloc.first_line, msg);
+}
diff --git a/source/adios2/toolkit/derived/parser/parser.h b/source/adios2/toolkit/derived/parser/parser.h
new file mode 100644
index 0000000000..38c3c96d58
--- /dev/null
+++ b/source/adios2/toolkit/derived/parser/parser.h
@@ -0,0 +1,110 @@
+/* A Bison parser, made by GNU Bison 2.3.  */
+
+/* Skeleton interface for Bison's Yacc-like parsers in C
+
+   Copyright (C) 1984, 1989, 1990, 2000, 2001, 2002, 2003, 2004, 2005, 2006
+   Free Software Foundation, Inc.
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 2, or (at your option)
+   any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 51 Franklin Street, Fifth Floor,
+   Boston, MA 02110-1301, USA.  */
+
+/* As a special exception, you may create a larger work that contains
+   part or all of the Bison parser skeleton and distribute that work
+   under terms of your choice, so long as that work isn't itself a
+   parser generator using the skeleton or a modified version thereof
+   as a parser skeleton.  Alternatively, if you modify or redistribute
+   the parser skeleton itself, you may (at your option) remove this
+   special exception, which will cause the skeleton and the resulting
+   Bison output files to be licensed under the GNU General Public
+   License without this special exception.
+
+   This special exception was added by the Free Software Foundation in
+   version 2.2 of Bison.  */
+
+/* Tokens.  */
+#ifndef YYTOKENTYPE
+#define YYTOKENTYPE
+/* Put the tokens into the symbol table, so that GDB and other debuggers
+   know about them.  */
+enum yytokentype
+{
+    COMMA = 258,
+    L_PAREN = 259,
+    R_PAREN = 260,
+    ENDL = 261,
+    FUNCTION = 262,
+    OPERATOR = 263,
+    INDICES = 264,
+    NUMBER = 265,
+    ALIAS = 266,
+    PATH = 267
+};
+#endif
+/* Tokens.  */
+#define COMMA 258
+#define L_PAREN 259
+#define R_PAREN 260
+#define ENDL 261
+#define FUNCTION 262
+#define OPERATOR 263
+#define INDICES 264
+#define NUMBER 265
+#define ALIAS 266
+#define PATH 267
+
+#if !defined YYSTYPE && !defined YYSTYPE_IS_DECLARED
+typedef union YYSTYPE
+#line 25 "parser.y"
+{
+    double dval;
+    int ival;
+    char *sval;
+    void *expr_ptr;
+}
+/* Line 1529 of yacc.c.  */
+#line 80 "parser.h"
+YYSTYPE;
+#define yystype YYSTYPE /* obsolescent; will be withdrawn */
+#define YYSTYPE_IS_DECLARED 1
+#define YYSTYPE_IS_TRIVIAL 1
+#endif
+
+extern YYSTYPE yylval;
+
+#if !defined YYLTYPE && !defined YYLTYPE_IS_DECLARED
+typedef struct YYLTYPE
+{
+    int first_line;
+    int first_column;
+    int last_line;
+    int last_column;
+} YYLTYPE;
+#define yyltype YYLTYPE /* obsolescent; will be withdrawn */
+#define YYLTYPE_IS_DECLARED 1
+#define YYLTYPE_IS_TRIVIAL 1
+#endif
+
+extern YYLTYPE yylloc;
+
+#include "ASTNode.h"
+#include <stack>
+
+namespace adios2
+{
+namespace detail
+{
+ASTNode *parse_expression(std::string input);
+}
+}
diff --git a/source/adios2/toolkit/derived/parser/parser.output b/source/adios2/toolkit/derived/parser/parser.output
new file mode 100644
index 0000000000..ae7a9b24a1
--- /dev/null
+++ b/source/adios2/toolkit/derived/parser/parser.output
@@ -0,0 +1,350 @@
+State 5 conflicts: 1 shift/reduce
+State 24 conflicts: 1 shift/reduce
+
+
+Grammar
+
+    0 $accept: input $end
+
+    1 input: /* empty */
+    2      | ENDL input
+    3      | decl input
+    4      | exp input
+
+    5 decl: ALIAS PATH
+    6     | ALIAS PATH INDICES
+
+    7 list: list COMMA exp
+    8     | exp
+
+    9 exp: ALIAS
+   10    | ALIAS INDICES
+   11    | PATH
+   12    | NUMBER
+   13    | L_PAREN exp R_PAREN
+   14    | exp OPERATOR exp
+   15    | FUNCTION L_PAREN list R_PAREN
+
+
+Terminals, with rules where they appear
+
+$end (0) 0
+error (256)
+COMMA (258) 7
+L_PAREN (259) 13 15
+R_PAREN (260) 13 15
+ENDL (261) 2
+FUNCTION (262) 15
+OPERATOR (263) 14
+INDICES (264) 6 10
+NUMBER (265) 12
+ALIAS (266) 5 6 9 10
+PATH (267) 5 6 11
+
+
+Nonterminals, with rules where they appear
+
+$accept (13)
+    on left: 0
+input (14)
+    on left: 1 2 3 4, on right: 0 2 3 4
+decl (15)
+    on left: 5 6, on right: 3
+list (16)
+    on left: 7 8, on right: 7 15
+exp (17)
+    on left: 9 10 11 12 13 14 15, on right: 4 7 8 13 14
+
+
+state 0
+
+    0 $accept: . input $end
+
+    L_PAREN   shift, and go to state 1
+    ENDL      shift, and go to state 2
+    FUNCTION  shift, and go to state 3
+    NUMBER    shift, and go to state 4
+    ALIAS     shift, and go to state 5
+    PATH      shift, and go to state 6
+
+    $default  reduce using rule 1 (input)
+
+    input  go to state 7
+    decl   go to state 8
+    exp    go to state 9
+
+
+state 1
+
+   13 exp: L_PAREN . exp R_PAREN
+
+    L_PAREN   shift, and go to state 1
+    FUNCTION  shift, and go to state 3
+    NUMBER    shift, and go to state 4
+    ALIAS     shift, and go to state 10
+    PATH      shift, and go to state 6
+
+    exp  go to state 11
+
+
+state 2
+
+    2 input: ENDL . input
+
+    L_PAREN   shift, and go to state 1
+    ENDL      shift, and go to state 2
+    FUNCTION  shift, and go to state 3
+    NUMBER    shift, and go to state 4
+    ALIAS     shift, and go to state 5
+    PATH      shift, and go to state 6
+
+    $default  reduce using rule 1 (input)
+
+    input  go to state 12
+    decl   go to state 8
+    exp    go to state 9
+
+
+state 3
+
+   15 exp: FUNCTION . L_PAREN list R_PAREN
+
+    L_PAREN  shift, and go to state 13
+
+
+state 4
+
+   12 exp: NUMBER .
+
+    $default  reduce using rule 12 (exp)
+
+
+state 5
+
+    5 decl: ALIAS . PATH
+    6     | ALIAS . PATH INDICES
+    9 exp: ALIAS .
+   10    | ALIAS . INDICES
+
+    INDICES  shift, and go to state 14
+    PATH     shift, and go to state 15
+
+    PATH      [reduce using rule 9 (exp)]
+    $default  reduce using rule 9 (exp)
+
+
+state 6
+
+   11 exp: PATH .
+
+    $default  reduce using rule 11 (exp)
+
+
+state 7
+
+    0 $accept: input . $end
+
+    $end  shift, and go to state 16
+
+
+state 8
+
+    3 input: decl . input
+
+    L_PAREN   shift, and go to state 1
+    ENDL      shift, and go to state 2
+    FUNCTION  shift, and go to state 3
+    NUMBER    shift, and go to state 4
+    ALIAS     shift, and go to state 5
+    PATH      shift, and go to state 6
+
+    $default  reduce using rule 1 (input)
+
+    input  go to state 17
+    decl   go to state 8
+    exp    go to state 9
+
+
+state 9
+
+    4 input: exp . input
+   14 exp: exp . OPERATOR exp
+
+    L_PAREN   shift, and go to state 1
+    ENDL      shift, and go to state 2
+    FUNCTION  shift, and go to state 3
+    OPERATOR  shift, and go to state 18
+    NUMBER    shift, and go to state 4
+    ALIAS     shift, and go to state 5
+    PATH      shift, and go to state 6
+
+    $default  reduce using rule 1 (input)
+
+    input  go to state 19
+    decl   go to state 8
+    exp    go to state 9
+
+
+state 10
+
+    9 exp: ALIAS .
+   10    | ALIAS . INDICES
+
+    INDICES  shift, and go to state 14
+
+    $default  reduce using rule 9 (exp)
+
+
+state 11
+
+   13 exp: L_PAREN exp . R_PAREN
+   14    | exp . OPERATOR exp
+
+    R_PAREN   shift, and go to state 20
+    OPERATOR  shift, and go to state 18
+
+
+state 12
+
+    2 input: ENDL input .
+
+    $default  reduce using rule 2 (input)
+
+
+state 13
+
+   15 exp: FUNCTION L_PAREN . list R_PAREN
+
+    L_PAREN   shift, and go to state 1
+    FUNCTION  shift, and go to state 3
+    NUMBER    shift, and go to state 4
+    ALIAS     shift, and go to state 10
+    PATH      shift, and go to state 6
+
+    list  go to state 21
+    exp   go to state 22
+
+
+state 14
+
+   10 exp: ALIAS INDICES .
+
+    $default  reduce using rule 10 (exp)
+
+
+state 15
+
+    5 decl: ALIAS PATH .
+    6     | ALIAS PATH . INDICES
+
+    INDICES  shift, and go to state 23
+
+    $default  reduce using rule 5 (decl)
+
+
+state 16
+
+    0 $accept: input $end .
+
+    $default  accept
+
+
+state 17
+
+    3 input: decl input .
+
+    $default  reduce using rule 3 (input)
+
+
+state 18
+
+   14 exp: exp OPERATOR . exp
+
+    L_PAREN   shift, and go to state 1
+    FUNCTION  shift, and go to state 3
+    NUMBER    shift, and go to state 4
+    ALIAS     shift, and go to state 10
+    PATH      shift, and go to state 6
+
+    exp  go to state 24
+
+
+state 19
+
+    4 input: exp input .
+
+    $default  reduce using rule 4 (input)
+
+
+state 20
+
+   13 exp: L_PAREN exp R_PAREN .
+
+    $default  reduce using rule 13 (exp)
+
+
+state 21
+
+    7 list: list . COMMA exp
+   15 exp: FUNCTION L_PAREN list . R_PAREN
+
+    COMMA    shift, and go to state 25
+    R_PAREN  shift, and go to state 26
+
+
+state 22
+
+    8 list: exp .
+   14 exp: exp . OPERATOR exp
+
+    OPERATOR  shift, and go to state 18
+
+    $default  reduce using rule 8 (list)
+
+
+state 23
+
+    6 decl: ALIAS PATH INDICES .
+
+    $default  reduce using rule 6 (decl)
+
+
+state 24
+
+   14 exp: exp . OPERATOR exp
+   14    | exp OPERATOR exp .
+
+    OPERATOR  shift, and go to state 18
+
+    OPERATOR  [reduce using rule 14 (exp)]
+    $default  reduce using rule 14 (exp)
+
+
+state 25
+
+    7 list: list COMMA . exp
+
+    L_PAREN   shift, and go to state 1
+    FUNCTION  shift, and go to state 3
+    NUMBER    shift, and go to state 4
+    ALIAS     shift, and go to state 10
+    PATH      shift, and go to state 6
+
+    exp  go to state 27
+
+
+state 26
+
+   15 exp: FUNCTION L_PAREN list R_PAREN .
+
+    $default  reduce using rule 15 (exp)
+
+
+state 27
+
+    7 list: list COMMA exp .
+   14 exp: exp . OPERATOR exp
+
+    OPERATOR  shift, and go to state 18
+
+    $default  reduce using rule 7 (list)
\ No newline at end of file
diff --git a/source/adios2/toolkit/derived/parser/parser.y b/source/adios2/toolkit/derived/parser/parser.y
new file mode 100644
index 0000000000..d9bf172459
--- /dev/null
+++ b/source/adios2/toolkit/derived/parser/parser.y
@@ -0,0 +1,132 @@
+/* calculator. */
+%{
+ #include <stdio.h>
+ #include <stdlib.h>
+ #include <math.h>
+ #include <stack>
+ #include <string>
+ #include <vector>
+ #include <iostream>
+ #include "lexer.h"
+ #include "ASTNode.h"
+  
+  extern int yyparse(std::stack<ASTNode*>* expr_stack);
+
+  void* createExpr(std::stack<ASTNode*>*, std::string, const char*, double, size_t);
+  
+  static void* yyparse_value;  
+
+  void yyerror(std::stack<ASTNode*>* expr_stack, const char *msg);
+
+%}
+
+%parse-param {std::stack<ASTNode*>* expr_stack}
+
+%union{
+  double dval;
+  int ival;
+  char* sval;
+  void* expr_ptr;
+}
+
+%error-verbose
+%locations
+
+%start input
+%token COMMA L_PAREN R_PAREN ENDL
+%token FUNCTION
+%token OPERATOR
+%token INDICES
+%token NUMBER
+%token ALIAS PATH
+%type <dval> NUMBER
+%type <sval> ALIAS PATH INDICES
+%type <sval> FUNCTION OPERATOR
+%type <expr_ptr> input exp
+%type <ival> list
+
+
+%% 
+
+input:                  {}
+                        | ENDL input             {}
+                        | decl input             {}
+                        | exp input              { /*yyparse_value = $1->expression;*/ }
+			;
+
+decl:                   ALIAS PATH               { ASTNode::add_lookup_entry($1, $2, ""); }
+                        | ALIAS PATH INDICES     { ASTNode::add_lookup_entry($1, $2, $3); }
+                        ;
+
+//index:                  NUMBER comma index { ASTNode::extend_current_lookup_indices($1); }
+//                        | NUMBER { ASTNode::extend_current_lookup_indices($1); }
+//                        ;
+
+list:                   list COMMA exp { $$ = $1 +1; }
+                        | exp { $$ = 1;}
+                        ;
+
+exp:                    ALIAS                  { $$ = createExpr(expr_stack, "ALIAS", $1, 0, 0); }
+| ALIAS INDICES         { createExpr(expr_stack, "ALIAS", $1, 0, 0); $$ = createExpr(expr_stack, "INDEX", $2, 0, 1); }
+| PATH                  { $$ = createExpr(expr_stack, "PATH", $1, 0, 0); }
+| NUMBER                { $$ = createExpr(expr_stack, "NUM", "", $1, 0); }
+| L_PAREN exp R_PAREN { $$ = $2; }
+| exp OPERATOR exp { $$ = createExpr(expr_stack, $2, "", 0, 2); }
+| FUNCTION L_PAREN list R_PAREN { $$ = createExpr(expr_stack, $1, "", 0, $3); }
+			;
+%%
+
+void* createExpr(std::stack<ASTNode*>* expr_stack, std::string str_op, const char* name, double value, size_t numsubexprs) {
+  std::cout << "Creating ASTNode in function createExpr" << std::endl;
+  std::cout << "\tstack size: " << expr_stack->size() << "\n\top: " << str_op << "\n\tname: " << name << "\n\tvalue: " << value << "\n\tnumsubexprs: " << numsubexprs << std::endl;
+
+  ExprHelper::expr_op op = ExprHelper::get_op(str_op);
+
+  ASTNode *node = new ASTNode(op);
+  switch(op) {
+  case ExprHelper::OP_ALIAS:
+    node = new ASTNode(op, name);
+    break;
+  case ExprHelper::OP_PATH:
+    node = new ASTNode(op, name);
+    break;
+  case ExprHelper::OP_NUM:
+    node = new ASTNode(op, value);
+    break;
+  case ExprHelper::OP_INDEX:
+    // TODO: translate indices
+    node = new ASTNode(op, name);
+    break;
+  default:
+    node = new ASTNode(op);
+  };
+  node->extend_subexprs(numsubexprs);
+  for (size_t i = 1; i <= numsubexprs; ++i)
+    {
+      ASTNode *subexpr = expr_stack->top();
+      node->add_back_subexpr(subexpr,i);
+      expr_stack->pop();
+    }
+  expr_stack->push(node);
+
+  return &expr_stack->top();
+}
+
+Expression* parse_expression(const char* input) {
+  yy_scan_string(input);
+  std::stack<ASTNode*> expr_stack;
+  yyparse(&expr_stack);
+
+  // DEBUGGING
+  std::cout << "yyparse complete. Stack size: " << expr_stack.size() << std::endl;
+  std::cout << "parser prettyprint:" << std::endl;
+  expr_stack.top()->printpretty("");
+
+  Expression *dummy_root = new Expression();
+  expr_stack.top()->to_expr(dummy_root);
+  return std::get<0>(dummy_root->sub_exprs[0]);
+}
+
+void yyerror(std::stack<ASTNode*>* expr_stack, const char *msg) {
+   printf("** Line %d: %s\n", yylloc.first_line, msg);
+}
\ No newline at end of file
diff --git a/source/adios2/toolkit/format/bp5/BP5Deserializer.cpp b/source/adios2/toolkit/format/bp5/BP5Deserializer.cpp
index cf9fbefc14..6e6ea7e806 100644
--- a/source/adios2/toolkit/format/bp5/BP5Deserializer.cpp
+++ b/source/adios2/toolkit/format/bp5/BP5Deserializer.cpp
@@ -1,3 +1,4 @@
+
 /*
  * Distributed under the OSI-approved Apache License, Version 2.0.  See
  * accompanying file Copyright.txt for details.
@@ -238,8 +239,8 @@ void BP5Deserializer::BreakdownArrayName(const char *Name, char **base_name_p, D
 {
     /* string formatted as bp5_%d_%d_actualname */
     char *p;
-    // + 3 to skip BP5_ or bp5_ prefix
-    long n = strtol(Name + 4, &p, 10);
+    // Prefix has already been skipped
+    long n = strtol(Name, &p, 10);
     *element_size_p = static_cast<int>(n);
     ++p; // skip '_'
     long Type = strtol(p, &p, 10);
@@ -295,6 +296,59 @@ BP5Deserializer::BP5VarRec *BP5Deserializer::CreateVarRec(const char *ArrayName)
     return Ret;
 }
 
+/*
+ * Decode base64 data to 'output'.  Decode in-place if 'output' is NULL.
+ * Return the length of the decoded data, or -1 if there was an error.
+ */
+static const char signed char_to_num[256] = {
+    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 62, -1, -1, -1, 63,
+    52, 53, 54, 55, 56, 57, 58, 59, 60, 61, -1, -1, -1, -1, -1, -1, -1, 0,  1,  2,  3,  4,  5,  6,
+    7,  8,  9,  10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -1, -1, -1, -1,
+    -1, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48,
+    49, 50, 51, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+};
+static int base64_decode(unsigned char *input, unsigned char *output)
+{
+    int len = 0;
+    int c1, c2, c3, c4;
+
+    if (output == NULL)
+        output = input;
+    while (*input)
+    {
+        c1 = *input++;
+        if (char_to_num[c1] == -1)
+            return -1;
+        c2 = *input++;
+        if (char_to_num[c2] == -1)
+            return -1;
+        c3 = *input++;
+        if (c3 != '=' && char_to_num[c3] == -1)
+            return -1;
+        c4 = *input++;
+        if (c4 != '=' && char_to_num[c4] == -1)
+            return -1;
+        *output++ = (char_to_num[c1] << 2) | (char_to_num[c2] >> 4);
+        ++len;
+        if (c3 == '=')
+            break;
+        *output++ = ((char_to_num[c2] << 4) & 0xf0) | (char_to_num[c3] >> 2);
+        ++len;
+        if (c4 == '=')
+            break;
+        *output++ = ((char_to_num[c3] << 6) & 0xc0) | char_to_num[c4];
+        ++len;
+    }
+
+    return len;
+}
+
 BP5Deserializer::ControlInfo *BP5Deserializer::BuildControl(FMFormat Format)
 {
     FMStructDescList FormatList = format_list_of_FMFormat(Format);
@@ -312,6 +366,9 @@ BP5Deserializer::ControlInfo *BP5Deserializer::BuildControl(FMFormat Format)
     size_t VarIndex = 0;
     while (FieldList[i].field_name)
     {
+        size_t HeaderSkip;
+        char *ExprStr = NULL;
+        int Derived = 0;
         ret = (ControlInfo *)realloc(ret, sizeof(*ret) + ControlCount * sizeof(struct ControlInfo));
         struct ControlStruct *C = &(ret->Controls[ControlCount]);
         ControlCount++;
@@ -336,6 +393,29 @@ BP5Deserializer::ControlInfo *BP5Deserializer::BuildControl(FMFormat Format)
             C->OrigShapeID = ShapeID::LocalArray;
             break;
         }
+        if (FieldList[i].field_name[3] == '_')
+        {
+            HeaderSkip = 4;
+        }
+        else if (FieldList[i].field_name[3] == '-')
+        {
+            // Expression follows
+            Derived = 1;
+            int EncLen;
+            int NumberLen;
+            if (sscanf(&FieldList[i].field_name[4], "%d%n", &EncLen, &NumberLen) == 1)
+            { // Expression
+                ExprStr = (char *)malloc(EncLen + 1);
+                const char *Dash = strchr(&FieldList[i].field_name[4], '-');
+                base64_decode((unsigned char *)Dash + 1, (unsigned char *)ExprStr);
+                HeaderSkip = 6 + NumberLen + EncLen;
+            }
+            else
+            {
+                fprintf(stderr, "Bad Expression spec in field %s\n", FieldList[i].field_name);
+            }
+        }
+        //
         BP5VarRec *VarRec = nullptr;
         if (NameIndicatesArray(FieldList[i].field_name))
         {
@@ -356,8 +436,8 @@ BP5Deserializer::ControlInfo *BP5Deserializer::BuildControl(FMFormat Format)
             else
             {
                 BreakdownFieldType(FieldList[i].field_type, Operator, MinMax);
-                BreakdownArrayName(FieldList[i].field_name, &ArrayName, &Type, &ElementSize,
-                                   &StructFormat);
+                BreakdownArrayName(FieldList[i].field_name + HeaderSkip, &ArrayName, &Type,
+                                   &ElementSize, &StructFormat);
             }
             VarRec = LookupVarByName(ArrayName);
             if (!VarRec)
@@ -366,6 +446,8 @@ BP5Deserializer::ControlInfo *BP5Deserializer::BuildControl(FMFormat Format)
                 VarRec->Type = Type;
                 VarRec->ElementSize = ElementSize;
                 VarRec->OrigShapeID = C->OrigShapeID;
+                VarRec->Derived = Derived;
+                VarRec->ExprStr = ExprStr;
                 if (StructFormat)
                 {
                     core::StructDefinition *Def =
@@ -1501,6 +1583,8 @@ BP5Deserializer::GenerateReadRequests(const bool doAllocTempBuffers, size_t *max
                     RR.Timestep = Req->Step;
                     RR.WriterRank = WriterRank;
                     RR.StartOffset = writer_meta_base->DataBlockLocation[NeededBlock];
+                    if (RR.StartOffset == (size_t)-1)
+                        throw std::runtime_error("No data exists for this variable");
                     if (Req->MemSpace != MemorySpace::Host)
                         RR.DirectToAppMemory = false;
                     else
@@ -1574,6 +1658,8 @@ BP5Deserializer::GenerateReadRequests(const bool doAllocTempBuffers, size_t *max
                             RR.StartOffset = writer_meta_base->DataBlockLocation[Block];
                             RR.ReadLength = writer_meta_base->DataBlockSize[Block];
                             RR.DestinationAddr = nullptr;
+                            if (RR.StartOffset == (size_t)-1)
+                                throw std::runtime_error("No data exists for this variable");
                             if (doAllocTempBuffers)
                             {
                                 RR.DestinationAddr = (char *)malloc(RR.ReadLength);
@@ -1611,6 +1697,8 @@ BP5Deserializer::GenerateReadRequests(const bool doAllocTempBuffers, size_t *max
                             RR.WriterRank = WriterRank;
                             RR.StartOffset =
                                 writer_meta_base->DataBlockLocation[Block] + StartOffsetInBlock;
+                            if (writer_meta_base->DataBlockLocation[Block] == (size_t)-1)
+                                throw std::runtime_error("No data exists for this variable");
                             RR.ReadLength = EndOffsetInBlock - StartOffsetInBlock;
                             if (Req->MemSpace != MemorySpace::Host)
                                 RR.DirectToAppMemory = false;
diff --git a/source/adios2/toolkit/format/bp5/BP5Deserializer.h b/source/adios2/toolkit/format/bp5/BP5Deserializer.h
index 3c6aefa355..c2d6f39568 100644
--- a/source/adios2/toolkit/format/bp5/BP5Deserializer.h
+++ b/source/adios2/toolkit/format/bp5/BP5Deserializer.h
@@ -120,6 +120,8 @@ class BP5Deserializer : virtual public BP5Base
         size_t JoinedDimen = SIZE_MAX;
         size_t *LastJoinedOffset = NULL;
         size_t *LastJoinedShape = NULL;
+        bool Derived = false;
+        char *ExprStr = NULL;
         ShapeID OrigShapeID;
         core::StructDefinition *Def = nullptr;
         core::StructDefinition *ReaderDef = nullptr;
diff --git a/source/adios2/toolkit/format/bp5/BP5Serializer.cpp b/source/adios2/toolkit/format/bp5/BP5Serializer.cpp
index 4bd0140175..d7145bd7fb 100644
--- a/source/adios2/toolkit/format/bp5/BP5Serializer.cpp
+++ b/source/adios2/toolkit/format/bp5/BP5Serializer.cpp
@@ -10,6 +10,9 @@
 #include "adios2/core/Engine.h"
 #include "adios2/core/IO.h"
 #include "adios2/core/VariableBase.h"
+#ifdef ADIOS2_HAVE_DERIVED_VARIABLE
+#include "adios2/core/VariableDerived.h"
+#endif
 #include "adios2/helper/adiosFunctions.h"
 #include "adios2/toolkit/format/buffer/ffs/BufferFFS.h"
 
@@ -234,25 +237,104 @@ char *BP5Serializer::BuildVarName(const char *base_name, const ShapeID Shape, co
     return Ret;
 }
 
+/*
+ * Do base64 encoding of binary buffer, returning a malloc'd string
+ */
+static const char num_to_char[] =
+    "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
+
+static char *base64_encode(const char *buffer, unsigned int len)
+{
+    char *buf;
+    int buflen = 0;
+    int c1, c2, c3;
+    int maxlen = len * 4 / 3 + 4;
+#ifdef OVERKILL
+    maxlen = len * 2 + 2;
+#endif
+
+    buf = (char *)malloc(maxlen * sizeof(char));
+    if (buf == NULL)
+    {
+        return NULL;
+    }
+    else
+    {
+        memset(buf, 0, maxlen * sizeof(char));
+    }
+
+    while (len)
+    {
+
+        c1 = (unsigned char)*buffer++;
+        buf[buflen++] = num_to_char[c1 >> 2];
+
+        if (--len == 0)
+            c2 = 0;
+        else
+            c2 = (unsigned char)*buffer++;
+        buf[buflen++] = num_to_char[((c1 & 0x3) << 4) | ((c2 & 0xf0) >> 4)];
+
+        if (len == 0)
+        {
+            buf[buflen++] = '=';
+            buf[buflen++] = '=';
+            break;
+        }
+
+        if (--len == 0)
+            c3 = 0;
+        else
+            c3 = (unsigned char)*buffer++;
+
+        buf[buflen++] = num_to_char[((c2 & 0xf) << 2) | ((c3 & 0xc0) >> 6)];
+        if (len == 0)
+        {
+            buf[buflen++] = '=';
+
+            break;
+        }
+
+        --len;
+        buf[buflen++] = num_to_char[c3 & 0x3f];
+    }
+
+    buf[buflen] = 0;
+
+    return buf;
+}
+
 static char *BuildLongName(const char *base_name, const ShapeID Shape, const int type,
-                           const size_t element_size, const char *StructID)
+                           const size_t element_size, const char *StructID, const char *ExprStr)
 {
     const char *Prefix = NamePrefix(Shape);
     size_t StructIDLen = 0;
+    size_t ExprLen = 0;
+    char *ExpressionInsert = (char *)"_";
     if (StructID)
         StructIDLen = strlen(StructID);
-    size_t Len = strlen(base_name) + 3 + strlen(Prefix) + StructIDLen + 16;
+    if (ExprStr)
+    {
+        char *ExprEnc = base64_encode(ExprStr, (int)(strlen(ExprStr) + 1));
+        ExprLen = strlen(ExprEnc);
+        ExpressionInsert = (char *)malloc(ExprLen + 16); // str + enough for len and separators
+        snprintf(ExpressionInsert, ExprLen + 16, "-%zu-%s-", ExprLen, ExprEnc);
+        free(ExprEnc);
+    }
+    size_t Len = strlen(base_name) + 3 + ExprLen + strlen(Prefix) + StructIDLen + 16;
     char *Ret = (char *)malloc(Len);
     if (StructID)
     {
-        snprintf(Ret, Len, "%s_%zd_%d_%s", Prefix, element_size, type, StructID);
+        snprintf(Ret, Len, "%s%s%zd_%d_%s", Prefix, ExpressionInsert, element_size, type, StructID);
     }
     else
     {
-        snprintf(Ret, Len, "%s_%zd_%d", Prefix, element_size, type);
+        snprintf(Ret, Len, "%s%s%zd_%d", Prefix, ExpressionInsert, element_size, type);
     }
     strcat(Ret, "_");
     strcat(Ret, base_name);
+    if (ExprStr)
+        free(ExpressionInsert);
     return Ret;
 }
 
@@ -420,6 +502,9 @@ BP5Serializer::BP5WriterRec BP5Serializer::CreateWriterRec(void *Variable, const
                                                            size_t DimCount)
 {
     core::VariableBase *VB = static_cast<core::VariableBase *>(Variable);
+#ifdef ADIOS2_HAVE_DERIVED_VARIABLE
+    core::VariableDerived *VD = dynamic_cast<core::VariableDerived *>(VB);
+#endif
     auto obj = Info.RecMap.insert(std::make_pair(Variable, _BP5WriterRec()));
     BP5WriterRec Rec = &obj.first->second;
     if (Type == DataType::String)
@@ -497,7 +582,12 @@ BP5Serializer::BP5WriterRec BP5Serializer::CreateWriterRec(void *Variable, const
         }
         // Array field.  To Metadata, add FMFields for DimCount, Shape, Count
         // and Offsets matching _MetaArrayRec
-        char *LongName = BuildLongName(Name, VB->m_ShapeID, (int)Type, ElemSize, TextStructID);
+        const char *ExprString = NULL;
+#ifdef ADIOS2_HAVE_DERIVED_VARIABLE
+        ExprString = VD ? VD->m_Expr.ExprString.c_str() : NULL;
+#endif
+        char *LongName =
+            BuildLongName(Name, VB->m_ShapeID, (int)Type, ElemSize, TextStructID, ExprString);
 
         const char *ArrayTypeName = "MetaArray";
         int FieldSize = sizeof(MetaArrayRec);
@@ -643,7 +733,18 @@ void BP5Serializer::Marshal(void *Variable, const char *Name, const DataType Typ
     };
 
     core::VariableBase *VB = static_cast<core::VariableBase *>(Variable);
+#ifdef ADIOS2_HAVE_DERIVED_VARIABLE
+    core::VariableDerived *VD = dynamic_cast<core::VariableDerived *>(VB);
+#endif
 
+    bool WriteData = true;
+#ifdef ADIOS2_HAVE_DERIVED_VARIABLE
+    if (VD)
+    {
+        // All other types of Derived types we don't write data
+        WriteData = (VD->GetDerivedType() == DerivedVarType::StoreData);
+    }
+#endif
     BP5MetadataInfoStruct *MBase;
 
     BP5WriterRec Rec = LookupWriterRec(Variable);
@@ -714,7 +815,12 @@ void BP5Serializer::Marshal(void *Variable, const char *Name, const DataType Typ
 
         MinMaxStruct MinMax;
         MinMax.Init(Type);
-        if ((m_StatsLevel > 0) && !Span)
+        bool DerivedWithoutStats = false;
+#ifdef ADIOS2_HAVE_DERIVED_VARIABLE
+        DerivedWithoutStats = VD && (VD->GetDerivedType() == DerivedVarType::ExpressionString);
+#endif
+        bool DoMinMax = ((m_StatsLevel > 0) && !DerivedWithoutStats);
+        if (DoMinMax && !Span)
         {
             GetMinMax(Data, ElemCount, (DataType)Rec->Type, MinMax, MemSpace);
         }
@@ -744,6 +850,11 @@ void BP5Serializer::Marshal(void *Variable, const char *Name, const DataType Typ
                     VB->m_Operations[0]->GetHeaderSize(), MemSpace);
             CurDataBuffer->DownsizeLastAlloc(AllocSize, CompressedSize);
         }
+        else if (!WriteData)
+        {
+            DataOffset = (size_t)-1;
+            DeferAddToVec = false;
+        }
         else if (Span == nullptr)
         {
             if (!DeferAddToVec)
@@ -781,7 +892,7 @@ void BP5Serializer::Marshal(void *Variable, const char *Name, const DataType Typ
                 MetaEntry->Offsets = CopyDims(DimCount, Offsets);
             else
                 MetaEntry->Offsets = NULL;
-            if (m_StatsLevel > 0)
+            if (DoMinMax)
             {
                 void **MMPtrLoc = (void **)(((char *)MetaEntry) + Rec->MinMaxOffset);
                 *MMPtrLoc = (void *)malloc(ElemSize * 2);
@@ -823,7 +934,7 @@ void BP5Serializer::Marshal(void *Variable, const char *Name, const DataType Typ
                     (size_t *)realloc(OpEntry->DataBlockSize, OpEntry->BlockCount * sizeof(size_t));
                 OpEntry->DataBlockSize[OpEntry->BlockCount - 1] = CompressedSize;
             }
-            if (m_StatsLevel > 0)
+            if (DoMinMax)
             {
                 void **MMPtrLoc = (void **)(((char *)MetaEntry) + Rec->MinMaxOffset);
                 *MMPtrLoc = (void *)realloc(*MMPtrLoc, MetaEntry->BlockCount * ElemSize * 2);
@@ -854,6 +965,18 @@ void BP5Serializer::Marshal(void *Variable, const char *Name, const DataType Typ
     }
 }
 
+const void *BP5Serializer::SearchDeferredBlocks(size_t MetaOffset, size_t BlockID)
+{
+    for (auto &Def : DeferredExterns)
+    {
+        if ((Def.MetaOffset == MetaOffset) && (Def.BlockID == BlockID))
+        {
+            return Def.Data;
+        }
+    }
+    return NULL;
+}
+
 MinVarInfo *BP5Serializer::MinBlocksInfo(const core::VariableBase &Var)
 {
     BP5WriterRec VarRec = LookupWriterRec((void *)&Var);
@@ -912,8 +1035,10 @@ MinVarInfo *BP5Serializer::MinBlocksInfo(const core::VariableBase &Var)
             }
             else
             {
-                Blk.BufferP = CurDataBuffer->GetPtr(MetaEntry->DataBlockLocation[b] -
-                                                    m_PriorDataBufferSizeTotal);
+                Blk.BufferP = (void *)SearchDeferredBlocks(VarRec->MetaOffset, b);
+                if (!Blk.BufferP)
+                    Blk.BufferP = CurDataBuffer->GetPtr(MetaEntry->DataBlockLocation[b] -
+                                                        m_PriorDataBufferSizeTotal);
             }
             MV->BlocksInfo.push_back(Blk);
         }
diff --git a/source/adios2/toolkit/format/bp5/BP5Serializer.h b/source/adios2/toolkit/format/bp5/BP5Serializer.h
index 94025f1b2b..d07aa727fa 100644
--- a/source/adios2/toolkit/format/bp5/BP5Serializer.h
+++ b/source/adios2/toolkit/format/bp5/BP5Serializer.h
@@ -244,6 +244,9 @@ class BP5Serializer : virtual public BP5Base
         size_t ElemCount;
         void *Array;
     } ArrayRec;
+
+private:
+    const void *SearchDeferredBlocks(size_t MetaOffset, size_t blocknum);
 };
 
 } // end namespace format
diff --git a/source/adios2/toolkit/format/buffer/malloc/MallocV.cpp b/source/adios2/toolkit/format/buffer/malloc/MallocV.cpp
index 90359071fb..efe28d79bd 100644
--- a/source/adios2/toolkit/format/buffer/malloc/MallocV.cpp
+++ b/source/adios2/toolkit/format/buffer/malloc/MallocV.cpp
@@ -7,6 +7,7 @@
  */
 
 #include "MallocV.h"
+#include "adios2/helper/adiosFunctions.h"
 #include "adios2/toolkit/format/buffer/BufferV.h"
 
 #include <algorithm>
@@ -74,7 +75,12 @@ size_t MallocV::AddToVec(const size_t size, const void *buf, size_t align, bool
             m_InternalBlock = (char *)realloc(m_InternalBlock, NewSize);
             m_AllocatedSize = NewSize;
         }
-        memcpy(m_InternalBlock + m_internalPos, buf, size);
+#ifdef ADIOS2_HAVE_GPU_SUPPORT
+        if (MemSpace == MemorySpace::GPU)
+            helper::CopyFromGPUToBuffer(m_InternalBlock, m_internalPos, buf, MemSpace, size);
+#endif
+        if (MemSpace == MemorySpace::Host)
+            memcpy(m_InternalBlock + m_internalPos, buf, size);
 
         if (DataV.size() && !DataV.back().External &&
             (m_internalPos == (DataV.back().Offset + DataV.back().Size)))
diff --git a/source/adios2/toolkit/format/dataman/DataManSerializer.cpp b/source/adios2/toolkit/format/dataman/DataManSerializer.cpp
index daa72cfb9c..1740afff71 100644
--- a/source/adios2/toolkit/format/dataman/DataManSerializer.cpp
+++ b/source/adios2/toolkit/format/dataman/DataManSerializer.cpp
@@ -600,8 +600,8 @@ void DataManSerializer::Log(const int level, const std::string &message, const b
 void DataManSerializer::PutData(const std::string *inputData, const std::string &varName,
                                 const Dims &varShape, const Dims &varStart, const Dims &varCount,
                                 const Dims &varMemStart, const Dims &varMemCount,
-                                const std::string &doid, const size_t step, const int rank,
-                                const std::string &address,
+                                const MemorySpace varMemSpace, const std::string &doid,
+                                const size_t step, const int rank, const std::string &address,
                                 const std::vector<std::shared_ptr<core::Operator>> &ops,
                                 VecPtr localBuffer, JsonPtr metadataJson)
 {
@@ -646,8 +646,14 @@ void DataManSerializer::PutData(const std::string *inputData, const std::string
 
     localBuffer->resize(localBuffer->size() + inputData->size());
 
-    std::memcpy(localBuffer->data() + localBuffer->size() - inputData->size(), inputData->data(),
-                inputData->size());
+#ifdef ADIOS2_HAVE_GPU_SUPPORT
+    if (varMemSpace == MemorySpace::GPU)
+        helper::CopyFromGPUToBuffer(localBuffer->data(), localBuffer->size() - inputData->size(),
+                                    inputData->data(), varMemSpace, inputData->size());
+#endif
+    if (varMemSpace == MemorySpace::Host)
+        std::memcpy(localBuffer->data() + localBuffer->size() - inputData->size(),
+                    inputData->data(), inputData->size());
 
     if (metadataJson == nullptr)
     {
@@ -665,7 +671,8 @@ void DataManSerializer::PutData(const std::string *inputData, const std::string
 template <>
 int DataManSerializer::GetData(std::string *outputData, const std::string &varName,
                                const Dims &varStart, const Dims &varCount, const size_t step,
-                               const Dims &varMemStart, const Dims &varMemCount)
+                               const MemorySpace varMemSpace, const Dims &varMemStart,
+                               const Dims &varMemCount)
 {
     PERFSTUBS_SCOPED_TIMER_FUNC();
 
diff --git a/source/adios2/toolkit/format/dataman/DataManSerializer.h b/source/adios2/toolkit/format/dataman/DataManSerializer.h
index 9adc515368..4e2c2ada6a 100644
--- a/source/adios2/toolkit/format/dataman/DataManSerializer.h
+++ b/source/adios2/toolkit/format/dataman/DataManSerializer.h
@@ -94,16 +94,16 @@ class DataManSerializer
     // put a variable for writer
     void PutData(const std::string *inputData, const std::string &varName, const Dims &varShape,
                  const Dims &varStart, const Dims &varCount, const Dims &varMemStart,
-                 const Dims &varMemCount, const std::string &doid, const size_t step,
-                 const int rank, const std::string &address,
+                 const Dims &varMemCount, const MemorySpace varMemSpace, const std::string &doid,
+                 const size_t step, const int rank, const std::string &address,
                  const std::vector<std::shared_ptr<core::Operator>> &ops,
                  VecPtr localBuffer = nullptr, JsonPtr metadataJson = nullptr);
 
     template <class T>
     void PutData(const T *inputData, const std::string &varName, const Dims &varShape,
                  const Dims &varStart, const Dims &varCount, const Dims &varMemStart,
-                 const Dims &varMemCount, const std::string &doid, const size_t step,
-                 const int rank, const std::string &address,
+                 const Dims &varMemCount, const MemorySpace varMemSpace, const std::string &doid,
+                 const size_t step, const int rank, const std::string &address,
                  const std::vector<std::shared_ptr<core::Operator>> &ops,
                  VecPtr localBuffer = nullptr, JsonPtr metadataJson = nullptr);
 
@@ -134,8 +134,8 @@ class DataManSerializer
 
     template <class T>
     int GetData(T *output_data, const std::string &varName, const Dims &varStart,
-                const Dims &varCount, const size_t step, const Dims &varMemStart = Dims(),
-                const Dims &varMemCount = Dims());
+                const Dims &varCount, const size_t step, const MemorySpace varMemSpace,
+                const Dims &varMemStart = Dims(), const Dims &varMemCount = Dims());
 
     void Erase(const size_t step, const bool allPreviousSteps = false);
 
@@ -166,7 +166,8 @@ class DataManSerializer
     nlohmann::json DeserializeJson(const char *start, size_t size);
 
     template <typename T>
-    void CalculateMinMax(const T *data, const Dims &count, nlohmann::json &metaj);
+    void CalculateMinMax(const T *data, const Dims &count, const MemorySpace varMemSpace,
+                         nlohmann::json &metaj);
 
     bool StepHasMinimumBlocks(const size_t step, const int requireMinimumBlocks);
 
diff --git a/source/adios2/toolkit/format/dataman/DataManSerializer.tcc b/source/adios2/toolkit/format/dataman/DataManSerializer.tcc
index 17b99d2071..e3fd9b29d3 100644
--- a/source/adios2/toolkit/format/dataman/DataManSerializer.tcc
+++ b/source/adios2/toolkit/format/dataman/DataManSerializer.tcc
@@ -25,35 +25,43 @@ namespace format
 template <>
 inline void DataManSerializer::CalculateMinMax<std::complex<float>>(const std::complex<float> *data,
                                                                     const Dims &count,
+                                                                    const MemorySpace varMemSpace,
                                                                     nlohmann::json &metaj)
 {
 }
 
 template <>
-inline void
-DataManSerializer::CalculateMinMax<std::complex<double>>(const std::complex<double> *data,
-                                                         const Dims &count, nlohmann::json &metaj)
+inline void DataManSerializer::CalculateMinMax<std::complex<double>>(
+    const std::complex<double> *data, const Dims &count, const MemorySpace varMemSpace,
+    nlohmann::json &metaj)
 {
 }
 
 template <typename T>
-void DataManSerializer::CalculateMinMax(const T *data, const Dims &count, nlohmann::json &metaj)
+void DataManSerializer::CalculateMinMax(const T *data, const Dims &count,
+                                        const MemorySpace varMemSpace, nlohmann::json &metaj)
 {
     PERFSTUBS_SCOPED_TIMER_FUNC();
     size_t size = std::accumulate(count.begin(), count.end(), 1, std::multiplies<size_t>());
     T max = std::numeric_limits<T>::min();
     T min = std::numeric_limits<T>::max();
-
-    for (size_t j = 0; j < size; ++j)
+#ifdef ADIOS2_HAVE_GPU_SUPPORT
+    if (varMemSpace == MemorySpace::GPU)
+        helper::GetGPUMinMax(data, size, min, max);
+#endif
+    if (varMemSpace == MemorySpace::Host)
     {
-        T value = data[j];
-        if (value > max)
+        for (size_t j = 0; j < size; ++j)
         {
-            max = value;
-        }
-        if (value < min)
-        {
-            min = value;
+            T value = data[j];
+            if (value > max)
+            {
+                max = value;
+            }
+            if (value < min)
+            {
+                min = value;
+            }
         }
     }
 
@@ -73,16 +81,16 @@ void DataManSerializer::PutData(const core::Variable<T> &variable, const std::st
 {
     PERFSTUBS_SCOPED_TIMER_FUNC();
     PutData(variable.GetData(), variable.m_Name, variable.m_Shape, variable.m_Start,
-            variable.m_Count, variable.m_MemoryStart, variable.m_MemoryCount, doid, step, rank,
-            address, variable.m_Operations, localBuffer, metadataJson);
+            variable.m_Count, variable.m_MemoryStart, variable.m_MemoryCount, variable.m_MemSpace,
+            doid, step, rank, address, variable.m_Operations, localBuffer, metadataJson);
 }
 
 template <class T>
 void DataManSerializer::PutData(const T *inputData, const std::string &varName,
                                 const Dims &varShape, const Dims &varStart, const Dims &varCount,
                                 const Dims &varMemStart, const Dims &varMemCount,
-                                const std::string &doid, const size_t step, const int rank,
-                                const std::string &address,
+                                const MemorySpace varMemSpace, const std::string &doid,
+                                const size_t step, const int rank, const std::string &address,
                                 const std::vector<std::shared_ptr<core::Operator>> &ops,
                                 VecPtr localBuffer, JsonPtr metadataJson)
 {
@@ -111,7 +119,7 @@ void DataManSerializer::PutData(const T *inputData, const std::string &varName,
 
     if (m_EnableStat)
     {
-        CalculateMinMax(inputData, varCount, metaj);
+        CalculateMinMax(inputData, varCount, varMemSpace, metaj);
     }
 
     if (not m_IsRowMajor)
@@ -171,7 +179,13 @@ void DataManSerializer::PutData(const T *inputData, const std::string &varName,
     }
     else
     {
-        std::memcpy(localBuffer->data() + localBuffer->size() - datasize, inputData, datasize);
+#ifdef ADIOS2_HAVE_GPU_SUPPORT
+        if (varMemSpace == MemorySpace::GPU)
+            helper::CopyFromGPUToBuffer(localBuffer->data(), localBuffer->size() - datasize,
+                                        inputData, varMemSpace, datasize);
+#endif
+        if (varMemSpace == MemorySpace::Host)
+            std::memcpy(localBuffer->data() + localBuffer->size() - datasize, inputData, datasize);
     }
 
     if (metadataJson == nullptr)
@@ -189,7 +203,8 @@ void DataManSerializer::PutData(const T *inputData, const std::string &varName,
 
 template <class T>
 int DataManSerializer::GetData(T *outputData, const std::string &varName, const Dims &varStart,
-                               const Dims &varCount, const size_t step, const Dims &varMemStart,
+                               const Dims &varCount, const size_t step,
+                               const MemorySpace varMemSpace, const Dims &varMemStart,
                                const Dims &varMemCount)
 {
     PERFSTUBS_SCOPED_TIMER_FUNC();
@@ -238,7 +253,7 @@ int DataManSerializer::GetData(T *outputData, const std::string &varName, const
                 m_OperatorMapMutex.unlock();
                 decompressBuffer.reserve(helper::GetTotalSize(j.count, sizeof(T)));
                 core::Decompress(j.buffer->data() + j.position, j.size, decompressBuffer.data(),
-                                 MemorySpace::Host);
+                                 varMemSpace);
                 decompressed = true;
                 input_data = decompressBuffer.data();
             }
@@ -261,14 +276,14 @@ int DataManSerializer::GetData(T *outputData, const std::string &varName, const
                     helper::NdCopy(input_data, j.start, j.count, true, j.isLittleEndian,
                                    reinterpret_cast<char *>(outputData), varStart, varCount, true,
                                    m_IsLittleEndian, sizeof(T), j.start, j.count, varMemStart,
-                                   varMemCount);
+                                   varMemCount, false, varMemSpace);
                 }
                 else
                 {
                     helper::NdCopy(input_data, j.start, j.count, j.isRowMajor, j.isLittleEndian,
                                    reinterpret_cast<char *>(outputData), varStart, varCount,
                                    m_IsRowMajor, m_IsLittleEndian, sizeof(T), j.start, j.count,
-                                   varMemStart, varMemCount);
+                                   varMemStart, varMemCount, false, varMemSpace);
                 }
             }
             else
diff --git a/source/adios2/toolkit/remote/remote_common.cpp b/source/adios2/toolkit/remote/remote_common.cpp
index b7391e76c4..bab73b0462 100644
--- a/source/adios2/toolkit/remote/remote_common.cpp
+++ b/source/adios2/toolkit/remote/remote_common.cpp
@@ -115,6 +115,25 @@ FMStructDescRec KillResponseStructs[] = {
     {"KillResponse", KillResponseList, sizeof(struct _KillResponseMsg), NULL},
     {NULL, NULL, 0, NULL}};
 
+FMField StatusServerList[] = {{"StatusResponseCondition", "integer", sizeof(long),
+                               FMOffset(StatusServerMsg, StatusResponseCondition)},
+                              {NULL, NULL, 0, 0}};
+
+FMStructDescRec StatusServerStructs[] = {
+    {"StatusServer", StatusServerList, sizeof(struct _StatusServerMsg), NULL},
+    {NULL, NULL, 0, NULL}};
+
+FMField StatusResponseList[] = {
+    {"StatusResponseCondition", "integer", sizeof(long),
+     FMOffset(StatusResponseMsg, StatusResponseCondition)},
+    {"Hostname", "string", sizeof(char *), FMOffset(StatusResponseMsg, Hostname)},
+    {"Status", "string", sizeof(char *), FMOffset(StatusResponseMsg, Status)},
+    {NULL, NULL, 0, 0}};
+
+FMStructDescRec StatusResponseStructs[] = {
+    {"StatusResponse", StatusResponseList, sizeof(struct _StatusResponseMsg), NULL},
+    {NULL, NULL, 0, NULL}};
+
 void RegisterFormats(RemoteCommon::Remote_evpath_state &ev_state)
 {
     ev_state.OpenFileFormat = CMregister_format(ev_state.cm, RemoteCommon::OpenFileStructs);
@@ -129,6 +148,9 @@ void RegisterFormats(RemoteCommon::Remote_evpath_state &ev_state)
     ev_state.CloseFileFormat = CMregister_format(ev_state.cm, RemoteCommon::CloseFileStructs);
     ev_state.KillServerFormat = CMregister_format(ev_state.cm, RemoteCommon::KillServerStructs);
     ev_state.KillResponseFormat = CMregister_format(ev_state.cm, RemoteCommon::KillResponseStructs);
+    ev_state.StatusServerFormat = CMregister_format(ev_state.cm, RemoteCommon::StatusServerStructs);
+    ev_state.StatusResponseFormat =
+        CMregister_format(ev_state.cm, RemoteCommon::StatusResponseStructs);
 }
 }
 }
diff --git a/source/adios2/toolkit/remote/remote_common.h b/source/adios2/toolkit/remote/remote_common.h
index 53f84aaf61..fc75b4a1f0 100644
--- a/source/adios2/toolkit/remote/remote_common.h
+++ b/source/adios2/toolkit/remote/remote_common.h
@@ -103,6 +103,18 @@ typedef struct _KillResponseMsg
     char *Status;
 } *KillResponseMsg;
 
+typedef struct _StatusServerMsg
+{
+    int StatusResponseCondition;
+} *StatusServerMsg;
+
+typedef struct _StatusResponseMsg
+{
+    int StatusResponseCondition;
+    char *Hostname;
+    char *Status;
+} *StatusResponseMsg;
+
 enum VerbosityLevel
 {
     NoVerbose = 0,       // Generally no output (but not absolutely quiet?)
@@ -129,6 +141,8 @@ struct Remote_evpath_state
     CMFormat CloseFileFormat;
     CMFormat KillServerFormat;
     CMFormat KillResponseFormat;
+    CMFormat StatusServerFormat;
+    CMFormat StatusResponseFormat;
 };
 
 void RegisterFormats(struct Remote_evpath_state &ev_state);
diff --git a/source/adios2/toolkit/remote/remote_server.cpp b/source/adios2/toolkit/remote/remote_server.cpp
index 202d0be8c7..98ff1bcdcd 100644
--- a/source/adios2/toolkit/remote/remote_server.cpp
+++ b/source/adios2/toolkit/remote/remote_server.cpp
@@ -272,8 +272,8 @@ static void GetRequestHandler(CManager cm, CMConnection conn, void *vevent, void
         Response.ReadResponseCondition = GetMsg->GetResponseCondition;                             \
         Response.Dest = GetMsg->Dest; /* final data destination in client memory space */          \
         if (verbose >= 2)                                                                          \
-            std::cout << "Returning " << Response.Size << " " << readable_size(Response.Size)      \
-                      << " for Get<" << TypeOfVar << ">(" << VarName << ")" << b << std::endl;     \
+            std::cout << "Returning " << readable_size(Response.Size) << " for Get<" << TypeOfVar  \
+                      << ">(" << VarName << ")" << b << std::endl;                                 \
         f->m_BytesSent += Response.Size;                                                           \
         f->m_OperationCount++;                                                                     \
         TotalGetBytesSent += Response.Size;                                                        \
@@ -349,6 +349,38 @@ static void KillResponseHandler(CManager cm, CMConnection conn, void *vevent, vo
     exit(0);
 }
 
+static void StatusServerHandler(CManager cm, CMConnection conn, void *vevent, void *client_data,
+                                attr_list attrs)
+{
+    StatusServerMsg status_msg = static_cast<StatusServerMsg>(vevent);
+    struct Remote_evpath_state *ev_state = static_cast<struct Remote_evpath_state *>(client_data);
+    _StatusResponseMsg status_response_msg;
+    char hostbuffer[256];
+
+    // To retrieve hostname
+    gethostname(hostbuffer, sizeof(hostbuffer));
+    memset(&status_response_msg, 0, sizeof(status_response_msg));
+    status_response_msg.StatusResponseCondition = status_msg->StatusResponseCondition;
+    status_response_msg.Hostname = &hostbuffer[0];
+    std::stringstream Status;
+    Status << "ADIOS files Opened: " << ADIOSFilesOpened << " (" << TotalGets << " gets for "
+           << readable_size(TotalGetBytesSent) << ")  Simple files opened: " << SimpleFilesOpened
+           << " (" << TotalSimpleReads << " reads for " << readable_size(TotalSimpleBytesSent)
+           << ")";
+    status_response_msg.Status = strdup(Status.str().c_str());
+    CMwrite(conn, ev_state->StatusResponseFormat, &status_response_msg);
+    free(status_response_msg.Status);
+}
+
+static void StatusResponseHandler(CManager cm, CMConnection conn, void *vevent, void *client_data,
+                                  attr_list attrs)
+{
+    StatusResponseMsg status_response_msg = static_cast<StatusResponseMsg>(vevent);
+    std::cout << "Server running on " << status_response_msg->Hostname
+              << " current status: " << status_response_msg->Status << std::endl;
+    exit(0);
+}
+
 void ServerRegisterHandlers(struct Remote_evpath_state &ev_state)
 {
     CMregister_handler(ev_state.OpenFileFormat, OpenHandler, &ev_state);
@@ -357,6 +389,8 @@ void ServerRegisterHandlers(struct Remote_evpath_state &ev_state)
     CMregister_handler(ev_state.ReadRequestFormat, ReadRequestHandler, &ev_state);
     CMregister_handler(ev_state.KillServerFormat, KillServerHandler, &ev_state);
     CMregister_handler(ev_state.KillResponseFormat, KillResponseHandler, &ev_state);
+    CMregister_handler(ev_state.StatusServerFormat, StatusServerHandler, &ev_state);
+    CMregister_handler(ev_state.StatusResponseFormat, StatusResponseHandler, &ev_state);
 }
 
 static const char *hostname = "localhost";
@@ -390,12 +424,41 @@ void connect_and_kill(int ServerPort)
     exit(0);
 }
 
+void connect_and_get_status(int ServerPort)
+{
+    CManager cm = CManager_create();
+    _StatusServerMsg status_msg;
+    struct Remote_evpath_state ev_state;
+    attr_list contact_list = create_attr_list();
+    atom_t CM_IP_PORT = -1;
+    atom_t CM_IP_HOSTNAME = -1;
+    CM_IP_HOSTNAME = attr_atom_from_string("IP_HOST");
+    CM_IP_PORT = attr_atom_from_string("IP_PORT");
+    add_attr(contact_list, CM_IP_HOSTNAME, Attr_String, (attr_value)hostname);
+    add_attr(contact_list, CM_IP_PORT, Attr_Int4, (attr_value)ServerPort);
+    CMConnection conn = CMinitiate_conn(cm, contact_list);
+    if (!conn)
+        return;
+
+    ev_state.cm = cm;
+
+    RegisterFormats(ev_state);
+
+    ServerRegisterHandlers(ev_state);
+
+    memset(&status_msg, 0, sizeof(status_msg));
+    status_msg.StatusResponseCondition = CMCondition_get(ev_state.cm, conn);
+    CMwrite(conn, ev_state.StatusServerFormat, &status_msg);
+    CMCondition_wait(ev_state.cm, status_msg.StatusResponseCondition);
+    exit(0);
+}
+
 static atom_t CM_IP_PORT = -1;
 
 static bool server_timeout(void *CMvoid, int time_since_service)
 {
     CManager cm = (CManager)CMvoid;
-    if (verbose)
+    if (verbose && (time_since_service > 90))
         std::cout << time_since_service << " seconds since last service.\n";
     if (time_since_service > 600)
     {
@@ -430,6 +493,7 @@ int main(int argc, char **argv)
     struct Remote_evpath_state ev_state;
     int background = 0;
     int kill_server = 0;
+    int status_server = 0;
     int no_timeout = 0; // default to timeout
 
     for (int i = 1; i < argc; i++)
@@ -442,31 +506,27 @@ int main(int argc, char **argv)
         {
             kill_server++;
         }
+        else if (strcmp(argv[i], "-status") == 0)
+        {
+            status_server++;
+        }
         else if (strcmp(argv[i], "-no_timeout") == 0)
         {
             no_timeout++;
         }
-        if (argv[i][0] == '-')
+        else if (strcmp(argv[i], "-v") == 0)
         {
-            size_t j = 1;
-            while (argv[i][j] != 0)
-            {
-                if (argv[i][j] == 'v')
-                {
-                    verbose++;
-                }
-                else if (argv[i][j] == 'q')
-                {
-                    verbose--;
-                }
-                j++;
-            }
+            verbose++;
+        }
+        else if (strcmp(argv[i], "-q") == 0)
+        {
+            verbose--;
         }
         else
         {
             fprintf(stderr, "Unknown argument \"%s\"\n", argv[i]);
-            fprintf(stderr,
-                    "Usage:  remote_server [-background] [-kill_server] [-no_timeout] [-v] [-q]\n");
+            fprintf(stderr, "Usage:  remote_server [-background] [-kill_server] [-no_timeout] "
+                            "[-status] [-v] [-q]\n");
             exit(1);
         }
     }
@@ -476,6 +536,11 @@ int main(int argc, char **argv)
         connect_and_kill(ServerPort);
         exit(0);
     }
+    if (status_server)
+    {
+        connect_and_get_status(ServerPort);
+        exit(0);
+    }
     if (background)
     {
         if (verbose)
@@ -519,7 +584,6 @@ int main(int argc, char **argv)
 
     ServerRegisterHandlers(ev_state);
 
-    std::cout << "doing Run Network" << std::endl;
     CMrun_network(cm);
     return 0;
 }
diff --git a/source/adios2/toolkit/sst/CMakeLists.txt b/source/adios2/toolkit/sst/CMakeLists.txt
index 070c8f41a1..c77fc0459b 100644
--- a/source/adios2/toolkit/sst/CMakeLists.txt
+++ b/source/adios2/toolkit/sst/CMakeLists.txt
@@ -65,6 +65,7 @@ set(SST_CONFIG_OPTS
   UCX
   FI_GNI
   CRAY_DRC
+  CRAY_CXI
   NVStream
   MPI
 )
diff --git a/source/adios2/toolkit/sst/dp/rdma_dp.c b/source/adios2/toolkit/sst/dp/rdma_dp.c
index e61c6f5f3f..e3800f4ff0 100644
--- a/source/adios2/toolkit/sst/dp/rdma_dp.c
+++ b/source/adios2/toolkit/sst/dp/rdma_dp.c
@@ -19,6 +19,13 @@
 #include <rdma/fi_endpoint.h>
 #include <rdma/fi_rma.h>
 
+#ifdef SST_HAVE_CRAY_CXI
+#include <stdbool.h>
+// This comment prevents clang-format from reordering these includes.
+// The CXI extension header requires the bool header, but does not include it on its own.
+#include <rdma/fi_cxi_ext.h>
+#endif
+
 #if defined(__has_feature)
 #if __has_feature(thread_sanitizer)
 #define NO_SANITIZE_THREAD __attribute__((no_sanitize("thread")))
@@ -53,6 +60,66 @@ static pthread_mutex_t fabric_mutex = PTHREAD_MUTEX_INITIALIZER;
 pthread_mutex_t wsr_mutex = PTHREAD_MUTEX_INITIALIZER;
 pthread_mutex_t ts_mutex = PTHREAD_MUTEX_INITIALIZER;
 
+/*
+ * Wrapper for fi_mr_reg() with additional parameters endpoint and mr_mode.
+ * If mr_mode includes FI_MR_ENDPOINT, the memory region must be bound to the
+ * endpoint and enabled before use.
+ */
+int sst_fi_mr_reg(
+    /* first two parameters for verbose logging */
+    CP_Services Svcs, void *CP_Stream,
+    /* regular fi_mir_reg() parameters*/
+    struct fid_domain *domain, const void *buf, size_t len, uint64_t acs, uint64_t offset,
+    uint64_t requested_key, uint64_t flags, struct fid_mr **mr, void *context,
+    /* additional parameters for binding the mr to the endpoint*/
+    struct fid_ep *endpoint, int mr_mode)
+{
+    int res = fi_mr_reg(domain, buf, len, acs, offset, requested_key, flags, mr, context);
+    int is_mr_endpoint = (mr_mode & FI_MR_ENDPOINT) != 0;
+    if (!is_mr_endpoint)
+    {
+        return res;
+    }
+    if (res != FI_SUCCESS)
+    {
+        Svcs->verbose(CP_Stream, DPCriticalVerbose, "fi_mr_reg failed with %ul (%s)\n", res,
+                      fi_strerror(res));
+        return res;
+    }
+
+    /*
+     * When the domain_attr->mr_mode includes FI_MR_ENDPOINT, the memory region
+     * needs to be bound to the endpoint and explicitly enabled after that.
+     */
+    res = fi_mr_bind(*mr, &endpoint->fid, 0);
+    if (res != FI_SUCCESS)
+    {
+        Svcs->verbose(CP_Stream, DPCriticalVerbose, "fi_mr_bind failed with %ul (%s)\n", res,
+                      fi_strerror(res));
+        return res;
+    }
+    res = fi_mr_enable(*mr);
+    if (res != FI_SUCCESS)
+    {
+        Svcs->verbose(CP_Stream, DPCriticalVerbose, "fi_mr_enable failed with %ul (%s)\n", res,
+                      fi_strerror(res));
+        return res;
+    }
+    return res;
+}
+
+/*
+ * Simple wrapper to create a log entry upon failing fi_*() function calls.
+ */
+int guard_fi_return(int code, CP_Services Svcs, CManager cm, char const *msg)
+{
+    if (code != FI_SUCCESS)
+    {
+        Svcs->verbose(cm, DPCriticalVerbose, "%s: %s (%lu)\n", msg, fi_strerror(code), code);
+    }
+    return code;
+}
+
 struct fabric_state
 {
     struct fi_context *ctx;
@@ -60,6 +127,7 @@ struct fabric_state
     struct fi_info *info;
     // struct fi_info *linfo;
     int local_mr_req;
+    int mr_virt_addr; /* Stores if the mr_mode includes FI_MR_VIRT_ADDR */
     int rx_cq_data;
     size_t addr_len;
     size_t msg_prefix_size;
@@ -69,6 +137,9 @@ struct fabric_state
     struct fid_cq *cq_signal;
     struct fid_av *av;
     pthread_t listener;
+#ifdef SST_HAVE_CRAY_CXI
+    struct cxi_auth_key *cxi_auth_key;
+#endif
 #ifdef SST_HAVE_CRAY_DRC
     drc_info_handle_t drc_info;
     uint32_t credential;
@@ -112,13 +183,24 @@ struct fabric_state
  *   plane would replace one or both of these with RDMA functionality.
  */
 
+static char const *get_preferred_domain(struct _SstParams *Params)
+{
+    if (Params->DataInterface)
+    {
+        return Params->DataInterface;
+    }
+    else
+    {
+        return getenv("FABRIC_IFACE");
+    }
+}
+
 static void init_fabric(struct fabric_state *fabric, struct _SstParams *Params, CP_Services Svcs,
-                        void *CP_Stream)
+                        void *CP_Stream, char const *ifname)
 {
     struct fi_info *hints, *info, *originfo, *useinfo;
     struct fi_av_attr av_attr = {FI_AV_UNSPEC};
     struct fi_cq_attr cq_attr = {0};
-    char *ifname;
     int result;
 
     hints = fi_allocinfo();
@@ -126,24 +208,76 @@ static void init_fabric(struct fabric_state *fabric, struct _SstParams *Params,
         FI_MSG | FI_SEND | FI_RECV | FI_REMOTE_READ | FI_REMOTE_WRITE | FI_RMA | FI_READ | FI_WRITE;
     hints->mode =
         FI_CONTEXT | FI_LOCAL_MR | FI_CONTEXT2 | FI_MSG_PREFIX | FI_ASYNC_IOV | FI_RX_CQ_DATA;
-    hints->domain_attr->mr_mode = FI_MR_BASIC;
-    hints->domain_attr->control_progress = FI_PROGRESS_AUTO;
-    hints->domain_attr->data_progress = FI_PROGRESS_AUTO;
     hints->ep_attr->type = FI_EP_RDM;
 
-    if (Params->DataInterface)
+    uint32_t fi_version;
+#ifdef SST_HAVE_CRAY_CXI
+    if (fabric->cxi_auth_key)
     {
-        ifname = Params->DataInterface;
+        fi_version = FI_VERSION(1, 11);
+
+        hints->domain_attr->mr_mode = FI_MR_ENDPOINT;
+        hints->domain_attr->control_progress = FI_PROGRESS_MANUAL;
+        hints->domain_attr->data_progress = FI_PROGRESS_MANUAL;
+
+        // Authentication is needed
+        // TODO: the first ID in SLINGSHOT_SVC_IDS is chosen, but we should
+        // rather choose the one corresponding with the FABRIC_IFACE
+        // example:
+        // SLINGSHOT_SVC_IDS=5,5,5,5
+        // SLINGSHOT_VNIS=1310,1271
+        // SLINGSHOT_DEVICES=cxi0,cxi1,cxi2,cxi3
+        // FABRIC_IFACE=cxi2 (user specified)
+
+        hints->ep_attr->auth_key = malloc(sizeof(struct cxi_auth_key));
+        memcpy(hints->ep_attr->auth_key, fabric->cxi_auth_key, sizeof(struct cxi_auth_key));
+        hints->ep_attr->auth_key_size = sizeof(struct cxi_auth_key);
+
+        hints->domain_attr->auth_key = malloc(sizeof(struct cxi_auth_key));
+        memcpy(hints->domain_attr->auth_key, fabric->cxi_auth_key, sizeof(struct cxi_auth_key));
+        hints->domain_attr->auth_key_size = sizeof(struct cxi_auth_key);
     }
     else
     {
-        ifname = getenv("FABRIC_IFACE");
+        fi_version = FI_VERSION(1, 5);
+
+        hints->domain_attr->mr_mode = FI_MR_BASIC;
+        hints->domain_attr->control_progress = FI_PROGRESS_AUTO;
+        hints->domain_attr->data_progress = FI_PROGRESS_AUTO;
+    }
+#else
+    fi_version = FI_VERSION(1, 5);
+
+    // Alternatively, one could set mr_mode to
+    // FI_MR_VIRT_ADDR | FI_MR_ALLOCATED | FI_MR_PROV_KEY | FI_MR_LOCAL
+    // here. These flags are equivalent to FI_MR_BASIC, but unlike basic
+    // registration, providers are not forced to keep those flags when they
+    // think that not using the flags is better.
+    // The RDMA DP is able to deal with this appropriately, and does so right
+    // before calling fi_fabric() further below in this function.
+    // The main reason for keeping FI_MR_BASIC here is backward compatibility.
+    hints->domain_attr->mr_mode = FI_MR_BASIC;
+    hints->domain_attr->control_progress = FI_PROGRESS_AUTO;
+    hints->domain_attr->data_progress = FI_PROGRESS_AUTO;
+#endif
+
+    /*
+     * ifname is passed as a function parameter of init_fabric() if
+     * a provider-specific key was configured and sent to the reader.
+     * Since the key is generally domain-specific, we must use that one in this
+     * case.
+     * The preferred domain is already considered upon key configuration,
+     * so this is fine.
+     */
+    if (!ifname)
+    {
+        ifname = get_preferred_domain(Params);
     }
 
     fabric->info = NULL;
 
     pthread_mutex_lock(&fabric_mutex);
-    fi_getinfo(FI_VERSION(1, 5), NULL, NULL, 0, hints, &info);
+    fi_getinfo(fi_version, NULL, NULL, 0, hints, &info);
     pthread_mutex_unlock(&fabric_mutex);
     if (!info)
     {
@@ -167,7 +301,8 @@ static void init_fabric(struct fabric_state *fabric, struct _SstParams *Params,
             break;
         }
         if ((((strcmp(prov_name, "verbs") == 0) && info->src_addr) ||
-             (strcmp(prov_name, "gni") == 0) || (strcmp(prov_name, "psm2") == 0)) &&
+             (strcmp(prov_name, "gni") == 0) || (strcmp(prov_name, "psm2") == 0) ||
+             (strcmp(prov_name, "cxi") == 0)) &&
             (!useinfo || !ifname || (strcmp(useinfo->domain_attr->name, ifname) != 0)))
         {
             Svcs->verbose(CP_Stream, DPTraceVerbose,
@@ -177,7 +312,7 @@ static void init_fabric(struct fabric_state *fabric, struct _SstParams *Params,
             useinfo = info;
         }
         else if (((strstr(prov_name, "verbs") && info->src_addr) || strstr(prov_name, "gni") ||
-                  strstr(prov_name, "psm2")) &&
+                  strstr(prov_name, "psm2") || strstr(prov_name, "cxi")) &&
                  !useinfo)
         {
             Svcs->verbose(CP_Stream, DPTraceVerbose,
@@ -253,7 +388,30 @@ static void init_fabric(struct fabric_state *fabric, struct _SstParams *Params,
 
     fabric->addr_len = info->src_addrlen;
 
-    info->domain_attr->mr_mode = FI_MR_BASIC;
+    /*
+     * The libfabric data-plane of SST was originally programmed to use
+     * FI_MR_BASIC as mr_mode, which is equivalent to
+     * FI_MR_VIRT_ADDR | FI_MR_ALLOCATED | FI_MR_PROV_KEY | FI_MR_LOCAL.
+     *
+     * However, HPE's CXI provider requires two changes to that:
+     * (1) It does not support FI_MR_VIRT_ADDR.
+     * (2) It requires use of FI_MR_ENDPOINT.
+     *
+     * So we propagate the bit value currently contained in the mr_mode
+     * for these flags.
+     */
+    if (info->domain_attr->mr_mode != FI_MR_BASIC)
+    {
+        info->domain_attr->mr_mode = FI_MR_ALLOCATED | FI_MR_PROV_KEY | FI_MR_LOCAL |
+                                     (FI_MR_ENDPOINT & info->domain_attr->mr_mode) |
+                                     (FI_MR_VIRT_ADDR & info->domain_attr->mr_mode);
+        fabric->mr_virt_addr = info->domain_attr->mr_mode & FI_MR_VIRT_ADDR ? 1 : 0;
+    }
+    else
+    {
+        fabric->mr_virt_addr = 1;
+    }
+
 #ifdef SST_HAVE_CRAY_DRC
     if (strstr(info->fabric_attr->prov_name, "gni") && fabric->auth_key)
     {
@@ -411,6 +569,12 @@ static void fini_fabric(struct fabric_state *fabric, CP_Services Svcs, void *CP_
         free(fabric->ctx);
     }
 
+#ifdef SST_HAVE_CRAY_CXI
+    if (fabric->cxi_auth_key)
+    {
+        free(fabric->cxi_auth_key);
+    }
+#endif
 #ifdef SST_HAVE_CRAY_DRC
     if (Fabric->auth_key)
     {
@@ -599,6 +763,191 @@ static TimestepList GetStep(Rdma_WS_Stream Stream, long Timestep)
     return (Step);
 }
 
+#ifdef SST_HAVE_CRAY_CXI
+static int get_cxi_auth_key_from_env(CP_Services Svcs, void *CP_Stream, struct _SstParams *Params,
+                                     struct cxi_auth_key *key, char **used_device)
+{
+    int vni, first_vni, second_vni, svc_id;
+
+    // Just some safety against faulty strings in string processing.
+    size_t const no_infinite_loops = 10000;
+
+    // struct cxi_auth_key {
+    //     /* The CXI service assigned to the Domain and Endpoints. A CXI
+    //     service
+    //     * is associated with a set of local resource limits, VNIs, and
+    //     Traffic
+    //     * Classes.
+    //     *
+    //     * The svc_id used by an OFI Domain must match all Endpoints belonging
+    //     * to the Domain.
+    //     */
+    //     uint32_t svc_id;
+
+    //     /* The Virtual Network ID (VNI) assigned to the Endpoint. Two
+    //     Endpoints
+    //     * must use the same VNI in order to communicate.
+    //     *
+    //     * Note that while the CXI service may define one or more VNIs which a
+    //     * process can access, an Endpoint is assigned to only one.
+    //     */
+    //     uint16_t vni;
+    // };
+
+    // typical value SLINGSHOT_DEVICES=cxi0,cxi1,cxi2,cxi3
+    char const *slingshot_devices = getenv("SLINGSHOT_DEVICES");
+    char const *preferred_device = get_preferred_domain(Params);
+
+    /*
+     * In the following loop, find out if the preferred_device is found within
+     * the slingshot_devices.
+     * If the preferred_device is NULL, just pick the first.
+     * Upon success, modifies the output parameter used_device and stores
+     * the retrieved device index.
+     */
+    size_t device_index = 0;
+    for (size_t no_infinite_loop_counter = 0;; ++device_index, ++no_infinite_loop_counter)
+    {
+        if (no_infinite_loop_counter > no_infinite_loops)
+        {
+            return EXIT_FAILURE;
+        }
+
+        // Are we at the end of the environment variable?
+        int found_end = 0;
+
+        // Find out the length of the current item in slingshot_devices.
+        size_t find_end_of_current_string = 0;
+        for (size_t no_infinite_loop_inner_counter = 0;;
+             ++find_end_of_current_string, ++no_infinite_loop_inner_counter)
+        {
+            if (no_infinite_loop_inner_counter > no_infinite_loops)
+            {
+                return EXIT_FAILURE;
+            }
+
+            switch (slingshot_devices[find_end_of_current_string])
+            {
+            case '\0':
+                found_end = 1;
+                goto break_first_loop;
+            case ',':
+                goto break_first_loop;
+            default:
+                break;
+            }
+        }
+    break_first_loop:;
+        int use_this_device = !preferred_device || (strncmp(preferred_device, slingshot_devices,
+                                                            find_end_of_current_string) == 0);
+        if (use_this_device)
+        {
+            char *construct_used_device = malloc(find_end_of_current_string + 1);
+            memcpy(construct_used_device, slingshot_devices, find_end_of_current_string);
+            construct_used_device[find_end_of_current_string] = '\0';
+            *used_device = construct_used_device;
+            break;
+        }
+        else if (found_end)
+        {
+            return EXIT_FAILURE;
+        }
+        else
+        {
+            // go to next iteration
+            slingshot_devices += find_end_of_current_string + 1;
+        }
+    }
+
+    Svcs->verbose(CP_Stream, DPTraceVerbose, "Found device %s at index %zu\n", *used_device,
+                  device_index);
+
+    // typical value SLINGSHOT_VNIS=4576,4530
+    char const *vni_env_str = getenv("SLINGSHOT_VNIS");
+    if (!vni_env_str)
+    {
+        return EXIT_FAILURE;
+    }
+
+    // typical value SLINGSHOT_SVC_IDS=5,5,5,5
+    char const *svc_ids_env_str = getenv("SLINGSHOT_SVC_IDS");
+    if (!svc_ids_env_str)
+    {
+        return EXIT_FAILURE;
+    }
+
+    {
+        int num_vnis = sscanf(vni_env_str, "%d,%d", &first_vni, &second_vni);
+        switch (num_vnis)
+        {
+        // first VNI is the subjob's VNI
+        case 1:
+            Svcs->verbose(CP_Stream, DPTraceVerbose, "Using first vni.\n");
+            vni = first_vni;
+            break;
+        // if present, the second VNI is the containing job's VNI
+        // the first VNI belongs to the subjob
+        case 2:
+            Svcs->verbose(CP_Stream, DPTraceVerbose, "Using second vni.\n");
+            vni = second_vni;
+            break;
+        default:
+            return EXIT_FAILURE;
+        }
+    }
+
+    {
+        // Pick the service ID according to the device_index found above.
+        for (size_t svc_id_index = 0; svc_id_index < device_index; ++svc_id_index)
+        {
+            for (size_t no_infinite_loop_counter = 0;; ++no_infinite_loop_counter)
+            {
+                if (no_infinite_loop_counter > no_infinite_loops)
+                {
+                    return EXIT_FAILURE;
+                }
+
+                switch (*(svc_ids_env_str++))
+                {
+                case ',':
+                    goto break_second_loop;
+                case '\0':
+                    return EXIT_FAILURE;
+                default:
+                    continue;
+                }
+            }
+        break_second_loop:;
+        }
+
+        int num_svc_ids = sscanf(svc_ids_env_str, "%d", &svc_id);
+        switch (num_svc_ids)
+        {
+        case 1:
+            break;
+        default:
+            return EXIT_FAILURE;
+        }
+    }
+
+    key->vni = vni;
+    key->svc_id = svc_id;
+
+    return EXIT_SUCCESS;
+}
+
+static int get_cxi_auth_key_from_writer(struct cxi_auth_key *key, attr_list WriterContact)
+{
+    long vni;
+    if (!get_long_attr(WriterContact, attr_atom_from_string("vni"), &vni))
+    {
+        return EXIT_FAILURE;
+    }
+    key->vni = (uint16_t)vni;
+    return EXIT_SUCCESS;
+}
+#endif
+
 static DP_RS_Stream RdmaInitReader(CP_Services Svcs, void *CP_Stream, void **ReaderContactInfoPtr,
                                    struct _SstParams *Params, attr_list WriterContact,
                                    SstStats Stats)
@@ -645,6 +994,38 @@ static DP_RS_Stream RdmaInitReader(CP_Services Svcs, void *CP_Stream, void **Rea
         Stream->PreloadAvail = 0;
     }
 
+    char *required_device = NULL;
+#ifdef SST_HAVE_CRAY_CXI
+    struct
+    {
+        struct cxi_auth_key key;
+        int valid;
+    } tagged_key;
+
+    /*
+     * The svc_id of the key must match the device that this particular reader
+     * connects with.
+     * The vni (virtual network ID) must be the same across all communicating
+     * instances (get this from the writer).
+     */
+
+    tagged_key.valid =
+        get_cxi_auth_key_from_env(Svcs, CP_Stream, Params, &tagged_key.key, &required_device);
+
+    if (tagged_key.valid == EXIT_SUCCESS &&
+        get_cxi_auth_key_from_writer(&tagged_key.key, WriterContact) == EXIT_SUCCESS)
+    {
+        Svcs->verbose(CP_Stream, DPSummaryVerbose, "Reader found CXI auth key: %d %d\n",
+                      tagged_key.key.vni, tagged_key.key.svc_id);
+        Stream->Fabric->cxi_auth_key = calloc(1, sizeof(struct cxi_auth_key));
+        memcpy(Stream->Fabric->cxi_auth_key, &tagged_key.key, sizeof(struct cxi_auth_key));
+    }
+    else
+    {
+        Svcs->verbose(CP_Stream, DPSummaryVerbose, "Reader found no CXI auth key\n");
+    }
+#endif
+
 #ifdef SST_HAVE_CRAY_DRC
     int attr_cred, try_left, rc;
     if (!get_int_attr(WriterContact, attr_atom_from_string("RDMA_DRC_KEY"), &attr_cred))
@@ -675,7 +1056,11 @@ static DP_RS_Stream RdmaInitReader(CP_Services Svcs, void *CP_Stream, void **Rea
 
 #endif /* SST_HAVE_CRAY_DRC */
 
-    init_fabric(Stream->Fabric, Stream->Params, Svcs, CP_Stream);
+    init_fabric(Stream->Fabric, Stream->Params, Svcs, CP_Stream, required_device);
+    if (required_device)
+    {
+        free(required_device);
+    }
     if (!Fabric->info)
     {
         Svcs->verbose(CP_Stream, DPCriticalVerbose, "Could not find a valid transport fabric.\n");
@@ -684,7 +1069,12 @@ static DP_RS_Stream RdmaInitReader(CP_Services Svcs, void *CP_Stream, void **Rea
 
     ContactInfo->Length = Fabric->info->src_addrlen;
     ContactInfo->Address = malloc(ContactInfo->Length);
-    fi_getname((fid_t)Fabric->signal, ContactInfo->Address, &ContactInfo->Length);
+    if (guard_fi_return(
+            fi_getname((fid_t)Fabric->signal, ContactInfo->Address, &ContactInfo->Length), Svcs,
+            CP_Stream, "[RdmaInitReader] fi_getname() failed with:") != FI_SUCCESS)
+    {
+        return NULL;
+    }
 
     Stream->PreloadStep = -1;
     Stream->ContactInfo = ContactInfo;
@@ -775,6 +1165,42 @@ static DP_WS_Stream RdmaInitWriter(CP_Services Svcs, void *CP_Stream, struct _Ss
 
     Stream->Fabric = calloc(1, sizeof(struct fabric_state));
     Fabric = Stream->Fabric;
+
+    char *required_device = NULL;
+#ifdef SST_HAVE_CRAY_CXI
+    struct
+    {
+        struct cxi_auth_key key;
+        int valid;
+    } tagged_key;
+
+    /*
+     * The svc_id of the key must match the device that this particular writer
+     * connects with.
+     * The vni (virtual network ID) must be the same across all communicating
+     * instances (use the one seen by rank 0).
+     */
+    tagged_key.valid =
+        get_cxi_auth_key_from_env(Svcs, CP_Stream, Params, &tagged_key.key, &required_device);
+
+    // Ensure that all writers use the same virtual network ID
+    SMPI_Bcast(&tagged_key.key.vni, sizeof(tagged_key.key.vni), SMPI_BYTE, 0, comm);
+
+    if (tagged_key.valid == EXIT_SUCCESS)
+    {
+        Svcs->verbose(CP_Stream, DPSummaryVerbose, "Writer found CXI auth key: %d %d\n",
+                      tagged_key.key.vni, tagged_key.key.svc_id);
+
+        set_long_attr(DPAttrs, attr_atom_from_string("vni"), tagged_key.key.vni);
+        Stream->Fabric->cxi_auth_key = calloc(1, sizeof(struct cxi_auth_key));
+        memcpy(Stream->Fabric->cxi_auth_key, &tagged_key.key, sizeof(struct cxi_auth_key));
+    }
+    else
+    {
+        Svcs->verbose(CP_Stream, DPSummaryVerbose, "Writer found no CXI auth key");
+    }
+#endif
+
 #ifdef SST_HAVE_CRAY_DRC
     int try_left, rc;
     if (Stream->Rank == 0)
@@ -818,7 +1244,11 @@ static DP_WS_Stream RdmaInitWriter(CP_Services Svcs, void *CP_Stream, struct _Ss
     set_long_attr(DPAttrs, attr_atom_from_string("RDMA_DRC_CRED"), attr_cred);
 #endif /* SST_HAVE_CRAY_DRC */
 
-    init_fabric(Stream->Fabric, Params, Svcs, CP_Stream);
+    init_fabric(Stream->Fabric, Params, Svcs, CP_Stream, required_device);
+    if (required_device)
+    {
+        free(required_device);
+    }
     Fabric = Stream->Fabric;
     if (!Fabric->info)
     {
@@ -872,8 +1302,15 @@ static DP_WSR_Stream RdmaInitWriterPerReader(CP_Services Svcs, DP_WS_Stream WS_S
 
     for (i = 0; i < readerCohortSize; i++)
     {
-        fi_av_insert(Fabric->av, providedReaderInfo[i]->Address, 1, &WSR_Stream->ReaderAddr[i], 0,
-                     NULL);
+        if (fi_av_insert(Fabric->av, providedReaderInfo[i]->Address, 1, &WSR_Stream->ReaderAddr[i],
+                         0, NULL) < 1)
+        {
+
+            Svcs->verbose(WS_Stream->CP_Stream, DPCriticalVerbose,
+                          "[RdmaInitWRiterPerReader] Failed inserting address "
+                          "into vector\n");
+            return NULL;
+        }
         Svcs->verbose(WS_Stream->CP_Stream, DPTraceVerbose,
                       "Received contact info for RS_Stream %p, WSR Rank %d\n",
                       providedReaderInfo[i]->RS_Stream, i);
@@ -895,13 +1332,20 @@ static DP_WSR_Stream RdmaInitWriterPerReader(CP_Services Svcs, DP_WS_Stream WS_S
 
     ContactInfo->Length = Fabric->info->src_addrlen;
     ContactInfo->Address = malloc(ContactInfo->Length);
-    fi_getname((fid_t)Fabric->signal, ContactInfo->Address, &ContactInfo->Length);
+    if (guard_fi_return(
+            fi_getname((fid_t)Fabric->signal, ContactInfo->Address, &ContactInfo->Length), Svcs,
+            WS_Stream->CP_Stream,
+            "[RdmaInitWriterPerReader] fi_getname() failed with") != FI_SUCCESS)
+    {
+        return NULL;
+    }
 
     ReaderRollHandle = &ContactInfo->ReaderRollHandle;
     ReaderRollHandle->Block = calloc(readerCohortSize, sizeof(struct _RdmaBuffer));
-    fi_mr_reg(Fabric->domain, ReaderRollHandle->Block,
-              readerCohortSize * sizeof(struct _RdmaBuffer), FI_REMOTE_WRITE, 0, 0, 0,
-              &WSR_Stream->rrmr, Fabric->ctx);
+    sst_fi_mr_reg(Svcs, WS_Stream->CP_Stream, Fabric->domain, ReaderRollHandle->Block,
+                  readerCohortSize * sizeof(struct _RdmaBuffer), FI_REMOTE_WRITE, 0, 0, 0,
+                  &WSR_Stream->rrmr, Fabric->ctx, Fabric->signal,
+                  Fabric->info->domain_attr->mr_mode);
     ReaderRollHandle->Key = fi_mr_key(WSR_Stream->rrmr);
 
     WSR_Stream->WriterContactInfo = ContactInfo;
@@ -943,8 +1387,15 @@ static void RdmaProvideWriterDataToReader(CP_Services Svcs, DP_RS_Stream RS_Stre
     for (int i = 0; i < writerCohortSize; i++)
     {
         RS_Stream->WriterContactInfo[i].WS_Stream = providedWriterInfo[i]->WS_Stream;
-        fi_av_insert(Fabric->av, providedWriterInfo[i]->Address, 1, &RS_Stream->WriterAddr[i], 0,
-                     NULL);
+        if (fi_av_insert(Fabric->av, providedWriterInfo[i]->Address, 1, &RS_Stream->WriterAddr[i],
+                         0, NULL) < 1)
+        {
+            Svcs->verbose(RS_Stream->CP_Stream, DPCriticalVerbose,
+                          "[RdmaProvideWriterDataToReader] "
+                          "Failed inserting address "
+                          "into vector\n");
+            return;
+        }
         RS_Stream->WriterRoll[i] = providedWriterInfo[i]->ReaderRollHandle;
         Svcs->verbose(RS_Stream->CP_Stream, DPTraceVerbose,
                       "Received contact info for WS_stream %p, WSR Rank %d\n",
@@ -1032,11 +1483,21 @@ static ssize_t PostRead(CP_Services Svcs, Rdma_RS_Stream RS_Stream, int Rank, lo
     if (Fabric->local_mr_req)
     {
         // register dest buffer
-        fi_mr_reg(Fabric->domain, Buffer, Length, FI_READ, 0, 0, 0, &ret->LocalMR, Fabric->ctx);
+        sst_fi_mr_reg(Svcs, RS_Stream->CP_Stream, Fabric->domain, Buffer, Length, FI_READ, 0, 0, 0,
+                      &ret->LocalMR, Fabric->ctx, Fabric->signal,
+                      Fabric->info->domain_attr->mr_mode);
         LocalDesc = fi_mr_desc(ret->LocalMR);
     }
 
-    Addr = Info->Block + Offset;
+    if (Fabric->mr_virt_addr)
+    {
+        Addr = Info->Block + Offset;
+    }
+    else
+    {
+        Addr = NULL;
+        Addr += Offset;
+    }
 
     Svcs->verbose(RS_Stream->CP_Stream, DPTraceVerbose,
                   "Remote read target is Rank %d (Offset = %zi, Length = %zi)\n", Rank, Offset,
@@ -1203,8 +1664,14 @@ static int DoPushWait(CP_Services Svcs, Rdma_RS_Stream Stream, RdmaCompletionHan
         rc = fi_cq_sread(Fabric->cq_signal, (void *)(&CQEntry), 1, NULL, -1);
         if (rc < 1)
         {
-            Svcs->verbose(Stream->CP_Stream, DPCriticalVerbose,
-                          "failure while waiting for completions (%d).\n", rc);
+            struct fi_cq_err_entry error;
+            fi_cq_readerr(Fabric->cq_signal, &error, 0);
+            Svcs->verbose(
+                Stream->CP_Stream, DPCriticalVerbose,
+                "failure while waiting for completions inside "
+                "DoPushWait() (%d (%s - %s)).\n",
+                rc, fi_strerror(error.err),
+                fi_cq_strerror(Fabric->cq_signal, error.err, error.err_data, NULL, error.len));
             return 0;
         }
         else if (CQEntry.flags & FI_REMOTE_CQ_DATA)
@@ -1276,8 +1743,14 @@ static int WaitForAnyPull(CP_Services Svcs, Rdma_RS_Stream Stream)
     rc = fi_cq_sread(Fabric->cq_signal, (void *)(&CQEntry), 1, NULL, -1);
     if (rc < 1)
     {
-        Svcs->verbose(Stream->CP_Stream, DPCriticalVerbose,
-                      "failure while waiting for completions (%d).\n", rc);
+        struct fi_cq_err_entry error;
+        fi_cq_readerr(Fabric->cq_signal, &error, 0);
+        Svcs->verbose(
+            Stream->CP_Stream, DPCriticalVerbose,
+            "failure while waiting for completions inside "
+            "WaitForAnyPull() (%d (%s - %s)).\n",
+            rc, fi_strerror(error.err),
+            fi_cq_strerror(Fabric->cq_signal, error.err, error.err_data, NULL, error.len));
         return 0;
     }
     else
@@ -1345,8 +1818,9 @@ static void RdmaProvideTimestep(CP_Services Svcs, DP_WS_Stream Stream_v, struct
     Entry->BufferSlot = -1;
     Entry->Desc = NULL;
 
-    fi_mr_reg(Fabric->domain, Data->block, Data->DataSize, FI_WRITE | FI_REMOTE_READ, 0, 0, 0,
-              &Entry->mr, Fabric->ctx);
+    sst_fi_mr_reg(Svcs, Stream->CP_Stream, Fabric->domain, Data->block, Data->DataSize,
+                  FI_WRITE | FI_REMOTE_READ, 0, 0, 0, &Entry->mr, Fabric->ctx, Fabric->signal,
+                  Fabric->info->domain_attr->mr_mode);
     Entry->Key = fi_mr_key(Entry->mr);
     if (Fabric->local_mr_req)
     {
@@ -1608,7 +2082,7 @@ static struct _CP_DP_Interface RdmaDPInterface = {0};
 static int RdmaGetPriority(CP_Services Svcs, void *CP_Stream, struct _SstParams *Params)
 {
     struct fi_info *hints, *info, *originfo;
-    char *ifname;
+    char const *ifname;
     char *forkunsafe;
     int Ret = -1;
 
@@ -1617,20 +2091,41 @@ static int RdmaGetPriority(CP_Services Svcs, void *CP_Stream, struct _SstParams
         FI_MSG | FI_SEND | FI_RECV | FI_REMOTE_READ | FI_REMOTE_WRITE | FI_RMA | FI_READ | FI_WRITE;
     hints->mode =
         FI_CONTEXT | FI_LOCAL_MR | FI_CONTEXT2 | FI_MSG_PREFIX | FI_ASYNC_IOV | FI_RX_CQ_DATA;
-    hints->domain_attr->mr_mode = FI_MR_BASIC;
-    hints->domain_attr->control_progress = FI_PROGRESS_AUTO;
-    hints->domain_attr->data_progress = FI_PROGRESS_AUTO;
     hints->ep_attr->type = FI_EP_RDM;
 
-    if (Params->DataInterface)
+    char const *vni_env_str = getenv("SLINGSHOT_VNIS");
+
+    uint32_t fi_version;
+    if (vni_env_str)
     {
-        ifname = Params->DataInterface;
+        // try fishing for the CXI provider
+        Svcs->verbose(CP_Stream, DPSummaryVerbose,
+                      "RDMA Dataplane trying to check for an available CXI "
+                      "provider since environment variable SLINGSHOT_VNIS is "
+                      "defined (value: '%s').\n",
+                      vni_env_str);
+        fi_version = FI_VERSION(1, 11);
+
+        hints->domain_attr->mr_mode = FI_MR_ENDPOINT;
+        hints->domain_attr->control_progress = FI_PROGRESS_MANUAL;
+        hints->domain_attr->data_progress = FI_PROGRESS_MANUAL;
     }
     else
     {
-        ifname = getenv("FABRIC_IFACE");
+        Svcs->verbose(CP_Stream, DPSummaryVerbose,
+                      "RDMA Dataplane trying to check for an available non-CXI "
+                      "provider since environment variable SLINGSHOT_VNIS is "
+                      "not defined.\n");
+
+        fi_version = FI_VERSION(1, 5);
+
+        hints->domain_attr->mr_mode = FI_MR_BASIC;
+        hints->domain_attr->control_progress = FI_PROGRESS_AUTO;
+        hints->domain_attr->data_progress = FI_PROGRESS_AUTO;
     }
 
+    ifname = get_preferred_domain(Params);
+
     forkunsafe = getenv("FI_FORK_UNSAFE");
     if (!forkunsafe)
     {
@@ -1638,7 +2133,7 @@ static int RdmaGetPriority(CP_Services Svcs, void *CP_Stream, struct _SstParams
     }
 
     pthread_mutex_lock(&fabric_mutex);
-    fi_getinfo(FI_VERSION(1, 5), NULL, NULL, 0, hints, &info);
+    fi_getinfo(fi_version, NULL, NULL, 0, hints, &info);
     pthread_mutex_unlock(&fabric_mutex);
     fi_freeinfo(hints);
 
@@ -1656,6 +2151,10 @@ static int RdmaGetPriority(CP_Services Svcs, void *CP_Stream, struct _SstParams
 
         prov_name = info->fabric_attr->prov_name;
         domain_name = info->domain_attr->name;
+        Svcs->verbose(CP_Stream, DPPerStepVerbose,
+                      "[RdmaGetPriority] Seeing and evaluating fabric with "
+                      "provider: '%s', domain: '%s'\n",
+                      prov_name, domain_name);
         if (ifname && strcmp(ifname, domain_name) == 0)
         {
             Svcs->verbose(CP_Stream, DPPerStepVerbose,
@@ -1666,7 +2165,7 @@ static int RdmaGetPriority(CP_Services Svcs, void *CP_Stream, struct _SstParams
             break;
         }
         if ((strstr(prov_name, "verbs") && info->src_addr) || strstr(prov_name, "gni") ||
-            strstr(prov_name, "psm2"))
+            strstr(prov_name, "psm2") || strstr(prov_name, "cxi"))
         {
 
             Svcs->verbose(CP_Stream, DPPerStepVerbose,
@@ -1733,7 +2232,12 @@ static void PushData(CP_Services Svcs, Rdma_WSR_Stream Stream, TimestepList Step
             {
                 rc = fi_writedata(Fabric->signal, StepBuffer + Req->Offset, Req->BufferLen,
                                   Step->Desc, Data, Stream->ReaderAddr[RankReq->Rank],
-                                  (uint64_t)Req->Handle.Block +
+                                  /*
+                                   * If mr_virt_addr is zero, we need just the offset,
+                                   * otherwise we need the remote virtual address composed by
+                                   * base pointer + offset.
+                                   */
+                                  Fabric->mr_virt_addr * (uint64_t)Req->Handle.Block +
                                       (BufferSlot * RankReq->PreloadBufferSize),
                                   RollBuffer->Offset, (void *)(Step->Timestep));
             } while (rc == -EAGAIN);
@@ -1822,15 +2326,17 @@ static void PostPreload(CP_Services Svcs, Rdma_RS_Stream Stream, long Timestep)
 
     PreloadBuffer->BufferLen = 2 * StepLog->BufferSize;
     PreloadBuffer->Handle.Block = malloc(PreloadBuffer->BufferLen);
-    fi_mr_reg(Fabric->domain, PreloadBuffer->Handle.Block, PreloadBuffer->BufferLen,
-              FI_REMOTE_WRITE, 0, 0, 0, &Stream->pbmr, Fabric->ctx);
+    sst_fi_mr_reg(Svcs, Stream->CP_Stream, Fabric->domain, PreloadBuffer->Handle.Block,
+                  PreloadBuffer->BufferLen, FI_REMOTE_WRITE, 0, 0, 0, &Stream->pbmr, Fabric->ctx,
+                  Fabric->signal, Fabric->info->domain_attr->mr_mode);
     PreloadKey = fi_mr_key(Stream->pbmr);
 
     SBSize = sizeof(*SendBuffer) * StepLog->WRanks;
     SendBuffer = malloc(SBSize);
     if (Fabric->local_mr_req)
     {
-        fi_mr_reg(Fabric->domain, SendBuffer, SBSize, FI_WRITE, 0, 0, 0, &sbmr, Fabric->ctx);
+        sst_fi_mr_reg(Svcs, Stream->CP_Stream, Fabric->domain, SendBuffer, SBSize, FI_WRITE, 0, 0,
+                      0, &sbmr, Fabric->ctx, Fabric->signal, Fabric->info->domain_attr->mr_mode);
         sbdesc = fi_mr_desc(sbmr);
     }
 
@@ -1838,8 +2344,9 @@ static void PostPreload(CP_Services Svcs, Rdma_RS_Stream Stream, long Timestep)
     {
         RBLen = 2 * StepLog->Entries * DP_DATA_RECV_SIZE;
         Stream->RecvDataBuffer = malloc(RBLen);
-        fi_mr_reg(Fabric->domain, Stream->RecvDataBuffer, RBLen, FI_RECV, 0, 0, 0, &Stream->rbmr,
-                  Fabric->ctx);
+        sst_fi_mr_reg(Svcs, Stream->CP_Stream, Fabric->domain, Stream->RecvDataBuffer, RBLen,
+                      FI_RECV, 0, 0, 0, &Stream->rbmr, Fabric->ctx, Fabric->signal,
+                      Fabric->info->domain_attr->mr_mode);
         Stream->rbdesc = fi_mr_desc(Stream->rbmr);
         RecvBuffer = (uint8_t *)Stream->RecvDataBuffer;
         for (i = 0; i < 2 * StepLog->Entries; i++)
@@ -1862,9 +2369,10 @@ static void PostPreload(CP_Services Svcs, Rdma_RS_Stream Stream, long Timestep)
         if (RankLog->Entries > 0)
         {
             RankLog->Buffer = (void *)RawPLBuffer;
-            fi_mr_reg(Fabric->domain, RankLog->ReqLog,
-                      (sizeof(struct _RdmaBuffer) * RankLog->Entries) + sizeof(uint64_t),
-                      FI_REMOTE_READ, 0, 0, 0, &RankLog->preqbmr, Fabric->ctx);
+            sst_fi_mr_reg(Svcs, Stream->CP_Stream, Fabric->domain, RankLog->ReqLog,
+                          (sizeof(struct _RdmaBuffer) * RankLog->Entries) + sizeof(uint64_t),
+                          FI_REMOTE_READ, 0, 0, 0, &RankLog->preqbmr, Fabric->ctx, Fabric->signal,
+                          Fabric->info->domain_attr->mr_mode);
             for (j = 0; j < RankLog->Entries; j++)
             {
                 ReqLog = &RankLog->ReqLog[j];
@@ -1883,11 +2391,17 @@ static void PostPreload(CP_Services Svcs, Rdma_RS_Stream Stream, long Timestep)
             SendBuffer[WRidx].Offset = (uint64_t)PreloadKey;
             SendBuffer[WRidx].Handle.Block = (void *)RankLog->ReqLog;
             SendBuffer[WRidx].Handle.Key = fi_mr_key(RankLog->preqbmr);
-            RollDest =
-                (uint64_t)Stream->WriterRoll[i].Block + (sizeof(struct _RdmaBuffer) * Stream->Rank);
-            fi_write(Fabric->signal, &SendBuffer[WRidx], sizeof(struct _RdmaBuffer), sbdesc,
-                     Stream->WriterAddr[i], RollDest, Stream->WriterRoll[i].Key,
-                     &SendBuffer[WRidx]);
+            /*
+             * If mr_virt_addr is zero, we need just the offset,
+             * otherwise we need the remote virtual address composed by
+             * base pointer + offset.
+             */
+            RollDest = Fabric->mr_virt_addr * (uint64_t)Stream->WriterRoll[i].Block +
+                       (sizeof(struct _RdmaBuffer) * Stream->Rank);
+            guard_fi_return((int)fi_write(Fabric->signal, &SendBuffer[WRidx],
+                                          sizeof(struct _RdmaBuffer), sbdesc, Stream->WriterAddr[i],
+                                          RollDest, Stream->WriterRoll[i].Key, &SendBuffer[WRidx]),
+                            Svcs, Stream->CP_Stream, "[PostPreload] fi_write failed with:");
             RankLog->PreloadHandles = malloc(sizeof(void *) * 2);
             RankLog->PreloadHandles[0] =
                 calloc(sizeof(struct _RdmaCompletionHandle), RankLog->Entries);
@@ -1899,7 +2413,19 @@ static void PostPreload(CP_Services Svcs, Rdma_RS_Stream Stream, long Timestep)
 
     while (WRidx > 0)
     {
-        fi_cq_sread(Fabric->cq_signal, (void *)(&CQEntry), 1, NULL, -1);
+        ssize_t rc = fi_cq_sread(Fabric->cq_signal, (void *)(&CQEntry), 1, NULL, -1);
+        if (rc < 1)
+        {
+            struct fi_cq_err_entry error;
+            fi_cq_readerr(Fabric->cq_signal, &error, 0);
+            Svcs->verbose(
+                Stream->CP_Stream, DPCriticalVerbose,
+                "[PostPreload] failure while waiting for completions "
+                "(%d (%s - %s)).\n",
+                rc, fi_strerror(error.err),
+                fi_cq_strerror(Fabric->cq_signal, error.err, error.err_data, NULL, error.len));
+            return;
+        }
         CQBuffer = CQEntry.op_context;
         if (CQBuffer >= SendBuffer && CQBuffer < (SendBuffer + StepLog->WRanks))
         {
@@ -2000,24 +2526,45 @@ static void PullSelection(CP_Services Svcs, Rdma_WSR_Stream Stream)
     ReqBuffer.Handle.Block = ReadBuffer = malloc(ReqBuffer.BufferLen);
     if (Fabric->local_mr_req)
     {
-        fi_mr_reg(Fabric->domain, ReqBuffer.Handle.Block, ReqBuffer.BufferLen, FI_READ, 0, 0, 0,
-                  &rrmr, Fabric->ctx);
+        sst_fi_mr_reg(Svcs, WS_Stream->CP_Stream, Fabric->domain, ReqBuffer.Handle.Block,
+                      ReqBuffer.BufferLen, FI_READ, 0, 0, 0, &rrmr, Fabric->ctx, Fabric->signal,
+                      Fabric->info->domain_attr->mr_mode);
         rrdesc = fi_mr_desc(rrmr);
     }
 
     for (RankReq = Stream->PreloadReq; RankReq; RankReq = RankReq->next)
     {
         RankReq->ReqLog = (RdmaBuffer)ReadBuffer;
-        fi_read(Fabric->signal, RankReq->ReqLog, RankReq->BufferSize, rrdesc,
-                Stream->ReaderAddr[RankReq->Rank], (uint64_t)ReaderRoll[RankReq->Rank].Handle.Block,
-                ReaderRoll[RankReq->Rank].Handle.Key, RankReq);
+        guard_fi_return(
+            (int)fi_read(Fabric->signal, RankReq->ReqLog, RankReq->BufferSize, rrdesc,
+                         Stream->ReaderAddr[RankReq->Rank],
+                         /*
+                          * If mr_virt_addr is 0, then this is a simple
+                          * null-pointer, indicating no offset. Otherwise, we
+                          * need the remote virtual memory read address.
+                          */
+                         Fabric->mr_virt_addr * (uint64_t)ReaderRoll[RankReq->Rank].Handle.Block,
+                         ReaderRoll[RankReq->Rank].Handle.Key, RankReq),
+            Svcs, WS_Stream->CP_Stream, "[PullSelection] fi_read() failed with:");
         ReadBuffer += RankReq->BufferSize;
     }
 
     RankReq = Stream->PreloadReq;
     while (RankReq)
     {
-        fi_cq_sread(Fabric->cq_signal, (void *)(&CQEntry), 1, NULL, -1);
+        ssize_t rc = fi_cq_sread(Fabric->cq_signal, (void *)(&CQEntry), 1, NULL, -1);
+        if (rc < 1)
+        {
+            struct fi_cq_err_entry error;
+            fi_cq_readerr(Fabric->cq_signal, &error, 0);
+            Svcs->verbose(
+                WS_Stream->CP_Stream, DPCriticalVerbose,
+                "[PullSelection] failure while waiting for completions "
+                "(%d (%s - %s)).\n",
+                rc, fi_strerror(error.err),
+                fi_cq_strerror(Fabric->cq_signal, error.err, error.err_data, NULL, error.len));
+            return;
+        }
         CQRankReq = CQEntry.op_context;
         if (CQEntry.flags & FI_READ)
         {
@@ -2049,7 +2596,19 @@ static void CompletePush(CP_Services Svcs, Rdma_WSR_Stream Stream, TimestepList
 
     while (Step->OutstandingWrites > 0)
     {
-        fi_cq_sread(Fabric->cq_signal, (void *)(&CQEntry), 1, NULL, -1);
+        ssize_t rc = fi_cq_sread(Fabric->cq_signal, (void *)(&CQEntry), 1, NULL, -1);
+        if (rc < 1)
+        {
+            struct fi_cq_err_entry error;
+            fi_cq_readerr(Fabric->cq_signal, &error, 0);
+            Svcs->verbose(
+                WS_Stream->CP_Stream, DPCriticalVerbose,
+                "[CompletePush] failure while waiting for completions "
+                "(%d (%s - %s)).\n",
+                rc, fi_strerror(error.err),
+                fi_cq_strerror(Fabric->cq_signal, error.err, error.err_data, NULL, error.len));
+            return;
+        }
         if (CQEntry.flags & FI_WRITE)
         {
             CQTimestep = (long)CQEntry.op_context;
diff --git a/testing/adios2/CMakeLists.txt b/testing/adios2/CMakeLists.txt
index bddb8485f4..dc1ede6bca 100644
--- a/testing/adios2/CMakeLists.txt
+++ b/testing/adios2/CMakeLists.txt
@@ -13,3 +13,6 @@ add_subdirectory(performance)
 add_subdirectory(helper)
 add_subdirectory(hierarchy)
 add_subdirectory(backward_compatibility)
+if (ADIOS2_HAVE_Derived_Variable)
+add_subdirectory(derived)
+endif()
diff --git a/testing/adios2/derived/CMakeLists.txt b/testing/adios2/derived/CMakeLists.txt
new file mode 100644
index 0000000000..2df2938853
--- /dev/null
+++ b/testing/adios2/derived/CMakeLists.txt
@@ -0,0 +1,6 @@
+#------------------------------------------------------------------------------#
+#Distributed under the OSI - approved Apache License, Version 2.0. See
+#accompanying file Copyright.txt for details.
+#------------------------------------------------------------------------------#
+
+gtest_add_tests_helper(DerivedCorrectness MPI_ALLOW BP Derived. "")
diff --git a/testing/adios2/derived/TestBPDerivedCorrectness.cpp b/testing/adios2/derived/TestBPDerivedCorrectness.cpp
new file mode 100644
index 0000000000..002015b843
--- /dev/null
+++ b/testing/adios2/derived/TestBPDerivedCorrectness.cpp
@@ -0,0 +1,183 @@
+#include <cstdint>
+#include <cstring>
+
+#include <cmath>
+#include <iostream>
+#include <numeric>
+#include <random>
+#include <stdexcept>
+#include <vector>
+
+#include <adios2.h>
+#include <gtest/gtest.h>
+
+TEST(DerivedCorrectness, AddCorrectnessTest)
+{
+    const size_t Nx = 10, Ny = 3, Nz = 6;
+    const size_t steps = 2;
+    /** Application variable */
+    std::default_random_engine generator;
+    std::uniform_real_distribution<float> distribution(0.0, 10.0);
+
+    std::vector<float> simArray1(Nx * Ny * Nz);
+    std::vector<float> simArray2(Nx * Ny * Nz);
+    std::vector<float> simArray3(Nx * Ny * Nz);
+    for (size_t i = 0; i < Nx * Ny * Nz; ++i)
+    {
+        simArray1[i] = distribution(generator);
+        simArray2[i] = distribution(generator);
+        simArray3[i] = distribution(generator);
+    }
+
+    adios2::ADIOS adios;
+
+    adios2::IO bpOut = adios.DeclareIO("BPWriteAddExpression");
+
+    std::vector<std::string> varname = {"sim1/Ux", "sim1/Uy", "sim1/Uz"};
+    std::string derivedname = "derived/addU";
+
+    std::cout << "Define Variable " << varname[0] << std::endl;
+    auto Ux = bpOut.DefineVariable<float>(varname[0], {Nx, Ny, Nz}, {0, 0, 0}, {Nx, Ny, Nz});
+    std::cout << "Define Variable " << varname[1] << std::endl;
+    auto Uy = bpOut.DefineVariable<float>(varname[1], {Nx, Ny, Nz}, {0, 0, 0}, {Nx, Ny, Nz});
+    std::cout << "Define Variable " << varname[2] << std::endl;
+    auto Uz = bpOut.DefineVariable<float>(varname[2], {Nx, Ny, Nz}, {0, 0, 0}, {Nx, Ny, Nz});
+    std::cout << "Define Derived Variable " << derivedname << std::endl;
+    // clang-format off
+    auto addU = bpOut.DefineDerivedVariable(derivedname,
+                                            "x:" + varname[0] + " \n"
+                                            "y:" + varname[1] + " \n"
+                                            "z:" + varname[2] + " \n"
+                                            "x+y+z",
+                                            adios2::DerivedVarType::StoreData);
+    // clang-format on
+    std::string filename = "expAdd.bp";
+    adios2::Engine bpFileWriter = bpOut.Open(filename, adios2::Mode::Write);
+
+    for (int i = 0; i < steps; i++)
+    {
+        bpFileWriter.BeginStep();
+        bpFileWriter.Put(Ux, simArray1.data());
+        bpFileWriter.Put(Uy, simArray2.data());
+        bpFileWriter.Put(Uz, simArray3.data());
+        bpFileWriter.EndStep();
+    }
+    bpFileWriter.Close();
+
+    adios2::IO bpIn = adios.DeclareIO("BPReadExpression");
+    adios2::Engine bpFileReader = bpIn.Open(filename, adios2::Mode::Read);
+
+    std::vector<float> readUx;
+    std::vector<float> readUy;
+    std::vector<float> readUz;
+    std::vector<float> readAdd;
+
+    float calcA;
+    float epsilon = 0.01;
+    for (int i = 0; i < steps; i++)
+    {
+        bpFileReader.BeginStep();
+        bpFileReader.Get(varname[0], readUx);
+        bpFileReader.Get(varname[1], readUy);
+        bpFileReader.Get(varname[2], readUz);
+        bpFileReader.Get(derivedname, readAdd);
+        bpFileReader.EndStep();
+
+        for (size_t ind = 0; ind < Nx * Ny * Nz; ++ind)
+        {
+            calcA = readUx[ind] + readUy[ind] + readUz[ind];
+            EXPECT_TRUE(fabs(calcA - readAdd[ind]) < epsilon);
+        }
+    }
+    bpFileReader.Close();
+}
+
+TEST(DerivedCorrectness, MagCorrectnessTest)
+{
+    const size_t Nx = 2, Ny = 3, Nz = 10;
+    const size_t steps = 2;
+    // Application variable
+    std::default_random_engine generator;
+    std::uniform_real_distribution<float> distribution(0.0, 10.0);
+
+    std::vector<float> simArray1(Nx * Ny * Nz);
+    std::vector<float> simArray2(Nx * Ny * Nz);
+    std::vector<float> simArray3(Nx * Ny * Nz);
+    for (size_t i = 0; i < Nx * Ny * Nz; ++i)
+    {
+        simArray1[i] = distribution(generator);
+        simArray2[i] = distribution(generator);
+        simArray3[i] = distribution(generator);
+    }
+
+    adios2::ADIOS adios;
+    adios2::IO bpOut = adios.DeclareIO("BPWriteExpression");
+    std::vector<std::string> varname = {"sim2/Ux", "sim2/Uy", "sim2/Uz"};
+    std::string derivedname = "derived/magU";
+
+    auto Ux = bpOut.DefineVariable<float>(varname[0], {Nx, Ny, Nz}, {0, 0, 0}, {Nx, Ny, Nz});
+    auto Uy = bpOut.DefineVariable<float>(varname[1], {Nx, Ny, Nz}, {0, 0, 0}, {Nx, Ny, Nz});
+    auto Uz = bpOut.DefineVariable<float>(varname[2], {Nx, Ny, Nz}, {0, 0, 0}, {Nx, Ny, Nz});
+    // clang-format off
+    auto magU = bpOut.DefineDerivedVariable(derivedname,
+                                            "x:" + varname[0] + " \n"
+                                            "y:" + varname[1] + " \n"
+                                            "z:" + varname[2] + " \n"
+                                            "magnitude(x,y,z)",
+                                            adios2::DerivedVarType::StoreData);
+    // clang-format on
+    std::string filename = "expMagnitude.bp";
+    adios2::Engine bpFileWriter = bpOut.Open(filename, adios2::Mode::Write);
+
+    for (int i = 0; i < steps; i++)
+    {
+        bpFileWriter.BeginStep();
+        bpFileWriter.Put(Ux, simArray1.data());
+        bpFileWriter.Put(Uy, simArray2.data());
+        bpFileWriter.Put(Uz, simArray3.data());
+        bpFileWriter.EndStep();
+    }
+    bpFileWriter.Close();
+
+    adios2::IO bpIn = adios.DeclareIO("BPReadMagExpression");
+    adios2::Engine bpFileReader = bpIn.Open(filename, adios2::Mode::Read);
+
+    std::vector<float> readUx;
+    std::vector<float> readUy;
+    std::vector<float> readUz;
+    std::vector<float> readMag;
+
+    float calcM;
+    float epsilon = 0.01;
+    for (int i = 0; i < steps; i++)
+    {
+        bpFileReader.BeginStep();
+        auto varx = bpIn.InquireVariable<float>(varname[0]);
+        auto vary = bpIn.InquireVariable<float>(varname[1]);
+        auto varz = bpIn.InquireVariable<float>(varname[2]);
+        auto varmag = bpIn.InquireVariable<float>(derivedname);
+
+        bpFileReader.Get(varx, readUx);
+        bpFileReader.Get(vary, readUy);
+        bpFileReader.Get(varz, readUz);
+        bpFileReader.Get(varmag, readMag);
+        bpFileReader.EndStep();
+
+        for (size_t ind = 0; ind < Nx * Ny * Nz; ++ind)
+        {
+            calcM = sqrt(pow(readUx[ind], 2) + pow(readUy[ind], 2) + pow(readUz[ind], 2));
+            EXPECT_TRUE(fabs(calcM - readMag[ind]) < epsilon);
+        }
+    }
+    bpFileReader.Close();
+}
+
+int main(int argc, char **argv)
+{
+    int result;
+    ::testing::InitGoogleTest(&argc, argv);
+
+    result = RUN_ALL_TESTS();
+
+    return result;
+}
diff --git a/testing/contract/examples/build.sh b/testing/contract/examples/build.sh
new file mode 100755
index 0000000000..0d2a15a363
--- /dev/null
+++ b/testing/contract/examples/build.sh
@@ -0,0 +1,12 @@
+#!/bin/bash
+
+set -x
+set -e
+
+# shellcheck disable=SC1091
+source "$(dirname "$(readlink -f "${BASH_SOURCE[0]}")")"/setup.sh
+
+# Fail if is not set
+build_dir="${build_dir:?}"
+
+cmake --build "${build_dir}" -j8
\ No newline at end of file
diff --git a/testing/contract/examples/config.sh b/testing/contract/examples/config.sh
new file mode 100755
index 0000000000..79d1ea5964
--- /dev/null
+++ b/testing/contract/examples/config.sh
@@ -0,0 +1,17 @@
+#!/bin/bash
+
+set -x
+set -e
+
+# shellcheck disable=SC1091
+source "$(dirname "$(readlink -f "${BASH_SOURCE[0]}")")"/setup.sh
+
+# Fail if is not set
+source_dir="${source_dir:?}"
+build_dir="${build_dir:?}"
+install_dir="${install_dir:?}"
+
+mkdir -p "${build_dir}"
+cd "${build_dir}"
+
+cmake -DCMAKE_INSTALL_PREFIX="${install_dir}" "${source_dir}"
\ No newline at end of file
diff --git a/testing/contract/examples/depends.sh b/testing/contract/examples/depends.sh
new file mode 100755
index 0000000000..8c3cbfc39d
--- /dev/null
+++ b/testing/contract/examples/depends.sh
@@ -0,0 +1,3 @@
+#!/bin/bash
+
+exit 0
diff --git a/testing/contract/examples/install.sh b/testing/contract/examples/install.sh
new file mode 100755
index 0000000000..3581430e48
--- /dev/null
+++ b/testing/contract/examples/install.sh
@@ -0,0 +1,12 @@
+#!/bin/bash
+
+set -x
+set -e
+
+# shellcheck disable=SC1091
+source "$(dirname "$(readlink -f "${BASH_SOURCE[0]}")")"/setup.sh
+
+# Fail if is not set
+build_dir="${build_dir:?}"
+
+cmake --install "${build_dir}"
diff --git a/testing/contract/examples/setup.sh b/testing/contract/examples/setup.sh
new file mode 100755
index 0000000000..ee88b0a2cc
--- /dev/null
+++ b/testing/contract/examples/setup.sh
@@ -0,0 +1,13 @@
+#!/bin/bash
+
+source_dir="/opt/adios2/source/examples"
+build_dir=$(readlink -f "${PWD}")/build
+install_dir=$(readlink -f "${PWD}")/install
+
+export source_dir
+export build_dir
+export install_dir
+
+echo "source_dir  = \"${source_dir}\""
+echo "build_dir   = \"${build_dir}\""
+echo "install_dir = \"${install_dir}\""
diff --git a/testing/contract/examples/test.sh b/testing/contract/examples/test.sh
new file mode 100755
index 0000000000..1eae099c25
--- /dev/null
+++ b/testing/contract/examples/test.sh
@@ -0,0 +1,12 @@
+#!/bin/bash
+
+set -x
+set -e
+
+# shellcheck disable=SC1091
+source "$(dirname "$(readlink -f "${BASH_SOURCE[0]}")")"/setup.sh
+
+# Fail if is not set
+install_dir="${install_dir:?}"
+
+"${install_dir}"/bin/adios2_hello_helloWorld
diff --git a/testing/contract/lammps/build.sh b/testing/contract/lammps/build.sh
index 0290ec8f3d..436861e621 100755
--- a/testing/contract/lammps/build.sh
+++ b/testing/contract/lammps/build.sh
@@ -3,6 +3,10 @@
 set -x
 set -e
 
-source $(dirname $(readlink -f ${BASH_SOURCE}))/setup.sh
+# shellcheck disable=SC1091
+source "$(dirname "$(readlink -f "${BASH_SOURCE[0]}")")/setup.sh"
 
-cmake --build ${build_dir} -j8
+# Fail if is not set
+build_dir="${build_dir:?}"
+
+cmake --build "${build_dir}" -j8
diff --git a/testing/contract/lammps/config.sh b/testing/contract/lammps/config.sh
index 14837d843a..256306608d 100755
--- a/testing/contract/lammps/config.sh
+++ b/testing/contract/lammps/config.sh
@@ -3,17 +3,23 @@
 set -x
 set -e
 
-source $(dirname $(readlink -f ${BASH_SOURCE}))/setup.sh
+# shellcheck disable=SC1091
+source "$(dirname "$(readlink -f "${BASH_SOURCE[0]}")")/setup.sh"
 
-mkdir -p ${build_dir}
-cd ${build_dir}
+# Fail if is not set
+source_dir="${source_dir:?}"
+build_dir="${build_dir:?}"
+install_dir="${install_dir:?}"
+
+mkdir -p "${build_dir}"
+cd "${build_dir}"
 
 cmake \
-  -DCMAKE_INSTALL_PREFIX=${install_dir} \
+  -DCMAKE_INSTALL_PREFIX="${install_dir}" \
   -DBUILD_MPI=yes \
   -DBUILD_EXE=yes \
   -DBUILD_LIB=no \
   -DBUILD_DOC=no \
   -DLAMMPS_SIZES=smallbig \
   -DPKG_ADIOS=yes \
-  ${source_dir}/cmake
+  "${source_dir}/cmake"
diff --git a/testing/contract/lammps/install.sh b/testing/contract/lammps/install.sh
index f319d25356..26b65c1592 100755
--- a/testing/contract/lammps/install.sh
+++ b/testing/contract/lammps/install.sh
@@ -3,6 +3,10 @@
 set -x
 set -e
 
-source $(dirname $(readlink -f ${BASH_SOURCE}))/setup.sh
+# shellcheck disable=SC1091
+source "$(dirname "$(readlink -f "${BASH_SOURCE[0]}")")/setup.sh"
 
-cmake --install ${build_dir}
+# Fail if is not set
+build_dir="${build_dir:?}"
+
+cmake --install "${build_dir}"
diff --git a/testing/contract/lammps/setup.sh b/testing/contract/lammps/setup.sh
index ca74ccf68a..00a4a7f1d5 100755
--- a/testing/contract/lammps/setup.sh
+++ b/testing/contract/lammps/setup.sh
@@ -1,7 +1,14 @@
-source_dir=$(readlink -f ${PWD})/source
-build_dir=$(readlink -f ${PWD})/build
-install_dir=$(readlink -f ${PWD})/install
-test_dir=$(readlink -f ${PWD})/test
+#!/bin/bash
+
+source_dir=$(readlink -f "${PWD}")/source
+build_dir=$(readlink -f "${PWD}")/build
+install_dir=$(readlink -f "${PWD}")/install
+test_dir=$(readlink -f "${PWD}")/test
+
+export source_dir
+export build_dir
+export install_dir
+export test_dir
 
 echo "source_dir  = \"${source_dir}\""
 echo "build_dir   = \"${build_dir}\""
diff --git a/testing/contract/lammps/test.sh b/testing/contract/lammps/test.sh
index 2c378c1fda..28a0486b34 100755
--- a/testing/contract/lammps/test.sh
+++ b/testing/contract/lammps/test.sh
@@ -3,13 +3,18 @@
 set -x
 set -e
 
-source $(dirname $(readlink -f ${BASH_SOURCE}))/setup.sh
+# shellcheck disable=SC1091
+source "$(dirname "$(readlink -f "${BASH_SOURCE[0]}")")/setup.sh"
 
-mkdir -p ${test_dir}
-cd ${test_dir}
+# Fail if is not set
+install_dir="${install_dir:?}"
+test_dir="${test_dir:?}"
+
+mkdir -p "${test_dir}"
+cd "${test_dir}"
 cp -v /opt/adios2/source/testing/contract/lammps/{adios2_config.xml,check_results.sh,in.test} .
 
 
-mpiexec -np 4 --oversubscribe ${install_dir}/bin/lmp -in in.test
+mpiexec -np 4 --oversubscribe "${install_dir}/bin/lmp" -in in.test
 
 ./check_results.sh
diff --git a/testing/contract/scorpio/build.sh b/testing/contract/scorpio/build.sh
index 0290ec8f3d..436861e621 100755
--- a/testing/contract/scorpio/build.sh
+++ b/testing/contract/scorpio/build.sh
@@ -3,6 +3,10 @@
 set -x
 set -e
 
-source $(dirname $(readlink -f ${BASH_SOURCE}))/setup.sh
+# shellcheck disable=SC1091
+source "$(dirname "$(readlink -f "${BASH_SOURCE[0]}")")/setup.sh"
 
-cmake --build ${build_dir} -j8
+# Fail if is not set
+build_dir="${build_dir:?}"
+
+cmake --build "${build_dir}" -j8
diff --git a/testing/contract/scorpio/config.sh b/testing/contract/scorpio/config.sh
index bb88cc834c..9ff170bd49 100755
--- a/testing/contract/scorpio/config.sh
+++ b/testing/contract/scorpio/config.sh
@@ -3,22 +3,28 @@
 set -x
 set -e
 
-source $(dirname $(readlink -f ${BASH_SOURCE}))/setup.sh
+# shellcheck disable=SC1091
+source "$(dirname "$(readlink -f "${BASH_SOURCE[0]}")")/setup.sh"
 
-mkdir -p ${build_dir}
-cd ${build_dir}
+# Fail if is not set
+source_dir="${source_dir:?}"
+build_dir="${build_dir:?}"
+install_dir="${install_dir:?}"
+
+mkdir -p "${build_dir}"
+cd "${build_dir}"
 
 export CC=mpicc
 export CXX=mpic++
 export FC=mpifort
 
 cmake \
-  -DCMAKE_INSTALL_PREFIX=${install_dir} \
+  -DCMAKE_INSTALL_PREFIX="${install_dir}" \
   -DFPHSA_NAME_MISMATCHED=ON \
   -DPIO_ENABLE_TESTS=ON \
   -DPIO_ENABLE_EXAMPLES=ON \
   -DWITH_NETCDF=OFF \
   -DWITH_PNETCDF=ON \
-  -DPnetCDF_PATH=$(spack location -i parallel-netcdf) \
+  -DPnetCDF_PATH="$(spack location -i parallel-netcdf)" \
   -DWITH_ADIOS2=ON \
-  ${source_dir}
+  "${source_dir}"
diff --git a/testing/contract/scorpio/install.sh b/testing/contract/scorpio/install.sh
index f319d25356..26b65c1592 100755
--- a/testing/contract/scorpio/install.sh
+++ b/testing/contract/scorpio/install.sh
@@ -3,6 +3,10 @@
 set -x
 set -e
 
-source $(dirname $(readlink -f ${BASH_SOURCE}))/setup.sh
+# shellcheck disable=SC1091
+source "$(dirname "$(readlink -f "${BASH_SOURCE[0]}")")/setup.sh"
 
-cmake --install ${build_dir}
+# Fail if is not set
+build_dir="${build_dir:?}"
+
+cmake --install "${build_dir}"
diff --git a/testing/contract/scorpio/setup.sh b/testing/contract/scorpio/setup.sh
index 08fbcaa5e2..1e32491be1 100755
--- a/testing/contract/scorpio/setup.sh
+++ b/testing/contract/scorpio/setup.sh
@@ -1,7 +1,14 @@
-source_dir=$(readlink -f ${PWD})/source
-build_dir=$(readlink -f ${PWD})/build
-install_dir=$(readlink -f ${PWD})/install
-test_dir=$(readlink -f ${PWD})/test
+#!/bin/bash
+
+source_dir=$(readlink -f "${PWD}")/source
+build_dir=$(readlink -f "${PWD}")/build
+install_dir=$(readlink -f "${PWD}")/install
+test_dir=$(readlink -f "${PWD}")/test
+
+export source_dir
+export build_dir
+export install_dir
+export test_dir
 
 echo "source_dir  = \"${source_dir}\""
 echo "build_dir   = \"${build_dir}\""
diff --git a/testing/contract/scorpio/test.sh b/testing/contract/scorpio/test.sh
index 0249fa7d5f..94b4fca85d 100755
--- a/testing/contract/scorpio/test.sh
+++ b/testing/contract/scorpio/test.sh
@@ -3,12 +3,17 @@
 set -x
 set -e
 
-source $(dirname $(readlink -f ${BASH_SOURCE}))/setup.sh
+# shellcheck disable=SC1091
+source "$(dirname "$(readlink -f "${BASH_SOURCE[0]}")")/setup.sh"
 
-mkdir -p ${test_dir}
-cd ${test_dir}
+# Fail if is not set
+build_dir="${build_dir:?}"
+test_dir="${test_dir:?}"
 
-mpiexec --oversubscribe -np 4 ${build_dir}/examples/adios/example3 -v
+mkdir -p "${test_dir}"
+cd "${test_dir}"
+
+mpiexec --oversubscribe -np 4 "${build_dir}/examples/adios/example3" -v
 
 bpls -d example3_1.nc.bp.dir/example3_1.nc.bp.0 > 0.dump
 diff -u 0.dump /opt/adios2/source/testing/contract/scorpio/0.dump
diff --git a/testing/contract/tau/build.sh b/testing/contract/tau/build.sh
index 0290ec8f3d..436861e621 100755
--- a/testing/contract/tau/build.sh
+++ b/testing/contract/tau/build.sh
@@ -3,6 +3,10 @@
 set -x
 set -e
 
-source $(dirname $(readlink -f ${BASH_SOURCE}))/setup.sh
+# shellcheck disable=SC1091
+source "$(dirname "$(readlink -f "${BASH_SOURCE[0]}")")/setup.sh"
 
-cmake --build ${build_dir} -j8
+# Fail if is not set
+build_dir="${build_dir:?}"
+
+cmake --build "${build_dir}" -j8
diff --git a/testing/contract/tau/config.sh b/testing/contract/tau/config.sh
index a89b2deece..0fd803a1a6 100755
--- a/testing/contract/tau/config.sh
+++ b/testing/contract/tau/config.sh
@@ -3,11 +3,15 @@
 set -x
 set -e
 
-source $(dirname $(readlink -f ${BASH_SOURCE}))/setup.sh
+# shellcheck disable=SC1091
+source "$(dirname "$(readlink -f "${BASH_SOURCE[0]}")")/setup.sh"
 
-mkdir -p ${build_dir}
-cd ${build_dir}
+# Fail if is not set
+source_dir="${source_dir:?}"
+build_dir="${build_dir:?}"
+install_dir="${install_dir:?}"
 
-cmake \
-  -DCMAKE_INSTALL_PREFIX=${install_dir} \
-  ${source_dir}
+mkdir -p "${build_dir}"
+cd "${build_dir}"
+
+cmake -DCMAKE_INSTALL_PREFIX="${install_dir}" "${source_dir}"
diff --git a/testing/contract/tau/install.sh b/testing/contract/tau/install.sh
index f319d25356..26b65c1592 100755
--- a/testing/contract/tau/install.sh
+++ b/testing/contract/tau/install.sh
@@ -3,6 +3,10 @@
 set -x
 set -e
 
-source $(dirname $(readlink -f ${BASH_SOURCE}))/setup.sh
+# shellcheck disable=SC1091
+source "$(dirname "$(readlink -f "${BASH_SOURCE[0]}")")/setup.sh"
 
-cmake --install ${build_dir}
+# Fail if is not set
+build_dir="${build_dir:?}"
+
+cmake --install "${build_dir}"
diff --git a/testing/contract/tau/setup.sh b/testing/contract/tau/setup.sh
index d79a4d9ee9..a9e1aff92d 100755
--- a/testing/contract/tau/setup.sh
+++ b/testing/contract/tau/setup.sh
@@ -1,7 +1,14 @@
+#!/bin/bash
+
 source_dir="/opt/adios2/source/examples/basics/variablesShapes"
-build_dir=$(readlink -f ${PWD})/build
-install_dir=$(readlink -f ${PWD})/install
-test_dir=$(readlink -f ${PWD})/test
+build_dir=$(readlink -f "${PWD}")/build
+install_dir=$(readlink -f "${PWD}")/install
+test_dir=$(readlink -f "${PWD}")/test
+
+export source_dir
+export build_dir
+export install_dir
+export test_dir
 
 echo "source_dir  = \"${source_dir}\""
 echo "build_dir   = \"${build_dir}\""
diff --git a/testing/contract/tau/test.sh b/testing/contract/tau/test.sh
index a696145de3..47ac7b1769 100755
--- a/testing/contract/tau/test.sh
+++ b/testing/contract/tau/test.sh
@@ -3,14 +3,19 @@
 set -x
 set -e
 
-source $(dirname $(readlink -f ${BASH_SOURCE}))/setup.sh
+# shellcheck disable=SC1091
+source "$(dirname "$(readlink -f "${BASH_SOURCE[0]}")")/setup.sh"
 
-mkdir -p ${test_dir}
-cd ${test_dir}
+# Fail if is not set
+install_dir="${install_dir:?}"
+test_dir="${test_dir:?}"
+
+mkdir -p "${test_dir}"
+cd "${test_dir}"
 
 TAU=$(spack location -i tau)/bin/tau_exec
 
-mpiexec -np 2 ${TAU} ${install_dir}/bin/adios2_basics_variablesShapes
+mpiexec -np 2 "${TAU}" "${install_dir}/bin/adios2_basics_variablesShapes"
 
 [ ! -f profile.0.0.0 ] || [ ! -s profile.0.0.0 ] && { echo "Error: file profile.0.0.0 not found or empty"; exit 1; }
 [ ! -f profile.1.0.0 ] || [ ! -s profile.1.0.0 ] && { echo "Error: file profile.1.0.0 not found or empty"; exit 1; }
diff --git a/thirdparty/perfstubs/perfstubs/LICENSE b/thirdparty/perfstubs/perfstubs/LICENSE
index b10754dbe6..667733e1f6 100644
--- a/thirdparty/perfstubs/perfstubs/LICENSE
+++ b/thirdparty/perfstubs/perfstubs/LICENSE
@@ -1,6 +1,6 @@
 BSD 3-Clause License
 
-Copyright (c) 2019-2020, Kevin Huck
+Copyright (c) 2019-2022, Kevin Huck and University of Oregon
 All rights reserved.
 
 Redistribution and use in source and binary forms, with or without
diff --git a/thirdparty/perfstubs/perfstubs/README.md b/thirdparty/perfstubs/perfstubs/README.md
index 75f77ec1bb..60bcf5565f 100644
--- a/thirdparty/perfstubs/perfstubs/README.md
+++ b/thirdparty/perfstubs/perfstubs/README.md
@@ -1,6 +1,6 @@
 # PerfStubs
 
-Copyright (c) 2019-2020 University of Oregon
+Copyright (c) 2019-2022 University of Oregon
 Distributed under the BSD Software License
 (See accompanying file LICENSE.txt)
 
diff --git a/thirdparty/perfstubs/perfstubs/perfstubs_api/README.md b/thirdparty/perfstubs/perfstubs/perfstubs_api/README.md
index b0a86dfb3e..cb1579599c 100644
--- a/thirdparty/perfstubs/perfstubs/perfstubs_api/README.md
+++ b/thirdparty/perfstubs/perfstubs/perfstubs_api/README.md
@@ -1,6 +1,6 @@
-# Profiling Interface for ADIOS2
+# Profiling Interface for Libraries and Applications
 
-Copyright (c) 2019-2020 University of Oregon
+Copyright (c) 2019-2022 University of Oregon
 Distributed under the BSD Software License
 (See accompanying file LICENSE.txt)
 
@@ -10,7 +10,7 @@ This is a generic design and implementation for other libraries and tools.
 ## Todo Items
 - [x] Make the interface generic.
     - [x] Replace ADIOST-specific symbols with generic versions that will be
-      implemented by interested measurement libraries (i.e. Score-P). 
+      implemented by interested measurement libraries (i.e. Score-P).
     - ~~[ ] New environment variable specifying location of library containing
       function implementations.~~
     - [x] Remove dynamic-linking specific approach (checking ```LD_PRELOAD```)
diff --git a/thirdparty/perfstubs/perfstubs/perfstubs_api/config.h.in b/thirdparty/perfstubs/perfstubs/perfstubs_api/config.h.in
index 222b53cc62..afc84f1418 100644
--- a/thirdparty/perfstubs/perfstubs/perfstubs_api/config.h.in
+++ b/thirdparty/perfstubs/perfstubs/perfstubs_api/config.h.in
@@ -1,5 +1,5 @@
 // the configured options and settings for Tutorial
-// Copyright (c) 2019-2020 University of Oregon
+// Copyright (c) 2019-2022 University of Oregon
 // Distributed under the BSD Software License
 // (See accompanying file LICENSE.txt)
 #define PerfStubs_VERSION_MAJOR @PerfStubs_VERSION_MAJOR@
diff --git a/thirdparty/perfstubs/perfstubs/perfstubs_api/timer.c b/thirdparty/perfstubs/perfstubs/perfstubs_api/timer.c
index b4177e5521..d219ea5e5b 100644
--- a/thirdparty/perfstubs/perfstubs/perfstubs_api/timer.c
+++ b/thirdparty/perfstubs/perfstubs/perfstubs_api/timer.c
@@ -1,22 +1,20 @@
-// Copyright (c) 2019-2020 University of Oregon
+// Copyright (c) 2019-2022 University of Oregon
 // Distributed under the BSD Software License
 // (See accompanying file LICENSE.txt)
 
 #ifndef _GNU_SOURCE
 #define _GNU_SOURCE // needed to define RTLD_DEFAULT
 #endif
+#include <dlfcn.h>
 #include <stdlib.h>
 #include <stdio.h>
 #include <math.h>
-#include <dlfcn.h>
 #include "pthread.h"
 #ifndef PERFSTUBS_USE_TIMERS
 #define PERFSTUBS_USE_TIMERS
 #endif
 #include "perfstubs_api/timer.h"
 
-#define MAX_TOOLS 1
-
 /* Make sure that the Timer singleton is constructed when the
  * library is loaded.  This will ensure (on linux, anyway) that
  * we can assert that we have m_Initialized on the main thread. */
@@ -39,30 +37,30 @@ static void make_key(void) {
 
 /* Function pointers */
 
-ps_initialize_t initialize_functions[MAX_TOOLS];
-ps_finalize_t finalize_functions[MAX_TOOLS];
-ps_pause_measurement_t pause_measurement_functions[MAX_TOOLS];
-ps_resume_measurement_t resume_measurement_functions[MAX_TOOLS];
-ps_register_thread_t register_thread_functions[MAX_TOOLS];
-ps_dump_data_t dump_data_functions[MAX_TOOLS];
-ps_timer_create_t timer_create_functions[MAX_TOOLS];
-ps_timer_start_t timer_start_functions[MAX_TOOLS];
-ps_timer_stop_t timer_stop_functions[MAX_TOOLS];
-ps_start_string_t start_string_functions[MAX_TOOLS];
-ps_stop_string_t stop_string_functions[MAX_TOOLS];
-ps_stop_current_t stop_current_functions[MAX_TOOLS];
-ps_set_parameter_t set_parameter_functions[MAX_TOOLS];
-ps_dynamic_phase_start_t dynamic_phase_start_functions[MAX_TOOLS];
-ps_dynamic_phase_stop_t dynamic_phase_stop_functions[MAX_TOOLS];
-ps_create_counter_t create_counter_functions[MAX_TOOLS];
-ps_sample_counter_t sample_counter_functions[MAX_TOOLS];
-ps_set_metadata_t set_metadata_functions[MAX_TOOLS];
-ps_get_timer_data_t get_timer_data_functions[MAX_TOOLS];
-ps_get_counter_data_t get_counter_data_functions[MAX_TOOLS];
-ps_get_metadata_t get_metadata_functions[MAX_TOOLS];
-ps_free_timer_data_t free_timer_data_functions[MAX_TOOLS];
-ps_free_counter_data_t free_counter_data_functions[MAX_TOOLS];
-ps_free_metadata_t free_metadata_functions[MAX_TOOLS];
+ps_initialize_t initialize_function;
+ps_finalize_t finalize_function;
+ps_pause_measurement_t pause_measurement_function;
+ps_resume_measurement_t resume_measurement_function;
+ps_register_thread_t register_thread_function;
+ps_dump_data_t dump_data_function;
+ps_timer_create_t timer_create_function;
+ps_timer_start_t timer_start_function;
+ps_timer_stop_t timer_stop_function;
+ps_start_string_t start_string_function;
+ps_stop_string_t stop_string_function;
+ps_stop_current_t stop_current_function;
+ps_set_parameter_t set_parameter_function;
+ps_dynamic_phase_start_t dynamic_phase_start_function;
+ps_dynamic_phase_stop_t dynamic_phase_stop_function;
+ps_create_counter_t create_counter_function;
+ps_sample_counter_t sample_counter_function;
+ps_set_metadata_t set_metadata_function;
+ps_get_timer_data_t get_timer_data_function;
+ps_get_counter_data_t get_counter_data_function;
+ps_get_metadata_t get_metadata_function;
+ps_free_timer_data_t free_timer_data_function;
+ps_free_counter_data_t free_counter_data_function;
+ps_free_metadata_t free_metadata_function;
 
 #ifdef PERFSTUBS_USE_STATIC
 
@@ -102,93 +100,94 @@ PS_WEAK_PRE void ps_tool_free_counter_data(ps_tool_counter_data_t *) PS_WEAK_POS
 PS_WEAK_PRE void ps_tool_free_metadata(ps_tool_metadata_t *) PS_WEAK_POST;
 #endif
 
-void initialize_library() {
+void initialize_library(void) {
 #ifdef PERFSTUBS_USE_STATIC
     /* The initialization function is the only required one */
-    initialize_functions[0] = &ps_tool_initialize;
-    if (initialize_functions[0] == NULL) {
+    initialize_function = &ps_tool_initialize;
+    if (initialize_function == NULL) {
         perfstubs_initialized = PERFSTUBS_FAILURE;
         return;
     }
-    printf("Found ps_tool_initialize(), registering tool\n");
-    finalize_functions[0] = &ps_tool_finalize;
-    pause_measurement_functions[0] = &ps_tool_pause_measurement;
-    resume_measurement_functions[0] = &ps_tool_resume_measurement;
-    register_thread_functions[0] = &ps_tool_register_thread;
-    dump_data_functions[0] = &ps_tool_dump_data;
-    timer_create_functions[0] = &ps_tool_timer_create;
-    timer_start_functions[0] = &ps_tool_timer_start;
-    timer_stop_functions[0] = &ps_tool_timer_stop;
-    start_string_functions[0] = &ps_tool_start_string;
-    stop_string_functions[0] = &ps_tool_stop_string;
-    stop_current_functions[0] = &ps_tool_stop_current;
-    set_parameter_functions[0] = &ps_tool_set_parameter;
-    dynamic_phase_start_functions[0] = &ps_tool_dynamic_phase_start;
-    dynamic_phase_stop_functions[0] = &ps_tool_dynamic_phase_stop;
-    create_counter_functions[0] = &ps_tool_create_counter;
-    sample_counter_functions[0] = &ps_tool_sample_counter;
-    set_metadata_functions[0] = &ps_tool_set_metadata;
-    get_timer_data_functions[0] = &ps_tool_get_timer_data;
-    get_counter_data_functions[0] = &ps_tool_get_counter_data;
-    get_metadata_functions[0] = &ps_tool_get_metadata;
-    free_timer_data_functions[0] = &ps_tool_free_timer_data;
-    free_counter_data_functions[0] = &ps_tool_free_counter_data;
-    free_metadata_functions[0] = &ps_tool_free_metadata;
+    // removing printf statement for now, it's too noisy.
+    //printf("Found ps_tool_initialize(), registering tool\n");
+    finalize_function = &ps_tool_finalize;
+    pause_measurement_function = &ps_tool_pause_measurement;
+    resume_measurement_function = &ps_tool_resume_measurement;
+    register_thread_function = &ps_tool_register_thread;
+    dump_data_function = &ps_tool_dump_data;
+    timer_create_function = &ps_tool_timer_create;
+    timer_start_function = &ps_tool_timer_start;
+    timer_stop_function = &ps_tool_timer_stop;
+    start_string_function = &ps_tool_start_string;
+    stop_string_function = &ps_tool_stop_string;
+    stop_current_function = &ps_tool_stop_current;
+    set_parameter_function = &ps_tool_set_parameter;
+    dynamic_phase_start_function = &ps_tool_dynamic_phase_start;
+    dynamic_phase_stop_function = &ps_tool_dynamic_phase_stop;
+    create_counter_function = &ps_tool_create_counter;
+    sample_counter_function = &ps_tool_sample_counter;
+    set_metadata_function = &ps_tool_set_metadata;
+    get_timer_data_function = &ps_tool_get_timer_data;
+    get_counter_data_function = &ps_tool_get_counter_data;
+    get_metadata_function = &ps_tool_get_metadata;
+    free_timer_data_function = &ps_tool_free_timer_data;
+    free_counter_data_function = &ps_tool_free_counter_data;
+    free_metadata_function = &ps_tool_free_metadata;
 #else
-    initialize_functions[0] =
+    initialize_function =
         (ps_initialize_t)dlsym(RTLD_DEFAULT, "ps_tool_initialize");
-    if (initialize_functions[0] == NULL) {
+    if (initialize_function == NULL) {
         perfstubs_initialized = PERFSTUBS_FAILURE;
         return;
     }
     printf("Found ps_tool_initialize(), registering tool\n");
-    finalize_functions[0] =
+    finalize_function =
         (ps_finalize_t)dlsym(RTLD_DEFAULT, "ps_tool_finalize");
-    pause_measurement_functions[0] =
+    pause_measurement_function =
         (ps_pause_measurement_t)dlsym(RTLD_DEFAULT, "ps_tool_pause_measurement");
-    resume_measurement_functions[0] =
+    resume_measurement_function =
         (ps_resume_measurement_t)dlsym(RTLD_DEFAULT, "ps_tool_resume_measurement");
-    register_thread_functions[0] =
+    register_thread_function =
         (ps_register_thread_t)dlsym(RTLD_DEFAULT, "ps_tool_register_thread");
-    dump_data_functions[0] =
+    dump_data_function =
         (ps_dump_data_t)dlsym(RTLD_DEFAULT, "ps_tool_dump_data");
-    timer_create_functions[0] =
+    timer_create_function =
         (ps_timer_create_t)dlsym(RTLD_DEFAULT,
-        "ps_tool_timer_create");
-    timer_start_functions[0] =
+                "ps_tool_timer_create");
+    timer_start_function =
         (ps_timer_start_t)dlsym(RTLD_DEFAULT, "ps_tool_timer_start");
-    timer_stop_functions[0] =
+    timer_stop_function =
         (ps_timer_stop_t)dlsym(RTLD_DEFAULT, "ps_tool_timer_stop");
-    start_string_functions[0] =
+    start_string_function =
         (ps_start_string_t)dlsym(RTLD_DEFAULT, "ps_tool_start_string");
-    stop_string_functions[0] =
+    stop_string_function =
         (ps_stop_string_t)dlsym(RTLD_DEFAULT, "ps_tool_stop_string");
-    stop_current_functions[0] =
+    stop_current_function =
         (ps_stop_current_t)dlsym(RTLD_DEFAULT, "ps_tool_stop_current");
-    set_parameter_functions[0] =
+    set_parameter_function =
         (ps_set_parameter_t)dlsym(RTLD_DEFAULT, "ps_tool_set_parameter");
-    dynamic_phase_start_functions[0] = (ps_dynamic_phase_start_t)dlsym(
-        RTLD_DEFAULT, "ps_tool_dynamic_phase_start");
-    dynamic_phase_stop_functions[0] = (ps_dynamic_phase_stop_t)dlsym(
-        RTLD_DEFAULT, "ps_tool_dynamic_phase_stop");
-    create_counter_functions[0] = (ps_create_counter_t)dlsym(
-        RTLD_DEFAULT, "ps_tool_create_counter");
-    sample_counter_functions[0] = (ps_sample_counter_t)dlsym(
-        RTLD_DEFAULT, "ps_tool_sample_counter");
-    set_metadata_functions[0] =
+    dynamic_phase_start_function = (ps_dynamic_phase_start_t)dlsym(
+            RTLD_DEFAULT, "ps_tool_dynamic_phase_start");
+    dynamic_phase_stop_function = (ps_dynamic_phase_stop_t)dlsym(
+            RTLD_DEFAULT, "ps_tool_dynamic_phase_stop");
+    create_counter_function = (ps_create_counter_t)dlsym(
+            RTLD_DEFAULT, "ps_tool_create_counter");
+    sample_counter_function = (ps_sample_counter_t)dlsym(
+            RTLD_DEFAULT, "ps_tool_sample_counter");
+    set_metadata_function =
         (ps_set_metadata_t)dlsym(RTLD_DEFAULT, "ps_tool_set_metadata");
-    get_timer_data_functions[0] = (ps_get_timer_data_t)dlsym(
-        RTLD_DEFAULT, "ps_tool_get_timer_data");
-    get_counter_data_functions[0] = (ps_get_counter_data_t)dlsym(
-        RTLD_DEFAULT, "ps_tool_get_counter_data");
-    get_metadata_functions[0] = (ps_get_metadata_t)dlsym(
-        RTLD_DEFAULT, "ps_tool_get_metadata");
-    free_timer_data_functions[0] = (ps_free_timer_data_t)dlsym(
-        RTLD_DEFAULT, "ps_tool_free_timer_data");
-    free_counter_data_functions[0] = (ps_free_counter_data_t)dlsym(
-        RTLD_DEFAULT, "ps_tool_free_counter_data");
-    free_metadata_functions[0] = (ps_free_metadata_t)dlsym(
-        RTLD_DEFAULT, "ps_tool_free_metadata");
+    get_timer_data_function = (ps_get_timer_data_t)dlsym(
+            RTLD_DEFAULT, "ps_tool_get_timer_data");
+    get_counter_data_function = (ps_get_counter_data_t)dlsym(
+            RTLD_DEFAULT, "ps_tool_get_counter_data");
+    get_metadata_function = (ps_get_metadata_t)dlsym(
+            RTLD_DEFAULT, "ps_tool_get_metadata");
+    free_timer_data_function = (ps_free_timer_data_t)dlsym(
+            RTLD_DEFAULT, "ps_tool_free_timer_data");
+    free_counter_data_function = (ps_free_counter_data_t)dlsym(
+            RTLD_DEFAULT, "ps_tool_free_counter_data");
+    free_metadata_function = (ps_free_metadata_t)dlsym(
+            RTLD_DEFAULT, "ps_tool_free_metadata");
 #endif
     perfstubs_initialized = PERFSTUBS_SUCCESS;
     /* Increment the number of tools */
@@ -196,7 +195,7 @@ void initialize_library() {
 }
 
 char * ps_make_timer_name_(const char * file,
-    const char * func, int line) {
+        const char * func, int line) {
     /* The length of the line number as a string is floor(log10(abs(num))) */
     int string_length = (strlen(file) + strlen(func) + floor(log10(abs(line))) + 12);
     char * name = calloc(string_length, sizeof(char));
@@ -206,59 +205,41 @@ char * ps_make_timer_name_(const char * file,
 
 // used internally to the class
 static inline void ps_register_thread_internal(void) {
-    //if (thread_seen == 0) {
     if (pthread_getspecific(key) == NULL) {
-    	int i;
-    	for (i = 0 ; i < num_tools_registered ; i++) {
-        	register_thread_functions[i]();
-    	}
-    	//thread_seen = 1;
-    	pthread_setspecific(key, (void*)1UL);
+        if (register_thread_function != NULL) {
+            register_thread_function();
+            pthread_setspecific(key, (void*)1UL);
+        }
     }
 }
 
 /* Initialization */
 void ps_initialize_(void) {
-    int i;
     /* Only do this once */
     if (perfstubs_initialized != PERFSTUBS_UNKNOWN) {
         return;
     }
     initialize_library();
-    for (i = 0 ; i < num_tools_registered ; i++) {
-        initialize_functions[i]();
-    }
-    /* No need to register the main thread */
-    //thread_seen = 1;
-    (void) pthread_once(&key_once, make_key);
-    if (pthread_getspecific(key) == NULL) {
-        // set the key to 1, indicating we have seen this thread
+    if (initialize_function != NULL) {
+        initialize_function();
+        (void) pthread_once(&key_once, make_key);
         pthread_setspecific(key, (void*)1UL);
     }
 }
 
 void ps_finalize_(void) {
-    int i;
-    for (i = 0 ; i < num_tools_registered ; i++) {
-        if (finalize_functions[i] != NULL)
-            finalize_functions[i]();
-    }
+    if (finalize_function != NULL)
+        finalize_function();
 }
 
 void ps_pause_measurement_(void) {
-    int i;
-    for (i = 0 ; i < num_tools_registered ; i++) {
-        if (pause_measurement_functions[i] != NULL)
-            pause_measurement_functions[i]();
-    }
+    if (pause_measurement_function != NULL)
+        pause_measurement_function();
 }
 
 void ps_resume_measurement_(void) {
-    int i;
-    for (i = 0 ; i < num_tools_registered ; i++) {
-        if (resume_measurement_functions[i] != NULL)
-            resume_measurement_functions[i]();
-    }
+    if (resume_measurement_function != NULL)
+        resume_measurement_function();
 }
 
 void ps_register_thread_(void) {
@@ -266,13 +247,10 @@ void ps_register_thread_(void) {
 }
 
 void* ps_timer_create_(const char *timer_name) {
-	ps_register_thread_internal();
+    ps_register_thread_internal();
     void ** objects = (void **)calloc(num_tools_registered, sizeof(void*));
-    int i;
-    for (i = 0 ; i < num_tools_registered ; i++) {
-        if (timer_create_functions[i] != NULL)
-            objects[i] = (void *)timer_create_functions[i](timer_name);
-    }
+    if (timer_create_function != NULL)
+        objects = (void *)timer_create_function(timer_name);
     return (void*)(objects);
 }
 
@@ -281,14 +259,10 @@ void ps_timer_create_fortran_(void ** object, const char *timer_name) {
 }
 
 void ps_timer_start_(void *timer) {
-	ps_register_thread_internal();
+    ps_register_thread_internal();
     void ** objects = (void **)timer;
-    int i;
-    for (i = 0; i < num_tools_registered ; i++) {
-        if (timer_start_functions[i] != NULL &&
-            objects[i] != NULL)
-            timer_start_functions[i](objects[i]);
-    }
+    if (timer_start_function != NULL && objects != NULL)
+        timer_start_function(objects);
 }
 
 void ps_timer_start_fortran_(void **timer) {
@@ -297,12 +271,9 @@ void ps_timer_start_fortran_(void **timer) {
 
 void ps_timer_stop_(void *timer) {
     void ** objects = (void **)timer;
-    int i;
-    for (i = 0; i < num_tools_registered ; i++) {
-        if (timer_stop_functions[i] != NULL &&
-            objects[i] != NULL)
-            timer_stop_functions[i](objects[i]);
-    }
+    if (timer_stop_function != NULL &&
+            objects != NULL)
+        timer_stop_function(objects);
 }
 
 void ps_timer_stop_fortran_(void **timer) {
@@ -310,62 +281,41 @@ void ps_timer_stop_fortran_(void **timer) {
 }
 
 void ps_start_string_(const char *timer_name) {
-	ps_register_thread_internal();
-    int i;
-    for (i = 0 ; i < num_tools_registered ; i++) {
-        if (start_string_functions[i] != NULL)
-            start_string_functions[i](timer_name);
-    }
+    ps_register_thread_internal();
+    if (start_string_function != NULL)
+        start_string_function(timer_name);
 }
 
 void ps_stop_string_(const char *timer_name) {
-    int i;
-    for (i = 0 ; i < num_tools_registered ; i++) {
-        if (stop_string_functions[i] != NULL)
-            stop_string_functions[i](timer_name);
-    }
+    if (stop_string_function != NULL)
+        stop_string_function(timer_name);
 }
 
 void ps_stop_current_(void) {
-    int i;
-    for (i = 0 ; i < num_tools_registered ; i++) {
-        if (stop_current_functions[i] != NULL)
-            stop_current_functions[i]();
-    }
+    if (stop_current_function != NULL)
+        stop_current_function();
 }
 
 void ps_set_parameter_(const char * parameter_name, int64_t parameter_value) {
-    int i;
-    for (i = 0; i < num_tools_registered ; i++) {
-        if (set_parameter_functions[i] != NULL)
-            set_parameter_functions[i](parameter_name, parameter_value);
-    }
+    if (set_parameter_function != NULL)
+        set_parameter_function(parameter_name, parameter_value);
 }
 
 void ps_dynamic_phase_start_(const char *phase_prefix, int iteration_index) {
-    int i;
-    for (i = 0; i < num_tools_registered ; i++) {
-        if (dynamic_phase_start_functions[i] != NULL)
-            dynamic_phase_start_functions[i](phase_prefix, iteration_index);
-    }
+    if (dynamic_phase_start_function != NULL)
+        dynamic_phase_start_function(phase_prefix, iteration_index);
 }
 
 void ps_dynamic_phase_stop_(const char *phase_prefix, int iteration_index) {
-    int i;
-    for (i = 0; i < num_tools_registered ; i++) {
-        if (dynamic_phase_stop_functions[i] != NULL)
-            dynamic_phase_stop_functions[i](phase_prefix, iteration_index);
-    }
+    if (dynamic_phase_stop_function != NULL)
+        dynamic_phase_stop_function(phase_prefix, iteration_index);
 }
 
 void* ps_create_counter_(const char *name) {
-	ps_register_thread_internal();
+    ps_register_thread_internal();
     void ** objects = (void **)calloc(num_tools_registered, sizeof(void*));
-    int i;
-    for (i = 0 ; i < num_tools_registered ; i++) {
-        if (create_counter_functions[i] != NULL)
-            objects[i] = (void*)create_counter_functions[i](name);
-    }
+    if (create_counter_function != NULL)
+        objects = (void*)create_counter_function(name);
     return (void*)(objects);
 }
 
@@ -375,12 +325,9 @@ void ps_create_counter_fortran_(void ** object, const char *name) {
 
 void ps_sample_counter_(void *counter, const double value) {
     void ** objects = (void **)counter;
-    int i;
-    for (i = 0; i < num_tools_registered ; i++) {
-        if (sample_counter_functions[i] != NULL &&
-            objects[i] != NULL)
-            sample_counter_functions[i](objects[i], value);
-    }
+    if (sample_counter_function != NULL &&
+            objects != NULL)
+        sample_counter_function(objects, value);
 }
 
 void ps_sample_counter_fortran_(void **counter, const double value) {
@@ -388,61 +335,43 @@ void ps_sample_counter_fortran_(void **counter, const double value) {
 }
 
 void ps_set_metadata_(const char *name, const char *value) {
-	ps_register_thread_internal();
-    int i;
-    for (i = 0; i < num_tools_registered ; i++) {
-        if (set_metadata_functions[i] != NULL)
-            set_metadata_functions[i](name, value);
-    }
+    ps_register_thread_internal();
+    if (set_metadata_function != NULL)
+        set_metadata_function(name, value);
 }
 
 void ps_dump_data_(void) {
-    int i;
-    for (i = 0; i < num_tools_registered ; i++) {
-        if (dump_data_functions[i] != NULL)
-            dump_data_functions[i]();
-    }
+    if (dump_data_function != NULL)
+        dump_data_function();
 }
 
-void ps_get_timer_data_(ps_tool_timer_data_t *timer_data, int tool_id) {
-    if (tool_id < num_tools_registered) {
-        if (get_timer_data_functions[tool_id] != NULL)
-            get_timer_data_functions[tool_id](timer_data);
-    }
+void ps_get_timer_data_(ps_tool_timer_data_t *timer_data) {
+    if (get_timer_data_function != NULL)
+        get_timer_data_function(timer_data);
 }
 
-void ps_get_counter_data_(ps_tool_counter_data_t *counter_data, int tool_id) {
-    if (tool_id < num_tools_registered) {
-        if (get_counter_data_functions[tool_id] != NULL)
-            get_counter_data_functions[tool_id](counter_data);
-    }
+void ps_get_counter_data_(ps_tool_counter_data_t *counter_data) {
+    if (get_counter_data_function != NULL)
+        get_counter_data_function(counter_data);
 }
 
-void ps_get_metadata_(ps_tool_metadata_t *metadata, int tool_id) {
-    if (tool_id < num_tools_registered) {
-        if (get_metadata_functions[tool_id] != NULL)
-            get_metadata_functions[tool_id](metadata);
-    }
+void ps_get_metadata_(ps_tool_metadata_t *metadata) {
+    if (get_metadata_function != NULL)
+        get_metadata_function(metadata);
 }
 
-void ps_free_timer_data_(ps_tool_timer_data_t *timer_data, int tool_id) {
-    if (tool_id < num_tools_registered) {
-        if (free_timer_data_functions[tool_id] != NULL)
-            free_timer_data_functions[tool_id](timer_data);
-    }
+void ps_free_timer_data_(ps_tool_timer_data_t *timer_data) {
+    if (free_timer_data_function != NULL)
+        free_timer_data_function(timer_data);
 }
 
-void ps_free_counter_data_(ps_tool_counter_data_t *counter_data, int tool_id) {
-    if (tool_id < num_tools_registered) {
-        if (free_counter_data_functions[tool_id] != NULL)
-            free_counter_data_functions[tool_id](counter_data);
-    }
+void ps_free_counter_data_(ps_tool_counter_data_t *counter_data) {
+    if (free_counter_data_function != NULL)
+        free_counter_data_function(counter_data);
 }
 
-void ps_free_metadata_(ps_tool_metadata_t *metadata, int tool_id) {
-    if (tool_id < num_tools_registered) {
-        if (free_metadata_functions[tool_id] != NULL)
-            free_metadata_functions[tool_id](metadata);
-    }
+void ps_free_metadata_(ps_tool_metadata_t *metadata) {
+    if (free_metadata_function != NULL)
+        free_metadata_function(metadata);
 }
 
diff --git a/thirdparty/perfstubs/perfstubs/perfstubs_api/timer.h b/thirdparty/perfstubs/perfstubs/perfstubs_api/timer.h
index c806c9ed64..6bc36978f5 100644
--- a/thirdparty/perfstubs/perfstubs/perfstubs_api/timer.h
+++ b/thirdparty/perfstubs/perfstubs/perfstubs_api/timer.h
@@ -1,4 +1,4 @@
-// Copyright (c) 2019-2020 University of Oregon
+// Copyright (c) 2019-2022 University of Oregon
 // Distributed under the BSD Software License
 // (See accompanying file LICENSE.txt)
 
@@ -24,7 +24,8 @@
  * not just the function name.  If the compiler doesn't support it,
  * just use the function name. */
 
-#if defined(__GNUC__)
+/* ISO C doesn't allow __PRETTY_FUNCTION__, so only do it with C++ */
+#if defined(__GNUC__) && defined(__cplusplus)
 #define __PERFSTUBS_FUNCTION__ __PRETTY_FUNCTION__
 #else
 #define __PERFSTUBS_FUNCTION__ __func__
@@ -75,12 +76,12 @@ void  ps_set_metadata_(const char *name, const char *value);
 
 /* data query API */
 
-void  ps_get_timer_data_(ps_tool_timer_data_t *timer_data, int tool_id);
-void  ps_get_counter_data_(ps_tool_counter_data_t *counter_data, int tool_id);
-void  ps_get_metadata_(ps_tool_metadata_t *metadata, int tool_id);
-void  ps_free_timer_data_(ps_tool_timer_data_t *timer_data, int tool_id);
-void  ps_free_counter_data_(ps_tool_counter_data_t *counter_data, int tool_id);
-void  ps_free_metadata_(ps_tool_metadata_t *metadata, int tool_id);
+void  ps_get_timer_data_(ps_tool_timer_data_t *timer_data);
+void  ps_get_counter_data_(ps_tool_counter_data_t *counter_data);
+void  ps_get_metadata_(ps_tool_metadata_t *metadata);
+void  ps_free_timer_data_(ps_tool_timer_data_t *timer_data);
+void  ps_free_counter_data_(ps_tool_counter_data_t *counter_data);
+void  ps_free_metadata_(ps_tool_metadata_t *metadata);
 
 char* ps_make_timer_name_(const char * file, const char * func, int line);
 
diff --git a/thirdparty/perfstubs/perfstubs/perfstubs_api/tool.h b/thirdparty/perfstubs/perfstubs/perfstubs_api/tool.h
index d00f9ede3d..8668ffa9d8 100644
--- a/thirdparty/perfstubs/perfstubs/perfstubs_api/tool.h
+++ b/thirdparty/perfstubs/perfstubs/perfstubs_api/tool.h
@@ -1,4 +1,4 @@
-// Copyright (c) 2019-2020 University of Oregon
+// Copyright (c) 2019-2022 University of Oregon
 // Distributed under the BSD Software License
 // (See accompanying file LICENSE.txt)