LLNL · august-knox · Aug 23, 2024 · Aug 23, 2024 · Aug 23, 2024 · Sep 3, 2024
diff --git a/.github/workflows/run.yml b/.github/workflows/run.yml
@@ -431,15 +431,6 @@ jobs:
           system_name: ruby
           system_spec: llnl-cluster cluster=ruby compiler=intel
 
-      - name: osu-micro-benchmarks/mpi ruby llnl-cluster cluster=ruby compiler=intel
-        uses: ./.github/actions/dynamic-dry-run
-        with:
-          benchmark_name: osu-micro-benchmarks
-          benchmark_mode: mpi
-          benchmark_spec: osu-micro-benchmarks workload=all
-          system_name: ruby
-          system_spec: llnl-cluster cluster=ruby compiler=intel
-
       - name: laghos/mpi caliper=mpi,time ruby llnl-cluster cluster=ruby compiler=intel
         uses: ./.github/actions/dynamic-dry-run
         with:
@@ -799,3 +790,21 @@ jobs:
           benchmark_spec: stream caliper=mpi,time
           system_name: tioga
           system_spec: llnl-elcapitan rocm=5.5.1 compiler=cce
+
+      - name: osu-micro-benchmarks/mpi ruby llnl-cluster cluster=ruby compiler=intel
+        uses: ./.github/actions/dynamic-dry-run
+        with:
+          benchmark_name: osu-micro-benchmarks
+          benchmark_mode: mpi
+          benchmark_spec: osu-micro-benchmarks workload=all
+          system_name: ruby
+          system_spec: llnl-cluster cluster=ruby compiler=intel
+
+      - name: osu-micro-benchmarks/rocm tioga llnl-elcapitan cluster=tioga compiler=cce +gtl
+        uses: ./.github/actions/dynamic-dry-run
+        with:
+          benchmark_name: osu-micro-benchmarks
+          benchmark_mode: rocm
+          benchmark_spec: osu-micro-benchmarks+rocm workload=all  # +rocm: experiment.py adds " -d rocm" and n_gpus
+          system_name: tioga
+          system_spec: llnl-elcapitan cluster=tioga compiler=cce +gtl  # package.py depends on cray-mpich+gtl when +rocm
diff --git a/experiments/osu-micro-benchmarks/experiment.py b/experiments/osu-micro-benchmarks/experiment.py
@@ -4,11 +4,16 @@
 # SPDX-License-Identifier: Apache-2.0
 
 from benchpark.directives import variant
-from benchpark.error import BenchparkError
 from benchpark.experiment import Experiment
+from benchpark.rocm import ROCmExperiment
+from benchpark.cuda import CudaExperiment
 
 
-class OsuMicroBenchmarks(Experiment):
+class OsuMicroBenchmarks(
+    Experiment,
+    ROCmExperiment,
+    CudaExperiment,
+):
 
     variant(
         "workload",
@@ -90,27 +95,33 @@ class OsuMicroBenchmarks(Experiment):
     )
 
     def compute_applications_section(self):
+        """Add n_nodes (+single_node) and, for +rocm/+cuda, "-d" args and n_gpus."""
-        scaling_modes = {
-            "single_node": self.spec.satisfies("+single_node"),
-        }
-
-        scaling_mode_enabled = [key for key, value in scaling_modes.items() if value]
-        if len(scaling_mode_enabled) != 1:
-            raise BenchparkError(
-                f"Only one type of scaling per experiment is allowed for application package {self.name}"
-            )
 
         num_nodes = {"n_nodes": 2}
 
         if self.spec.satisfies("+single_node"):
             for pk, pv in num_nodes.items():
                 self.add_experiment_variable(pk, pv, True)
 
+        gpu_backends = [d for d in ("rocm", "cuda") if self.spec.satisfies(f"+{d}")]
+        for backend in gpu_backends:
+            self.add_experiment_variable("additional_args", f" -d {backend}", False)
+        if gpu_backends:
+            # n_gpus takes the n_nodes value; read it directly, not via a dict loop.
+            self.add_experiment_variable("n_gpus", num_nodes["n_nodes"], True)
+
     def compute_spack_section(self):
         system_specs = {}
+        if self.spec.satisfies("+cuda"):  # values are plain strings, braces kept
+            system_specs["cuda_version"] = "{default_cuda_version}"
+            system_specs["cuda_arch"] = "{cuda_arch}"
+        elif self.spec.satisfies("+rocm"):  # not reached when +cuda is also set
+            system_specs["rocm_arch"] = "{rocm_arch}"
+
         system_specs["compiler"] = "default-compiler"
         system_specs["mpi"] = "default-mpi"
         self.add_spack_spec(system_specs["mpi"])
+        # NOTE(review): GPU keys set above are unread in the lines shown; confirm use.
         self.add_spack_spec(
             self.name, ["osu-micro-benchmarks", system_specs["compiler"]]
         )
diff --git a/repo/osu-micro-benchmarks/package.py b/repo/osu-micro-benchmarks/package.py
@@ -0,0 +1,29 @@
+# Copyright 2023 Lawrence Livermore National Security, LLC and other
+# Benchpark Project Developers. See the top-level COPYRIGHT file for details.
+#
+# SPDX-License-Identifier: Apache-2.0
+
+from spack.package import *
+from spack.pkg.builtin.osu_micro_benchmarks import OsuMicroBenchmarks as BuiltinOsu
+
+
+class OsuMicroBenchmarks(BuiltinOsu, ROCmPackage):
+    """Builtin OSU micro-benchmarks plus a cray-mpich+gtl dependency for +rocm."""
+
+    depends_on("cray-mpich+gtl", when="+rocm")
+
+    def configure_args(self):
+        """Return the builtin configure args, adding the MPI LDFLAGS for +rocm."""
+        args = super().configure_args()
+        if self.spec.satisfies("+rocm"):
+            args.append(f"LDFLAGS={self.spec['mpi'].libs.ld_flags}")
+        return args
+
+    def setup_run_environment(self, env):
+        """Prepend the benchmark dirs to PATH; set LOCAL_RANK for +rocm with GTL."""
+        mpidir = join_path(self.prefix.libexec, "osu-micro-benchmarks", "mpi")
+        for subdir in ("startup", "pt2pt", "one-sided", "collective"):
+            env.prepend_path("PATH", join_path(mpidir, subdir))
+        if self.spec.satisfies("+rocm") and self.spec.satisfies("^cray-mpich+gtl"):
+            # LOCAL_RANK is a scalar, not a search path: set it, do not prepend.
+            env.set("LOCAL_RANK", self.spec["mpi"].extra_attributes["gtl_flags"])
diff --git a/systems/llnl-elcapitan/system.py b/systems/llnl-elcapitan/system.py
@@ -160,6 +160,7 @@ def mpi_config(self, cce_version):
 """
 
             use_gtl = f"""\
+        gtl_flags: $MV2_COMM_WORLD_LOCAL_RANK
         gtl_cutoff_size: 4096
         fi_cxi_ats: 0
         gtl_lib_path: /opt/cray/pe/mpich/{mpi_version}/gtl/lib