parameterization of filters and features benchmarks (v2) (#322)

Update of #278 for branch-22.08 with minor additional fixes. I have tested this locally, and it appears to be operating as expected.

Authors:
- Gregory Lee (https://github.com/grlee77)
- https://github.com/aasthajh

Approvers:
- https://github.com/jakirkham

URL: #322
rapidsai · Jul 27, 2022 · 3f7069f · 3f7069f
1 parent e5006ff
commit 3f7069f
Show file tree

Hide file tree

Showing 5 changed files with 289 additions and 205 deletions.
diff --git a/benchmarks/skimage/_image_bench.py b/benchmarks/skimage/_image_bench.py
@@ -36,6 +36,7 @@ def __init__(
         module_cpu=scipy.ndimage,
         module_gpu=cupyx.scipy.ndimage,
         function_is_generator=False,
+        run_cpu=True
     ):
 
         self.shape = shape
@@ -67,9 +68,12 @@ def gen_gpu(*args, **kwargs):
         self.module_name_cpu = module_cpu.__name__
         self.module_name_gpu = module_gpu.__name__
 
+        self.run_cpu = run_cpu
+
     def set_args(self, dtype):
         if np.dtype(dtype).kind in "iu":
             im1 = skimage.data.camera()
+            im1 = im1.astype(dtype)
         else:
             im1 = skimage.data.camera() / 255.0
             im1 = im1.astype(dtype)
@@ -166,17 +170,20 @@ def run_benchmark(self, duration=3, verbose=True):
                 rep_kwargs_gpu = self.get_reps(
                     self.func_gpu, self.args_gpu, kw_gpu, duration, cpu=False
                 )
-                perf = repeat(self.func_cpu, self.args_cpu, kw_cpu, **rep_kwargs_cpu)
+                print("Number of Repetitions : ", rep_kwargs_gpu)
                 perf_gpu = repeat(self.func_gpu, self.args_gpu, kw_gpu, **rep_kwargs_gpu)
-                df.at[index, "GPU accel"] = perf.cpu_times.mean() / perf_gpu.gpu_times.mean()
+
                 df.at[index, "shape"] = f"{self.shape}"
                 # df.at[index,  "description"] = index
                 df.at[index, "function_name"] = self.function_name
                 df.at[index, "dtype"] = np.dtype(dtype).name
                 df.at[index, "ndim"] = len(self.shape)
 
-                df.at[index, "CPU: host (mean)"] = perf.cpu_times.mean()
-                df.at[index, "CPU: host (std)"] = perf.cpu_times.std()
+                if self.run_cpu == True:
+                    perf = repeat(self.func_cpu, self.args_cpu, kw_cpu, **rep_kwargs_cpu)
+                    df.at[index, "GPU accel"] = perf.cpu_times.mean() / perf_gpu.gpu_times.mean()
+                    df.at[index, "CPU: host (mean)"] = perf.cpu_times.mean()
+                    df.at[index, "CPU: host (std)"] = perf.cpu_times.std()
 
                 df.at[index, "GPU: host (mean)"] = perf_gpu.cpu_times.mean()
                 df.at[index, "GPU: host (std)"] = perf_gpu.cpu_times.std()

diff --git a/benchmarks/skimage/cucim_feature_bench.py b/benchmarks/skimage/cucim_feature_bench.py
@@ -1,5 +1,6 @@
 import os
 import pickle
+import argparse
 
 import cucim.skimage
 import cucim.skimage.feature
@@ -26,102 +27,124 @@ def set_args(self, dtype):
         self.args_cpu = (image, template)
         self.args_gpu = (imaged, templated)
 
-
-pfile = "cucim_feature_results.pickle"
-if os.path.exists(pfile):
-    with open(pfile, "rb") as f:
-        all_results = pickle.load(f)
-else:
-    all_results = pd.DataFrame()
-dtypes = [np.float32]
-
-for function_name, fixed_kwargs, var_kwargs, allow_color, allow_nd in [
-    ("multiscale_basic_features", dict(edges=True), dict(texture=[True, False]), True, True),
-    ("canny", dict(sigma=1.8), dict(), False, False),
-    # reduced default rings, histograms, orientations to fit daisy at (3840, 2160) into GPU memory
-    (
-        "daisy",
-        dict(step=4, radius=15, rings=2, histograms=5, orientations=4),
-        dict(normalization=["l1", "l2", "daisy"]),
-        False,
-        False,
-    ),
-    ("structure_tensor", dict(sigma=1, mode="reflect", order="rc"), dict(), False, True),
-    ("hessian_matrix", dict(sigma=1, mode="reflect", order="rc"), dict(), False, True),
-    ("hessian_matrix_det", dict(sigma=1, approximate=False), dict(), False, True),
-    ("shape_index", dict(sigma=1, mode="reflect"), dict(), False, False),
-    ("corner_kitchen_rosenfeld", dict(mode="reflect"), dict(), False, False),
-    ("corner_harris", dict(k=0.05, eps=1e-6, sigma=1), dict(method=["k", "eps"]), False, False),
-    ("corner_shi_tomasi", dict(sigma=1), dict(), False, False),
-    ("corner_foerstner", dict(sigma=1), dict(), False, False),
-    ("corner_peaks", dict(), dict(min_distance=(2, 3, 5)), False, True),
-]:
-
-    for shape in [(128, 128, 128), (512, 512), (3840, 2160), (3840, 2160, 3), (192, 192, 192)]:
-
-        if function_name in ["corner_peaks", "peak_local_max"] and np.prod(shape) > 1000000:
-            # skip any large sizes that take too long
-            continue
-        ndim = len(shape)
-        if not allow_nd:
-            if not allow_color:
-                if ndim > 2:
-                    continue
-            else:
-                if ndim > 3 or (ndim == 3 and shape[-1] not in [3, 4]):
-                    continue
-        if shape[-1] == 3 and not allow_color:
+def main(args):
+
+    pfile = "cucim_feature_results.pickle"
+    if os.path.exists(pfile):
+        with open(pfile, "rb") as f:
+            all_results = pickle.load(f)
+    else:
+        all_results = pd.DataFrame()
+
+    dtypes = [np.dtype(args.dtype)]
+
+    for function_name, fixed_kwargs, var_kwargs, allow_color, allow_nd in [
+        ("multiscale_basic_features", dict(edges=True), dict(texture=[True, False]), True, True),
+        ("canny", dict(sigma=1.8), dict(), False, False),
+        # reduced default rings, histograms, orientations to fit daisy at (3840, 2160) into GPU memory
+        (
+            "daisy",
+            dict(step=4, radius=15, rings=2, histograms=5, orientations=4),
+            dict(normalization=["l1", "l2", "daisy"]),
+            False,
+            False,
+        ),
+        ("structure_tensor", dict(sigma=1, mode="reflect", order="rc"), dict(), False, True),
+        ("hessian_matrix", dict(sigma=1, mode="reflect", order="rc"), dict(), False, True),
+        ("hessian_matrix_det", dict(sigma=1, approximate=False), dict(), False, True),
+        ("shape_index", dict(sigma=1, mode="reflect"), dict(), False, False),
+        ("corner_kitchen_rosenfeld", dict(mode="reflect"), dict(), False, False),
+        ("corner_harris", dict(k=0.05, eps=1e-6, sigma=1), dict(method=["k", "eps"]), False, False),
+        ("corner_shi_tomasi", dict(sigma=1), dict(), False, False),
+        ("corner_foerstner", dict(sigma=1), dict(), False, False),
+        ("corner_peaks", dict(), dict(min_distance=(2, 3, 5)), False, True),
+        ("match_template", dict(), dict(pad_input=[False], mode=["reflect"]), False, True)
+    ]:
+
+        if function_name == args.func_name:
+            shape = tuple(list(map(int,(args.img_size.split(',')))))
+        else:
             continue
 
-        if function_name == "multiscale_basic_features":
-            fixed_kwargs["channel_axis"] = -1 if shape[-1] == 3 else None
-            if ndim == 3 and shape[-1] != 3:
-                # Omit texture=True case to avoid excessive GPU memory usage
-                var_kwargs["texture"] = [False]
-
-        B = ImageBench(
-            function_name=function_name,
-            shape=shape,
-            dtypes=dtypes,
-            fixed_kwargs=fixed_kwargs,
-            var_kwargs=var_kwargs,
-            module_cpu=skimage.feature,
-            module_gpu=cucim.skimage.feature,
-        )
-        results = B.run_benchmark(duration=1)
-        all_results = pd.concat([all_results, results["full"]])
-
-
-for function_name, fixed_kwargs, var_kwargs, allow_color, allow_nd in [
-    ("match_template", dict(), dict(pad_input=[False], mode=["reflect"]), False, True),
-]:
-    for shape in [(512, 512), (3840, 2160), (3840, 2160, 3), (192, 192, 192)]:
-
+        #if function_name in ["corner_peaks", "peak_local_max"] and np.prod(shape) > 1000000:
+            # skip any large sizes that take too long
         ndim = len(shape)
-        if not allow_nd:
-            if allow_color:
-                if ndim > 2:
-                    continue
-            else:
-                if ndim > 3 or (ndim == 3 and shape[-1] not in [3, 4]):
-                    continue
-        if shape[-1] == 3 and not allow_color:
-            continue
-
-        B = MatchTemplateBench(
-            function_name=function_name,
-            shape=shape,
-            dtypes=dtypes,
-            fixed_kwargs=fixed_kwargs,
-            var_kwargs=var_kwargs,
-            module_cpu=skimage.feature,
-            module_gpu=cucim.skimage.feature,
-        )
-        results = B.run_benchmark(duration=1)
+        run_cpu = not args.no_cpu
+
+        if function_name != "match_template":
+            if not allow_nd:
+                if not allow_color:
+                    if ndim > 2:
+                        continue
+                else:
+                    if ndim > 3 or (ndim == 3 and shape[-1] not in [3, 4]):
+                        continue
+
+            if shape[-1] == 3 and not allow_color:
+                continue
+
+            if function_name == "multiscale_basic_features":
+                fixed_kwargs["channel_axis"] = -1 if shape[-1] == 3 else None
+                if ndim == 3 and shape[-1] != 3:
+                    # Omit texture=True case to avoid excessive GPU memory usage
+                    var_kwargs["texture"] = [False]
+
+            B = ImageBench(
+                function_name=function_name,
+                shape=shape,
+                dtypes=dtypes,
+                fixed_kwargs=fixed_kwargs,
+                var_kwargs=var_kwargs,
+                module_cpu=skimage.feature,
+                module_gpu=cucim.skimage.feature,
+                run_cpu=run_cpu,
+            )
+        else:
+            if not allow_nd:
+                if allow_color:
+                    if ndim > 2:
+                        continue
+                else:
+                    if ndim > 3 or (ndim == 3 and shape[-1] not in [3, 4]):
+                        continue
+            if shape[-1] == 3 and not allow_color:
+                continue
+
+            B = MatchTemplateBench(
+                function_name=function_name,
+                shape=shape,
+                dtypes=dtypes,
+                fixed_kwargs=fixed_kwargs,
+                var_kwargs=var_kwargs,
+                module_cpu=skimage.feature,
+                module_gpu=cucim.skimage.feature,
+                run_cpu=run_cpu,
+            )
+
+        results = B.run_benchmark(duration=args.duration)
         all_results = pd.concat([all_results, results["full"]])
 
-fbase = os.path.splitext(pfile)[0]
-all_results.to_csv(fbase + ".csv")
-all_results.to_pickle(pfile)
-with open(fbase + ".md", "wt") as f:
-    f.write(all_results.to_markdown())
+    fbase = os.path.splitext(pfile)[0]
+    all_results.to_csv(fbase + ".csv")
+    all_results.to_pickle(pfile)
+    try:
+        import tabulate
+
+        with open(fbase + ".md", "wt") as f:
+            f.write(all_results.to_markdown())
+    except ImportError:
+        pass
+
+
+if __name__ == '__main__':
+    parser = argparse.ArgumentParser(description='Benchmarking cuCIM Feature')
+    func_name_choices = ["multiscale_basic_features","canny","daisy","structure_tensor","hessian_matrix","hessian_matrix_det","shape_index","corner_kitchen_rosenfeld","corner_harris","corner_shi_tomasi","corner_foerstner","corner_peaks","match_template"]
+    dtype_choices = ['float16', 'float32', 'float64', 'int8', 'int16', 'int32', 'int64', 'uint8', 'uint16', 'uint32', 'uint64']
+    parser.add_argument('-i','--img_size', type=str, help='Size of input image', required=True)
+    parser.add_argument('-d','--dtype', type=str, help='Dtype of input image', choices=dtype_choices, required=True)
+    parser.add_argument('-f','--func_name', type=str, help='function to benchmark', choices=func_name_choices, required=True)
+    parser.add_argument('-t','--duration', type=int, help='time to run benchmark', required=True)
+    parser.add_argument('--no_cpu', action='store_true', help='disable cpu measurements', default=False)
+
+    args = parser.parse_args()
+    main(args)