Skip to content

Commit

Permalink
parameterization of filters and features benchmarks (v2) (#322)
Browse files Browse the repository at this point in the history
Update of #278 for branch-22.08 with minor additional fixes. I have tested this locally and it seems to be operating as expected.

Authors:
  - Gregory Lee (https://github.com/grlee77)
  - https://github.com/aasthajh

Approvers:
  - https://github.com/jakirkham

URL: #322
  • Loading branch information
grlee77 authored Jul 27, 2022
1 parent e5006ff commit 3f7069f
Show file tree
Hide file tree
Showing 5 changed files with 289 additions and 205 deletions.
15 changes: 11 additions & 4 deletions benchmarks/skimage/_image_bench.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ def __init__(
module_cpu=scipy.ndimage,
module_gpu=cupyx.scipy.ndimage,
function_is_generator=False,
run_cpu=True
):

self.shape = shape
Expand Down Expand Up @@ -67,9 +68,12 @@ def gen_gpu(*args, **kwargs):
self.module_name_cpu = module_cpu.__name__
self.module_name_gpu = module_gpu.__name__

self.run_cpu = run_cpu

def set_args(self, dtype):
if np.dtype(dtype).kind in "iu":
im1 = skimage.data.camera()
im1 = im1.astype(dtype)
else:
im1 = skimage.data.camera() / 255.0
im1 = im1.astype(dtype)
Expand Down Expand Up @@ -166,17 +170,20 @@ def run_benchmark(self, duration=3, verbose=True):
rep_kwargs_gpu = self.get_reps(
self.func_gpu, self.args_gpu, kw_gpu, duration, cpu=False
)
perf = repeat(self.func_cpu, self.args_cpu, kw_cpu, **rep_kwargs_cpu)
print("Number of Repetitions : ", rep_kwargs_gpu)
perf_gpu = repeat(self.func_gpu, self.args_gpu, kw_gpu, **rep_kwargs_gpu)
df.at[index, "GPU accel"] = perf.cpu_times.mean() / perf_gpu.gpu_times.mean()

df.at[index, "shape"] = f"{self.shape}"
# df.at[index, "description"] = index
df.at[index, "function_name"] = self.function_name
df.at[index, "dtype"] = np.dtype(dtype).name
df.at[index, "ndim"] = len(self.shape)

df.at[index, "CPU: host (mean)"] = perf.cpu_times.mean()
df.at[index, "CPU: host (std)"] = perf.cpu_times.std()
if self.run_cpu == True:
perf = repeat(self.func_cpu, self.args_cpu, kw_cpu, **rep_kwargs_cpu)
df.at[index, "GPU accel"] = perf.cpu_times.mean() / perf_gpu.gpu_times.mean()
df.at[index, "CPU: host (mean)"] = perf.cpu_times.mean()
df.at[index, "CPU: host (std)"] = perf.cpu_times.std()

df.at[index, "GPU: host (mean)"] = perf_gpu.cpu_times.mean()
df.at[index, "GPU: host (std)"] = perf_gpu.cpu_times.std()
Expand Down
211 changes: 117 additions & 94 deletions benchmarks/skimage/cucim_feature_bench.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import os
import pickle
import argparse

import cucim.skimage
import cucim.skimage.feature
Expand All @@ -26,102 +27,124 @@ def set_args(self, dtype):
self.args_cpu = (image, template)
self.args_gpu = (imaged, templated)


pfile = "cucim_feature_results.pickle"
if os.path.exists(pfile):
with open(pfile, "rb") as f:
all_results = pickle.load(f)
else:
all_results = pd.DataFrame()
dtypes = [np.float32]

for function_name, fixed_kwargs, var_kwargs, allow_color, allow_nd in [
("multiscale_basic_features", dict(edges=True), dict(texture=[True, False]), True, True),
("canny", dict(sigma=1.8), dict(), False, False),
# reduced default rings, histograms, orientations to fit daisy at (3840, 2160) into GPU memory
(
"daisy",
dict(step=4, radius=15, rings=2, histograms=5, orientations=4),
dict(normalization=["l1", "l2", "daisy"]),
False,
False,
),
("structure_tensor", dict(sigma=1, mode="reflect", order="rc"), dict(), False, True),
("hessian_matrix", dict(sigma=1, mode="reflect", order="rc"), dict(), False, True),
("hessian_matrix_det", dict(sigma=1, approximate=False), dict(), False, True),
("shape_index", dict(sigma=1, mode="reflect"), dict(), False, False),
("corner_kitchen_rosenfeld", dict(mode="reflect"), dict(), False, False),
("corner_harris", dict(k=0.05, eps=1e-6, sigma=1), dict(method=["k", "eps"]), False, False),
("corner_shi_tomasi", dict(sigma=1), dict(), False, False),
("corner_foerstner", dict(sigma=1), dict(), False, False),
("corner_peaks", dict(), dict(min_distance=(2, 3, 5)), False, True),
]:

for shape in [(128, 128, 128), (512, 512), (3840, 2160), (3840, 2160, 3), (192, 192, 192)]:

if function_name in ["corner_peaks", "peak_local_max"] and np.prod(shape) > 1000000:
# skip any large sizes that take too long
continue
ndim = len(shape)
if not allow_nd:
if not allow_color:
if ndim > 2:
continue
else:
if ndim > 3 or (ndim == 3 and shape[-1] not in [3, 4]):
continue
if shape[-1] == 3 and not allow_color:
def main(args):

pfile = "cucim_feature_results.pickle"
if os.path.exists(pfile):
with open(pfile, "rb") as f:
all_results = pickle.load(f)
else:
all_results = pd.DataFrame()

dtypes = [np.dtype(args.dtype)]

for function_name, fixed_kwargs, var_kwargs, allow_color, allow_nd in [
("multiscale_basic_features", dict(edges=True), dict(texture=[True, False]), True, True),
("canny", dict(sigma=1.8), dict(), False, False),
# reduced default rings, histograms, orientations to fit daisy at (3840, 2160) into GPU memory
(
"daisy",
dict(step=4, radius=15, rings=2, histograms=5, orientations=4),
dict(normalization=["l1", "l2", "daisy"]),
False,
False,
),
("structure_tensor", dict(sigma=1, mode="reflect", order="rc"), dict(), False, True),
("hessian_matrix", dict(sigma=1, mode="reflect", order="rc"), dict(), False, True),
("hessian_matrix_det", dict(sigma=1, approximate=False), dict(), False, True),
("shape_index", dict(sigma=1, mode="reflect"), dict(), False, False),
("corner_kitchen_rosenfeld", dict(mode="reflect"), dict(), False, False),
("corner_harris", dict(k=0.05, eps=1e-6, sigma=1), dict(method=["k", "eps"]), False, False),
("corner_shi_tomasi", dict(sigma=1), dict(), False, False),
("corner_foerstner", dict(sigma=1), dict(), False, False),
("corner_peaks", dict(), dict(min_distance=(2, 3, 5)), False, True),
("match_template", dict(), dict(pad_input=[False], mode=["reflect"]), False, True)
]:

if function_name == args.func_name:
shape = tuple(list(map(int,(args.img_size.split(',')))))
else:
continue

if function_name == "multiscale_basic_features":
fixed_kwargs["channel_axis"] = -1 if shape[-1] == 3 else None
if ndim == 3 and shape[-1] != 3:
# Omit texture=True case to avoid excessive GPU memory usage
var_kwargs["texture"] = [False]

B = ImageBench(
function_name=function_name,
shape=shape,
dtypes=dtypes,
fixed_kwargs=fixed_kwargs,
var_kwargs=var_kwargs,
module_cpu=skimage.feature,
module_gpu=cucim.skimage.feature,
)
results = B.run_benchmark(duration=1)
all_results = pd.concat([all_results, results["full"]])


for function_name, fixed_kwargs, var_kwargs, allow_color, allow_nd in [
("match_template", dict(), dict(pad_input=[False], mode=["reflect"]), False, True),
]:
for shape in [(512, 512), (3840, 2160), (3840, 2160, 3), (192, 192, 192)]:

#if function_name in ["corner_peaks", "peak_local_max"] and np.prod(shape) > 1000000:
# skip any large sizes that take too long
ndim = len(shape)
if not allow_nd:
if allow_color:
if ndim > 2:
continue
else:
if ndim > 3 or (ndim == 3 and shape[-1] not in [3, 4]):
continue
if shape[-1] == 3 and not allow_color:
continue

B = MatchTemplateBench(
function_name=function_name,
shape=shape,
dtypes=dtypes,
fixed_kwargs=fixed_kwargs,
var_kwargs=var_kwargs,
module_cpu=skimage.feature,
module_gpu=cucim.skimage.feature,
)
results = B.run_benchmark(duration=1)
run_cpu = not args.no_cpu

if function_name != "match_template":
if not allow_nd:
if not allow_color:
if ndim > 2:
continue
else:
if ndim > 3 or (ndim == 3 and shape[-1] not in [3, 4]):
continue

if shape[-1] == 3 and not allow_color:
continue

if function_name == "multiscale_basic_features":
fixed_kwargs["channel_axis"] = -1 if shape[-1] == 3 else None
if ndim == 3 and shape[-1] != 3:
# Omit texture=True case to avoid excessive GPU memory usage
var_kwargs["texture"] = [False]

B = ImageBench(
function_name=function_name,
shape=shape,
dtypes=dtypes,
fixed_kwargs=fixed_kwargs,
var_kwargs=var_kwargs,
module_cpu=skimage.feature,
module_gpu=cucim.skimage.feature,
run_cpu=run_cpu,
)
else:
if not allow_nd:
if allow_color:
if ndim > 2:
continue
else:
if ndim > 3 or (ndim == 3 and shape[-1] not in [3, 4]):
continue
if shape[-1] == 3 and not allow_color:
continue

B = MatchTemplateBench(
function_name=function_name,
shape=shape,
dtypes=dtypes,
fixed_kwargs=fixed_kwargs,
var_kwargs=var_kwargs,
module_cpu=skimage.feature,
module_gpu=cucim.skimage.feature,
run_cpu=run_cpu,
)

results = B.run_benchmark(duration=args.duration)
all_results = pd.concat([all_results, results["full"]])

fbase = os.path.splitext(pfile)[0]
all_results.to_csv(fbase + ".csv")
all_results.to_pickle(pfile)
with open(fbase + ".md", "wt") as f:
f.write(all_results.to_markdown())
fbase = os.path.splitext(pfile)[0]
all_results.to_csv(fbase + ".csv")
all_results.to_pickle(pfile)
try:
import tabulate

with open(fbase + ".md", "wt") as f:
f.write(all_results.to_markdown())
except ImportError:
pass


if __name__ == "__main__":
    # Command-line entry point: build the argument parser, parse argv and
    # pass the resulting namespace to main().
    parser = argparse.ArgumentParser(description="Benchmarking cuCIM Feature")

    # Values accepted by -f/--func_name.
    func_name_choices = [
        "multiscale_basic_features",
        "canny",
        "daisy",
        "structure_tensor",
        "hessian_matrix",
        "hessian_matrix_det",
        "shape_index",
        "corner_kitchen_rosenfeld",
        "corner_harris",
        "corner_shi_tomasi",
        "corner_foerstner",
        "corner_peaks",
        "match_template",
    ]

    # Values accepted by -d/--dtype.
    dtype_choices = [
        "float16",
        "float32",
        "float64",
        "int8",
        "int16",
        "int32",
        "int64",
        "uint8",
        "uint16",
        "uint32",
        "uint64",
    ]

    # Kept as a plain string here; no parsing of the value happens in this block.
    parser.add_argument(
        "-i",
        "--img_size",
        type=str,
        help="Size of input image",
        required=True,
    )
    parser.add_argument(
        "-d",
        "--dtype",
        type=str,
        help="Dtype of input image",
        choices=dtype_choices,
        required=True,
    )
    parser.add_argument(
        "-f",
        "--func_name",
        type=str,
        help="function to benchmark",
        choices=func_name_choices,
        required=True,
    )
    parser.add_argument(
        "-t",
        "--duration",
        type=int,
        help="time to run benchmark",
        required=True,
    )
    # Flag only: present -> True, absent -> False.
    parser.add_argument(
        "--no_cpu",
        action="store_true",
        help="disable cpu measurements",
        default=False,
    )

    args = parser.parse_args()
    main(args)
Loading

0 comments on commit 3f7069f

Please sign in to comment.