Skip to content

Commit

Permalink
conv2d fusion test worked
Browse files Browse the repository at this point in the history
  • Loading branch information
masahi committed Dec 12, 2021
1 parent 05b51c9 commit 3705bbd
Show file tree
Hide file tree
Showing 2 changed files with 48 additions and 18 deletions.
2 changes: 1 addition & 1 deletion src/relay/backend/contrib/cutlass/codegen.cc
Original file line number Diff line number Diff line change
Expand Up @@ -339,7 +339,7 @@ std::string Conv2dOp(std::string id, const Str2StrMap& attrs,
}
CutlassPrint(conv2d_decl, " {static_cast<ElementOutput*>(ptr_out),layout_C},\n");
if (has_bias) {
CutlassPrint(conv2d_decl, " {alpha},\n");
CutlassPrint(conv2d_decl, " {alpha}\n};\n");
} else {
CutlassPrint(conv2d_decl, "{alpha, beta}\n};\n");
}
Expand Down
64 changes: 47 additions & 17 deletions tests/python/contrib/test_cutlass.py
Original file line number Diff line number Diff line change
Expand Up @@ -139,8 +139,12 @@ def profile_and_build(mod, params, sm, tmp_dir="./tmp", lib_path="compile.so"):
mod, num_cutlass_partition = tune_cutlass_kernels(
mod, sm, profile_all=False, use_multiprocessing=False, tmp_dir=tmp_dir
)
print(mod)
return
with tvm.transform.PassContext(opt_level=3):
lib = relay.build(mod, target="cuda", params=params)
lib = build_cutlass_kernels(lib, sm, tmp_dir, lib_path)
dev = tvm.device("cuda", 0)
rt_mod = tvm.contrib.graph_executor.GraphModule(lib["default"](dev))
return rt_mod, dev, num_cutlass_partition


def profile_and_build_vm(
Expand Down Expand Up @@ -328,8 +332,8 @@ def verify_conv2d(
use_cudnn_ref=False,
run_benchmark=False,
):
# if not has_cutlass():
# return
if not has_cutlass():
return

mod_nchw = tvm.IRModule.from_expr(expr_nchw)
mod_ref = tvm.IRModule.from_expr(expr_ref)
Expand All @@ -343,15 +347,44 @@ def verify_conv2d(

params = {"weight": np_weight, "bias": np_bias}

typ = relay.transform.InferType()(mod_nchw)["main"].body.checked_type
use_vm = any(isinstance(s, tvm.tir.Any) for s in typ.shape)

mod_weight_ohwi = convert_conv2d_layout(mod_nchw, {"nn.conv2d": ["NHWC", "OHWI"]})

profile_and_build(
mod_weight_ohwi,
params,
sm,
)
if use_vm:
rt_mod, _, num_cutlass_partition = profile_and_build_vm(mod_weight_ohwi, params, sm)
out = get_output_vm(rt_mod, ["data"], [np_data])
else:
rt_mod, _, num_cutlass_partition = profile_and_build(
mod_weight_ohwi,
params,
sm,
)
out = get_output(rt_mod, ["data"], [np_data])

assert num_cutlass_partition > 0

if use_cudnn_ref:
rt_mod_ref, dev = get_ref_rt_mod(
convert_conv2d_layout(mod_ref, {"nn.conv2d": ["NHWC", "OHWI"]}),
params,
target="cuda -libs=cudnn",
)
else:
rt_mod_ref, dev = get_ref_rt_mod(
convert_conv2d_layout(mod_ref, {"nn.conv2d": ["NHWC", "HWIO"]}),
params,
target="cuda",
)

ref_out = get_output(rt_mod_ref, ["data"], [np_data])

if run_benchmark:
print("CUTLASS:", rt_mod.benchmark(dev, number=1, repeat=600))
print("TVM Tensorcore (no tuning):", rt_mod_ref.benchmark(dev, number=1, repeat=600))

np.testing.assert_allclose(out, ref_out, atol=atol, rtol=rtol)


def test_conv2d():
Expand Down Expand Up @@ -385,24 +418,21 @@ def test_conv2d():
)


d_shape = (16, 16, 32, 32)
w_shape = (32, 16, 3, 3)
padding = (1, 1)

def test_conv2d_fusion():
    # NOTE(review): as captured here the body only binds shapes and never runs a
    # check — presumably the verify_conv2d calls were lost when the diff was
    # scraped (the +/- markers are missing); confirm against the repository.
    d_shape = (16, 16, 32, 32)  # NCHW input: batch 16, 16 channels, 32x32 spatial
    w_shape = (32, 16, 3, 3)  # OIHW weight: 32 out channels, 16 in channels, 3x3 kernel
    padding = (1, 1)  # symmetric padding, keeps 32x32 output for a 3x3 kernel

def test_conv2d_bias():
    """Run verify_conv2d on a conv2d + bias module (sm=80, tolerances 1e-5).

    The same module is passed as both the candidate and the reference
    expression, mirroring the sibling conv2d fusion tests in this file.
    """
    conv_bias_mod = get_conv2d_nchw_bias(d_shape, w_shape, padding)
    verify_conv2d(
        conv_bias_mod,
        conv_bias_mod,
        d_shape,
        w_shape,
        sm=80,
        atol=1e-5,
        rtol=1e-5,
        run_benchmark=False,
    )


def test_conv2d_bias_relu():
    """Run verify_conv2d on a conv2d + bias + relu module (sm=80, tolerances 1e-5).

    As in test_conv2d_bias, the candidate module doubles as its own
    reference expression.
    """
    fused_mod = get_conv2d_nchw_bias_relu(d_shape, w_shape, padding)
    verify_conv2d(
        fused_mod,
        fused_mod,
        d_shape,
        w_shape,
        sm=80,
        atol=1e-5,
        rtol=1e-5,
        run_benchmark=False,
    )


if __name__ == "__main__":
test_conv2d_bias_relu()
test_conv2d_fusion()

0 comments on commit 3705bbd

Please sign in to comment.