Skip to content

Commit

Permalink
[OpenCL] Implement conv2d_winograd algorithm for Adreno (apache#11543)
Browse files Browse the repository at this point in the history
* Implement conv2d_winograd algorithm for Adreno

* Implement gtest for OpenCL texture pool

* Implement conv2d_nhwc_winograd for Adreno

* Minor refactoring

* Fix lint

* Apply comments

* Apply comments

* Fix lint
  • Loading branch information
echuraev committed Jun 17, 2022
1 parent 2a03cef commit f9dd389
Show file tree
Hide file tree
Showing 20 changed files with 1,597 additions and 101 deletions.
16 changes: 16 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ endif()
# Alternatively, use cmake -DOPTION=VALUE through command-line.
tvm_option(USE_CUDA "Build with CUDA" OFF)
tvm_option(USE_OPENCL "Build with OpenCL" OFF)
tvm_option(USE_OPENCL_GTEST "Path to OpenCL specific gtest version for runtime cpp tests." /path/to/opencl/gtest)
tvm_option(USE_VULKAN "Build with Vulkan" OFF)


Expand Down Expand Up @@ -609,6 +610,18 @@ if(BUILD_FOR_HEXAGON AND DEFINED USE_HEXAGON_GTEST AND EXISTS ${USE_HEXAGON_GTES
include_directories("${USE_HEXAGON_GTEST}/include")
endif()

# Optional googletest integration for the OpenCL runtime C++ tests.
# Enabled only when OpenCL is on and USE_OPENCL_GTEST names an existing path.
# The path is quoted so that an empty value makes EXISTS evaluate to false
# instead of leaving the EXISTS operator without an operand (a configure error).
if(USE_OPENCL AND DEFINED USE_OPENCL_GTEST AND EXISTS "${USE_OPENCL_GTEST}")
  include(FetchContent)
  # Use the user-provided checkout as the googletest source tree.
  FetchContent_Declare(googletest SOURCE_DIR "${USE_OPENCL_GTEST}")
  # googletest build option; must be set in the cache before the project is added.
  set(gtest_force_shared_crt ON CACHE BOOL "" FORCE)
  FetchContent_MakeAvailable(googletest)
  # Both TVM libraries link gtest publicly.
  target_link_libraries(tvm_runtime PUBLIC gtest)
  target_link_libraries(tvm PUBLIC gtest)
  # Two include roots are added: <root>/include and <root>/googletest/include.
  include_directories("${USE_OPENCL_GTEST}/include")
  include_directories("${USE_OPENCL_GTEST}/googletest/include")
  message(STATUS "Found OpenCL gtest at ${USE_OPENCL_GTEST}")
endif()

# Set flags for clang
include(cmake/modules/ClangFlags.cmake)
set(CRC16_INCLUDE_PATH "3rdparty/libcrc/include")
Expand Down Expand Up @@ -668,6 +681,9 @@ install(TARGETS tvm_runtime EXPORT ${PROJECT_NAME}Targets DESTINATION lib${LIB_S
# Install the gtest target alongside TVM when the Hexagon gtest path is in use.
# The path is quoted so that an empty USE_HEXAGON_GTEST evaluates to false
# rather than producing a malformed if() expression.
if(BUILD_FOR_HEXAGON AND DEFINED USE_HEXAGON_GTEST AND EXISTS "${USE_HEXAGON_GTEST}")
  install(TARGETS gtest EXPORT ${PROJECT_NAME}Targets DESTINATION lib${LIB_SUFFIX})
endif()
# Install the gtest target alongside TVM when the OpenCL gtest path is in use.
# The path is quoted so that an empty USE_OPENCL_GTEST evaluates to false
# rather than producing a malformed if() expression.
if(USE_OPENCL AND DEFINED USE_OPENCL_GTEST AND EXISTS "${USE_OPENCL_GTEST}")
  install(TARGETS gtest EXPORT ${PROJECT_NAME}Targets DESTINATION lib${LIB_SUFFIX})
endif()

if (INSTALL_DEV)
install(
Expand Down
1 change: 1 addition & 0 deletions cmake/modules/LibInfo.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,7 @@ function(add_lib_info src_file)
TVM_INFO_USE_MSVC_MT="${USE_MSVC_MT}"
TVM_INFO_USE_NNPACK="${USE_NNPACK}"
TVM_INFO_USE_OPENCL="${USE_OPENCL}"
TVM_INFO_USE_OPENCL_GTEST="${USE_OPENCL_GTEST}"
TVM_INFO_USE_OPENMP="${USE_OPENMP}"
TVM_INFO_USE_PAPI="${USE_PAPI}"
TVM_INFO_USE_PROFILER="${USE_PROFILER}"
Expand Down
6 changes: 6 additions & 0 deletions cmake/modules/OpenCL.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,12 @@ if(USE_OPENCL)
message(STATUS "Build with OpenCL support")
tvm_file_glob(GLOB RUNTIME_OPENCL_SRCS src/runtime/opencl/*.cc)
list(APPEND TVM_RUNTIME_LINKER_LIBS ${OpenCL_LIBRARIES})

# When a gtest checkout is supplied, add the OpenCL C++ runtime tests to the
# OpenCL runtime sources. The path is quoted so that an empty
# USE_OPENCL_GTEST evaluates to false instead of breaking the if() expression.
if(DEFINED USE_OPENCL_GTEST AND EXISTS "${USE_OPENCL_GTEST}")
  file_glob_append(RUNTIME_OPENCL_SRCS
    "${CMAKE_SOURCE_DIR}/tests/cpp-runtime/opencl/*.cc"
  )
endif()
list(APPEND RUNTIME_SRCS ${RUNTIME_OPENCL_SRCS})
else()
list(APPEND COMPILER_SRCS src/target/opt/build_opencl_off.cc)
Expand Down
99 changes: 98 additions & 1 deletion python/tvm/relay/op/strategy/adreno.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ def conv2d_strategy_adreno(attrs, inputs, out_type, target):
strategy = _op.OpStrategy()
data, kernel = inputs
dilation_h, dilation_w = attrs.get_int_tuple("dilation")
stride_h, stride_w = attrs.get_int_tuple("strides")
groups = attrs.groups
data_layout = attrs.data_layout
kernel_layout = attrs.kernel_layout
Expand All @@ -38,6 +39,28 @@ def conv2d_strategy_adreno(attrs, inputs, out_type, target):
if (data_layout == "NCHW" and kernel_layout == "OIHW") or (
data_layout == "NCHW4c" and kernel_layout == "OIHW4o"
):
if len(kernel.shape) == 4:
_, _, kh, kw = get_const_tuple(kernel.shape)
else:
_, _, kh, kw, _ = get_const_tuple(kernel.shape)
if (
(2 < kh < 8 and 2 < kw < 8 and kh == kw)
and (stride_h == 1 and stride_w == 1)
and (dilation_h == 1 and dilation_w == 1)
):
if out_type.dtype == "float16":
strategy.add_implementation(
wrap_compute_conv2d(topi.adreno.conv2d_nchw_winograd),
wrap_topi_schedule(topi.adreno.schedule_conv2d_nchw_winograd),
name="conv2d_nchw_winograd.image2d",
plevel=25,
)
strategy.add_implementation(
wrap_compute_conv2d(topi.adreno.conv2d_nchw_winograd_acc32),
wrap_topi_schedule(topi.adreno.schedule_conv2d_nchw_winograd_acc32),
name="conv2d_nchw_winograd_acc32.image2d",
plevel=30,
)
if out_type.dtype == "float16":
strategy.add_implementation(
wrap_compute_conv2d(topi.adreno.conv2d_nchwc),
Expand All @@ -48,12 +71,34 @@ def conv2d_strategy_adreno(attrs, inputs, out_type, target):
strategy.add_implementation(
wrap_compute_conv2d(topi.adreno.conv2d_nchwc_acc32),
wrap_topi_schedule(topi.adreno.schedule_conv2d_nchwc_acc32),
name="conv2d_nchwc_tpack.image2d",
name="conv2d_nchwc_acc32.image2d",
plevel=20,
)
elif (data_layout == "NHWC" and kernel_layout == "HWIO") or (
data_layout == "NHWC4c" and kernel_layout == "HWIO4o"
):
if len(kernel.shape) == 4:
kh, kw, _, _ = get_const_tuple(kernel.shape)
else:
kh, kw, _, _, _ = get_const_tuple(kernel.shape)
if (
(2 < kh < 8 and 2 < kw < 8 and kh == kw)
and (stride_h == 1 and stride_w == 1)
and (dilation_h == 1 and dilation_w == 1)
):
if out_type.dtype == "float16":
strategy.add_implementation(
wrap_compute_conv2d(topi.adreno.conv2d_nhwc_winograd),
wrap_topi_schedule(topi.adreno.schedule_conv2d_nhwc_winograd),
name="conv2d_nhwc_winograd.image2d",
plevel=25,
)
strategy.add_implementation(
wrap_compute_conv2d(topi.adreno.conv2d_nhwc_winograd_acc32),
wrap_topi_schedule(topi.adreno.schedule_conv2d_nhwc_winograd_acc32),
name="conv2d_nhwc_winograd_acc32.image2d",
plevel=30,
)
if out_type.dtype == "float16":
strategy.add_implementation(
wrap_compute_conv2d(topi.adreno.conv2d_nhwc),
Expand Down Expand Up @@ -153,6 +198,58 @@ def conv2d_strategy_adreno(attrs, inputs, out_type, target):
return strategy


@conv2d_winograd_without_weight_transfrom_strategy.register("adreno")
def conv2d_winograd_without_weight_transfrom_strategy_adreno(attrs, inputs, out_type, target):
    """Adreno strategy for conv2d_winograd_without_weight_transfrom.

    Dilation must be (1, 1) and groups must be 1; both are checked with asserts.
    For the NCHW/NCHW4c and NHWC/NHWC4c data layouts the ``*_acc32``
    implementation is always registered (plevel 40), and the plain variant is
    registered as well (plevel 35) when the output dtype is float16.
    Any other data layout raises ``RuntimeError``.
    """
    dilation = attrs.get_int_tuple("dilation")
    groups = attrs.get_int("groups")
    layout = attrs.data_layout
    assert dilation == (1, 1), "Do not support dilate now"
    assert groups == 1, "Do not supoort arbitrary group number"
    strategy = _op.OpStrategy()

    def _register(compute, schedule, impl_name, priority):
        # Wrap a compute/schedule pair and add it to the strategy being built.
        strategy.add_implementation(
            wrap_compute_conv2d(compute),
            wrap_topi_schedule(schedule),
            name=impl_name,
            plevel=priority,
        )

    # Reject unknown layouts up front so the branches below only handle valid ones.
    if layout not in ("NCHW", "NCHW4c", "NHWC", "NHWC4c"):
        raise RuntimeError(
            "Unsupported conv2d_winograd_without_weight_transfrom layout {}".format(layout)
        )

    fp16_output = out_type.dtype == "float16"
    if layout in ("NCHW", "NCHW4c"):
        if fp16_output:
            _register(
                topi.adreno.conv2d_nchw_winograd_without_weight_transform,
                topi.adreno.schedule_conv2d_nchw_winograd_without_weight_transform,
                "conv2d_nchw_winograd_without_weight_transform.image2d",
                35,
            )
        _register(
            topi.adreno.conv2d_nchw_winograd_without_weight_transform_acc32,
            topi.adreno.schedule_conv2d_nchw_winograd_without_weight_transform_acc32,
            "conv2d_nchw_winograd_without_weight_transform_acc32.image2d",
            40,
        )
    else:
        if fp16_output:
            _register(
                topi.adreno.conv2d_nhwc_winograd_without_weight_transform,
                topi.adreno.schedule_conv2d_nhwc_winograd_without_weight_transform,
                "conv2d_nhwc_winograd_without_weight_transform.image2d",
                35,
            )
        _register(
            topi.adreno.conv2d_nhwc_winograd_without_weight_transform_acc32,
            topi.adreno.schedule_conv2d_nhwc_winograd_without_weight_transform_acc32,
            "conv2d_nhwc_winograd_without_weight_transform_acc32.image2d",
            40,
        )
    return strategy


@schedule_pool.register("adreno")
def schedule_pool_adreno(attrs, outs, target):
"""schedule pooling ops for adreno"""
Expand Down
2 changes: 2 additions & 0 deletions python/tvm/topi/adreno/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,3 +24,5 @@
from .pooling import *
from .conv2d_alter_op import *
from .injective import schedule_injective
from .conv2d_nchw_winograd import *
from .conv2d_nhwc_winograd import *
Loading

0 comments on commit f9dd389

Please sign in to comment.