Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[RUNTIME][OPENCL] OpenCL host pointer support to acheive zero copy #13413

Merged
merged 2 commits into from
Dec 30, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ endif()
# Alernatively, use cmake -DOPTION=VALUE through command-line.
tvm_option(USE_CUDA "Build with CUDA" OFF)
tvm_option(USE_OPENCL "Build with OpenCL" OFF)
tvm_option(USE_OPENCL_ENABLE_HOST_PTR "Enable OpenCL memory object access to host" OFF)
tvm_option(USE_OPENCL_GTEST "Path to OpenCL specific gtest version for runtime cpp tests." /path/to/opencl/gtest)
tvm_option(USE_VULKAN "Build with Vulkan" OFF)

Expand Down
5 changes: 5 additions & 0 deletions cmake/config.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,11 @@ set(USE_AOCL OFF)
# - /path/to/opencl-sdk: use specific path to opencl-sdk
set(USE_OPENCL OFF)

# Wheather to allow OPENCL cl_mem access to host
# cl_mem will be allocated with CL_MEM_ALLOC_HOST_PTR
# OpenCLWorkspace->GetHostPtr API returns the host accessible pointer
set(USE_OPENCL_ENABLE_HOST_PTR OFF)

# Whether enable Metal runtime
set(USE_METAL OFF)

Expand Down
1 change: 1 addition & 0 deletions cmake/modules/LibInfo.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,7 @@ function(add_lib_info src_file)
TVM_INFO_USE_MSVC_MT="${USE_MSVC_MT}"
TVM_INFO_USE_NNPACK="${USE_NNPACK}"
TVM_INFO_USE_OPENCL="${USE_OPENCL}"
TVM_INFO_USE_OPENCL_ENABLE_HOST_PTR="${USE_OPENCL_ENABLE_HOST_PTR}"
TVM_INFO_USE_OPENCL_GTEST="${USE_OPENCL_GTEST}"
TVM_INFO_USE_OPENMP="${USE_OPENMP}"
TVM_INFO_USE_PAPI="${USE_PAPI}"
Expand Down
3 changes: 3 additions & 0 deletions cmake/modules/OpenCL.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,9 @@ if(USE_OPENCL)
target_link_libraries(opencl-cpptest PRIVATE gtest_main tvm_runtime)
endif()
list(APPEND RUNTIME_SRCS ${RUNTIME_OPENCL_SRCS})
if(USE_OPENCL_ENABLE_HOST_PTR)
add_definitions(-DOPENCL_ENABLE_HOST_PTR)
endif(USE_OPENCL_ENABLE_HOST_PTR)
else()
list(APPEND COMPILER_SRCS src/target/opt/build_opencl_off.cc)
endif(USE_OPENCL)
4 changes: 3 additions & 1 deletion cmake/modules/contrib/CLML.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -54,5 +54,7 @@ if(USE_CLML_GRAPH_EXECUTOR)
file(GLOB RUNTIME_OPENCL_SRCS src/runtime/opencl/*.cc)
list(APPEND RUNTIME_SRCS ${RUNTIME_OPENCL_SRCS})
set(USE_OPENCL ON)

if(USE_OPENCL_ENABLE_HOST_PTR)
add_definitions(-DOPENCL_ENABLE_HOST_PTR)
endif(USE_OPENCL_ENABLE_HOST_PTR)
endif()
5 changes: 5 additions & 0 deletions src/runtime/opencl/opencl_common.h
Original file line number Diff line number Diff line change
Expand Up @@ -212,6 +212,7 @@ inline cl_channel_type DTypeToOpenCLChannelType(DLDataType data_type) {
}

class OpenCLThreadEntry;
struct BufferDescriptor;

/*!
* \brief Process global OpenCL workspace.
Expand Down Expand Up @@ -290,6 +291,7 @@ class OpenCLWorkspace : public DeviceAPI {
void* AllocDataSpace(Device dev, size_t size, size_t alignment, DLDataType type_hint) final;
void* AllocDataSpace(Device dev, int ndim, const int64_t* shape, DLDataType dtype,
Optional<String> mem_scope = NullOpt) final;
void* GetNativePtr(const tvm::runtime::NDArray& narr);
void FreeDataSpace(Device dev, void* ptr) final;
void StreamSync(Device dev, TVMStreamHandle stream) final;
void* AllocWorkspace(Device dev, size_t size, DLDataType type_hint) final;
Expand All @@ -310,6 +312,8 @@ class OpenCLWorkspace : public DeviceAPI {

void CopyDataFromTo(DLTensor* from, DLTensor* to, TVMStreamHandle stream) final;

void* CreateHostPtrIfEnabled(BufferDescriptor* desc, Device dev, size_t size);

private:
std::string GetError() {
if (this->devices.size() == 0) return noDevicesErrorMsg;
Expand Down Expand Up @@ -377,6 +381,7 @@ struct BufferDescriptor {
static String ScopeFromMemoryLayout(MemoryLayout mem_scope);

cl_mem buffer{nullptr};
cl_uchar* host_ptr{nullptr};
MemoryLayout layout{MemoryLayout::kBuffer1D};
};
} // namespace cl
Expand Down
32 changes: 29 additions & 3 deletions src/runtime/opencl/opencl_device_api.cc
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,12 @@

#include "opencl_common.h"

#ifdef OPENCL_ENABLE_HOST_PTR
#define CL_MEM_CREATE_FLAGS CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR
#else
#define CL_MEM_CREATE_FLAGS CL_MEM_READ_WRITE
#endif

namespace tvm {
namespace runtime {
namespace cl {
Expand Down Expand Up @@ -191,6 +197,17 @@ void OpenCLWorkspace::GetAttr(Device dev, DeviceAttrKind kind, TVMRetValue* rv)
}
}

void* OpenCLWorkspace::CreateHostPtrIfEnabled(cl::BufferDescriptor* desc, Device dev, size_t size) {
#if defined(OPENCL_ENABLE_HOST_PTR)
cl_int err_code;
desc->host_ptr = reinterpret_cast<cl_uchar*>(
clEnqueueMapBuffer(this->GetQueue(dev), desc->buffer, CL_TRUE, CL_MAP_WRITE, 0,
sizeof(cl_uchar) * size, 0, NULL, NULL, &err_code));
OPENCL_CHECK_ERROR(err_code);
#endif // OPENCL_ENABLE_HOST_PTR
return desc;
}

void* OpenCLWorkspace::AllocDataSpace(Device dev, size_t size, size_t alignment,
DLDataType type_hint) {
this->Init();
Expand All @@ -201,10 +218,10 @@ void* OpenCLWorkspace::AllocDataSpace(Device dev, size_t size, size_t alignment,
if (size == 0) {
size = 1;
}
desc->buffer = clCreateBuffer(this->context, CL_MEM_READ_WRITE, size, nullptr, &err_code);
desc->buffer = clCreateBuffer(this->context, CL_MEM_CREATE_FLAGS, size, nullptr, &err_code);
desc->layout = cl::BufferDescriptor::MemoryLayout::kBuffer1D;
OPENCL_CHECK_ERROR(err_code);
return desc;
return CreateHostPtrIfEnabled(desc, dev, size);
}

void* OpenCLWorkspace::AllocDataSpace(Device dev, int ndim, const int64_t* shape, DLDataType dtype,
Expand All @@ -226,12 +243,21 @@ void* OpenCLWorkspace::AllocDataSpace(Device dev, int ndim, const int64_t* shape
return desc;
}

void* OpenCLWorkspace::GetNativePtr(const tvm::runtime::NDArray& narr) {
cl::BufferDescriptor* desc = static_cast<cl::BufferDescriptor*>(narr.operator->()->data);
return desc->host_ptr;
}

void OpenCLWorkspace::FreeDataSpace(Device dev, void* ptr) {
// We have to make sure that the memory object is not in the command queue
// for some OpenCL platforms.
OPENCL_CALL(clFinish(this->GetQueue(dev)));

cl::BufferDescriptor* desc = static_cast<cl::BufferDescriptor*>(ptr);
if (desc->host_ptr) {
clEnqueueUnmapMemObject(this->GetQueue(dev), desc->buffer,
reinterpret_cast<void*>(desc->host_ptr), 0, NULL, NULL);
}
OPENCL_CALL(clReleaseMemObject(desc->buffer));
delete desc;
}
Expand All @@ -245,7 +271,7 @@ cl_mem OpenCLWorkspace::AllocTexture(Device dev, size_t width, size_t height,
cl_image_format format = {CL_RGBA, cl_type};
cl_image_desc descriptor = {CL_MEM_OBJECT_IMAGE2D, width, height, 0, 0, 0, 0, 0, 0};
cl_mem mptr =
clCreateImage(this->context, CL_MEM_READ_WRITE, &format, &descriptor, nullptr, &err_code);
clCreateImage(this->context, CL_MEM_CREATE_FLAGS, &format, &descriptor, nullptr, &err_code);
OPENCL_CHECK_ERROR(err_code);
return mptr;
}
Expand Down
31 changes: 31 additions & 0 deletions src/runtime/opencl/opencl_wrapper/opencl_wrapper.cc
Original file line number Diff line number Diff line change
Expand Up @@ -173,6 +173,11 @@ using f_clEnqueueNDRangeKernel = cl_int (*)(cl_command_queue, cl_kernel, cl_uint
cl_event*);
using f_clCreateCommandQueue = cl_command_queue (*)(cl_context, cl_device_id,
cl_command_queue_properties, cl_int*);
using f_clEnqueueUnmapMemObject = cl_int (*)(cl_command_queue, cl_mem, void*, cl_uint,
const cl_event*, cl_event*);
using f_clEnqueueMapBuffer = void* (*)(cl_command_queue, cl_mem, cl_bool, cl_map_flags, size_t,
size_t, cl_uint, const cl_event*, cl_event*, cl_int*);

} // namespace

cl_int clGetPlatformIDs(cl_uint num_entries, cl_platform_id* platforms, cl_uint* num_platforms) {
Expand Down Expand Up @@ -572,3 +577,29 @@ cl_command_queue clCreateCommandQueue(cl_context context, cl_device_id device,
return nullptr;
}
}

cl_int clEnqueueUnmapMemObject(cl_command_queue queue, cl_mem memobj, void* mapped_ptr,
cl_uint num_events_in_wait_list, const cl_event* event_wait_list,
cl_event* event) {
auto& lib = LibOpenCLWrapper::getInstance();
auto func = (f_clEnqueueUnmapMemObject)lib.getOpenCLFunction("clEnqueueUnmapMemObject");
if (func) {
return func(queue, memobj, mapped_ptr, num_events_in_wait_list, event_wait_list, event);
} else {
return CL_INVALID_PLATFORM;
}
}

void* clEnqueueMapBuffer(cl_command_queue command_queue, cl_mem buffer, cl_bool blocking_map,
cl_map_flags map_flags, size_t offset, size_t cb,
cl_uint num_events_in_wait_list, const cl_event* event_wait_list,
cl_event* event, cl_int* errcode_ret) {
auto& lib = LibOpenCLWrapper::getInstance();
auto func = (f_clEnqueueMapBuffer)lib.getOpenCLFunction("clEnqueueMapBuffer");
if (func) {
return func(command_queue, buffer, blocking_map, map_flags, offset, cb, num_events_in_wait_list,
event_wait_list, event, errcode_ret);
} else {
return nullptr;
}
}
5 changes: 5 additions & 0 deletions src/support/libinfo.cc
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,10 @@
#define TVM_INFO_USE_OPENCL "NOT-FOUND"
#endif

#ifndef TVM_INFO_USE_OPENCL_ENABLE_HOST_PTR
#define TVM_INFO_USE_OPENCL_ENABLE_HOST_PTR "NOT-FOUND"
#endif

#ifndef TVM_INFO_USE_OPENCL_GTEST
#define TVM_INFO_USE_OPENCL_GTEST "NOT-FOUND"
#endif
Expand Down Expand Up @@ -294,6 +298,7 @@ TVM_DLL Map<String, String> GetLibInfo() {
{"USE_MSVC_MT", TVM_INFO_USE_MSVC_MT},
{"USE_NNPACK", TVM_INFO_USE_NNPACK},
{"USE_OPENCL", TVM_INFO_USE_OPENCL},
{"USE_OPENCL_ENABLE_HOST_PTR", TVM_INFO_USE_OPENCL_ENABLE_HOST_PTR},
{"USE_OPENCL_GTEST", TVM_INFO_USE_OPENCL_GTEST},
{"USE_OPENMP", TVM_INFO_USE_OPENMP},
{"USE_PAPI", TVM_INFO_USE_PAPI},
Expand Down
36 changes: 36 additions & 0 deletions tests/cpp-runtime/opencl/opencl_nativeptr.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

#include <gtest/gtest.h>
#include <tvm/runtime/container/optional.h>

#include "../src/runtime/opencl/opencl_common.h"

using namespace tvm::runtime;
using namespace tvm::runtime::cl;

#if defined(OPENCL_ENABLE_HOST_PTR)
TEST(OpenCLNDArray, native_ptr) {
OpenCLWorkspace* workspace = OpenCLWorkspace::Global();

auto A = tvm::runtime::NDArray::Empty({128, 128}, {kDLFloat, 32, 1}, {kDLOpenCL, 0});
void* nptr = workspace->GetNativePtr(A);
memset(nptr, 0x0, 128 * 128 * 4);
}
#endif