Skip to content

Commit

Permalink
[Runtime] Driver version + consistent clock speed units (#7867)
Browse files Browse the repository at this point in the history
* Added kDriverVersion to DeviceAttrKind, implemented for VulkanDeviceAPI.

The vulkan backend has had inconsistencies that look correlated to
drivers used.  This will help in collecting information for
troubleshooting.

* Changed units for OpenCL's clock rate from MHz to kHz, to match Cuda/ROCm.

* [Docs][Runtime] Additional documentation for tvm.runtime.Device, DeviceAPI feature matching

Primarily documentation, with some changes to the OpenCL DeviceAPI to
match available features in cuda/vulkan.

* Added CL_TARGET_OPENCL_VERSION definition, for use with unified OpenCL headers.

Co-authored-by: Eric Lunderberg <[email protected]>
  • Loading branch information
Lunderberg and Lunderberg authored Apr 22, 2021
1 parent 1c71a06 commit 46e0634
Show file tree
Hide file tree
Showing 8 changed files with 213 additions and 29 deletions.
3 changes: 2 additions & 1 deletion include/tvm/runtime/device_api.h
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,8 @@ enum DeviceAttrKind : int {
kMaxThreadDimensions = 8,
kMaxRegistersPerBlock = 9,
kGcnArch = 10,
kApiVersion = 11
kApiVersion = 11,
kDriverVersion = 12
};

/*! \brief Number of bytes each allocation must align to */
Expand Down
160 changes: 148 additions & 12 deletions python/tvm/_ffi/runtime_ctypes.py
Original file line number Diff line number Diff line change
Expand Up @@ -150,7 +150,16 @@ def __ne__(self, other):


class Device(ctypes.Structure):
"""TVM device strucure."""
"""TVM device strucure.
Typically constructed using convenience function
:meth:`tvm.runtime.device`.
Exposes uniform interface to device-specific APIs such as CUDA or
OpenCL. Some properties may return None depending on whether an
API exposes that particular property.
"""

_fields_ = [("device_type", ctypes.c_int), ("device_id", ctypes.c_int)]
MASK2STR = {
Expand Down Expand Up @@ -205,63 +214,190 @@ def _GetDeviceAttr(self, device_type, device_id, attr_id):

@property
def exist(self):
"""Whether this device exist."""
"""Whether this device exists.
Returns True if TVM has support for the device, if the
physical device is present, and the device is accessible
through appropriate drivers (e.g. cuda/vulkan).
Returns
-------
exist : bool
True if the device exists
"""
return self._GetDeviceAttr(self.device_type, self.device_id, 0) != 0

@property
def max_threads_per_block(self):
"""Maximum number of threads on each block."""
"""Maximum number of threads on each block.
Returns device value for cuda, metal, rocm, opencl, and vulkan
devices. Returns remote device value for RPC devices.
Returns None for all other devices.
Returns
-------
max_threads_per_block : int or None
The number of threads on each block
"""
return self._GetDeviceAttr(self.device_type, self.device_id, 1)

@property
def warp_size(self):
"""Number of threads that executes in concurrent."""
"""Number of threads that execute concurrently.
Returns device value for for cuda, rocm, and vulkan. Returns
1 for metal and opencl devices, regardless of the physical
device. Returns remote device value for RPC devices. Returns
None for all other devices.
Returns
-------
warp_size : int or None
Number of threads that execute concurrently
"""
return self._GetDeviceAttr(self.device_type, self.device_id, 2)

@property
def max_shared_memory_per_block(self):
"""Total amount of shared memory per block in bytes."""
"""Total amount of shared memory per block in bytes.
Returns device value for cuda, rocm, opencl, and vulkan.
Returns remote device value for RPC devices. Returns None for
all other devices.
Returns
-------
max_shared_memory_per_block : int or None
Total amount of shared memory per block in bytes
"""
return self._GetDeviceAttr(self.device_type, self.device_id, 3)

@property
def compute_version(self):
"""Get compute verison number in string.
"""Get compute version number as string.
Currently used to get compute capability of CUDA device.
Returns maximum API version (e.g. CUDA/OpenCL/Vulkan)
supported by the device.
Returns device value for cuda, rocm, opencl, and
vulkan. Returns remote device value for RPC devices. Returns
None for all other devices.
Returns
-------
version : str
version : str or None
The version string in `major.minor` format.
"""
return self._GetDeviceAttr(self.device_type, self.device_id, 4)

@property
def device_name(self):
"""Return the string name of device."""
"""Return the vendor-specific name of device.
Returns device value for cuda, rocm, opencl, and vulkan.
Returns remote device value for RPC devices. Returns None for
all other devices.
Returns
-------
device_name : str or None
The name of the device.
"""
return self._GetDeviceAttr(self.device_type, self.device_id, 5)

@property
def max_clock_rate(self):
"""Return the max clock frequency of device."""
"""Return the max clock frequency of device (kHz).
Returns device value for cuda, rocm, and opencl. Returns
remote device value for RPC devices. Returns None for all
other devices.
Returns
-------
max_clock_rate : int or None
The maximum clock frequency of the device (kHz)
"""
return self._GetDeviceAttr(self.device_type, self.device_id, 6)

@property
def multi_processor_count(self):
"""Return the number of compute units of device."""
"""Return the number of compute units in the device.
Returns device value for cuda, rocm, and opencl. Returns
remote device value for RPC devices. Returns None for all
other devices.
Returns
-------
multi_processor_count : int or None
Thee number of compute units in the device
"""
return self._GetDeviceAttr(self.device_type, self.device_id, 7)

@property
def max_thread_dimensions(self):
"""Return the maximum size of each thread axis
Returns device value for cuda, rocm, opencl, and vulkan.
Returns remote device value for RPC devices. Returns None for
all other devices.
Returns
-------
dims: List of int
dims: List of int, or None
The maximum length of threadIdx.x, threadIdx.y, threadIdx.z
"""
return json.loads(self._GetDeviceAttr(self.device_type, self.device_id, 8))

@property
def api_version(self):
"""Returns version number of the SDK used to compile TVM.
For example, CUDA_VERSION for cuda or VK_HEADER_VERSION for
Vulkan.
Returns device value for cuda, rocm, opencl, and vulkan.
Returns remote device value for RPC devices. Returns None for
all other devices.
Returns
-------
version : int or None
The version of the SDK
"""
return self._GetDeviceAttr(self.device_type, self.device_id, 12)

@property
def driver_version(self):
"""Returns version number of the driver
Returns driver vendor's internal version number.
(e.g. "450.408.256" for nvidia-driver-450)
Returns device value for opencl and vulkan. Returns remote
device value for RPC devices. Returns None for all other
devices.
Returns
-------
version : str or None
The version string in `major.minor.patch` format.
"""
return self._GetDeviceAttr(self.device_type, self.device_id, 12)

def create_raw_stream(self):
"""Create a new runtime stream at the context.
Expand Down
2 changes: 2 additions & 0 deletions src/runtime/cuda/cuda_device_api.cc
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,8 @@ class CUDADeviceAPI final : public DeviceAPI {
*rv = CUDA_VERSION;
return;
}
case kDriverVersion:
return;
}
*rv = value;
}
Expand Down
2 changes: 2 additions & 0 deletions src/runtime/metal/metal_device_api.mm
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,8 @@
return;
case kApiVersion:
return;
case kDriverVersion:
return;
}
}
}
Expand Down
11 changes: 11 additions & 0 deletions src/runtime/opencl/opencl_common.h
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,17 @@
*/
#define CL_USE_DEPRECATED_OPENCL_1_2_APIS

/* Newer releases of OpenCL header files (after May 2018) work with
* any OpenCL version, with an application's target version
* specified. Setting the target version disables APIs from after that
* version, and sets appropriate USE_DEPRECATED macros. The above
* macro for CL_USE_DEPRECATED_OPENCL_1_2_APIS is still needed in case
* we are compiling against the earlier version-specific OpenCL header
* files. This also allows us to expose the OpenCL version through
* tvm.runtime.Device.
*/
#define CL_TARGET_OPENCL_VERSION 120

#ifdef __APPLE__
#include <OpenCL/opencl.h>
#else
Expand Down
39 changes: 29 additions & 10 deletions src/runtime/opencl/opencl_device_api.cc
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,9 @@ namespace tvm {
namespace runtime {
namespace cl {

std::string GetPlatformInfo(cl_platform_id pid, cl_platform_info param_name);
std::string GetDeviceInfo(cl_device_id pid, cl_device_info param_name);

OpenCLThreadEntry* OpenCLWorkspace::GetThreadEntry() { return OpenCLThreadEntry::ThreadLocal(); }

OpenCLWorkspace* OpenCLWorkspace::Global() {
Expand Down Expand Up @@ -72,20 +75,27 @@ void OpenCLWorkspace::GetAttr(Device dev, DeviceAttrKind kind, TVMRetValue* rv)
*rv = static_cast<int64_t>(value);
break;
}
case kComputeVersion:
return;
case kDeviceName: {
char value[128] = {0};
OPENCL_CALL(
clGetDeviceInfo(devices[index], CL_DEVICE_NAME, sizeof(value) - 1, value, nullptr));
*rv = std::string(value);
case kComputeVersion: {
// String returned is "OpenCL $MAJOR.$MINOR $VENDOR_INFO". To
// match other implementations, we want to return "$MAJOR.$MINOR"
std::string ret = GetDeviceInfo(devices[index], CL_DEVICE_VERSION);

const size_t version_start = 7; // Length of initial "OpenCL " prefix to skip
const size_t version_end = ret.find(' ', version_start);
*rv = ret.substr(version_start, version_end - version_start);
break;
}
return;
case kDeviceName:
*rv = GetDeviceInfo(devices[index], CL_DEVICE_NAME);
break;
case kMaxClockRate: {
cl_uint value;
OPENCL_CALL(clGetDeviceInfo(devices[index], CL_DEVICE_MAX_CLOCK_FREQUENCY, sizeof(cl_uint),
&value, nullptr));
*rv = static_cast<int32_t>(value);
// OpenCL returns the clock rate in MHz, while CUDA/ROCm return the
// clock rate in kHz. Converting to the same units for each.
*rv = static_cast<int32_t>(value * 1000);
break;
}
case kMultiProcessorCount: {
Expand All @@ -109,8 +119,17 @@ void OpenCLWorkspace::GetAttr(Device dev, DeviceAttrKind kind, TVMRetValue* rv)
return;
case kGcnArch:
return;
case kApiVersion:
return;
case kApiVersion: {
*rv = CL_TARGET_OPENCL_VERSION;
break;
}
case kDriverVersion: {
char value[128] = {0};
OPENCL_CALL(
clGetDeviceInfo(devices[index], CL_DRIVER_VERSION, sizeof(value) - 1, value, nullptr));
*rv = std::string(value);
break;
}
}
}

Expand Down
2 changes: 2 additions & 0 deletions src/runtime/rocm/rocm_device_api.cc
Original file line number Diff line number Diff line change
Expand Up @@ -120,6 +120,8 @@ class ROCMDeviceAPI final : public DeviceAPI {
*rv = HIP_VERSION;
return;
}
case kDriverVersion:
return;
}
*rv = value;
}
Expand Down
23 changes: 17 additions & 6 deletions src/runtime/vulkan/vulkan.cc
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
#include <tvm/runtime/device_api.h>
#include <tvm/runtime/registry.h>
#include <vulkan/vulkan.h>
#include <vulkan/vulkan_core.h>

#include <array>
#include <cstring>
Expand Down Expand Up @@ -469,11 +470,12 @@ void VulkanDeviceAPI::GetAttr(Device dev, DeviceAttrKind kind, TVMRetValue* rv)
break;
}
case kDeviceName:
return;
*rv = std::string(phy_prop.deviceName);
break;
case kMaxClockRate:
return;
break;
case kMultiProcessorCount:
return;
break;
case kExist:
break;
case kMaxThreadDimensions: {
Expand All @@ -487,11 +489,20 @@ void VulkanDeviceAPI::GetAttr(Device dev, DeviceAttrKind kind, TVMRetValue* rv)
break;
}
case kMaxRegistersPerBlock:
return;
break;
case kGcnArch:
return;
break;
case kApiVersion:
return;
*rv = VK_HEADER_VERSION;
break;
case kDriverVersion: {
int64_t value = phy_prop.driverVersion;
std::ostringstream os;
os << VK_VERSION_MAJOR(value) << "." << VK_VERSION_MINOR(value) << "."
<< VK_VERSION_PATCH(value);
*rv = os.str();
break;
}
}
}

Expand Down

0 comments on commit 46e0634

Please sign in to comment.