Skip to content

Commit

Permalink
[FIX,PROFILING] Fix gpu timer name and lookup (#12849)
Browse files Browse the repository at this point in the history
* [FIX,PROFILING] Fix gpu timer name and lookup

In the switch from gpu to cuda naming, the CUDA timer was missed and was
still registered as "profiling.timer.gpu". Rename it to "profiling.timer.cuda"
so that the timer lookup, which goes by device name, correctly picks it up.

* warn if timer impl does not exist
Tristan Konolige authored Sep 21, 2022

Verified

This commit was signed with the committer’s verified signature.
1 parent d4e3207 commit b051cad
Showing 2 changed files with 23 additions and 8 deletions.
18 changes: 10 additions & 8 deletions src/runtime/cuda/cuda_device_api.cc
Original file line number Diff line number Diff line change
@@ -252,9 +252,11 @@ TVM_REGISTER_GLOBAL("device_api.cuda_host").set_body([](TVMArgs args, TVMRetValu
*rv = static_cast<void*>(ptr);
});

class GPUTimerNode : public TimerNode {
class CUDATimerNode : public TimerNode {
public:
virtual void Start() {
// This initial cudaEventRecord is sometimes pretty slow (~100us). Does
// cudaEventRecord do some stream synchronization?
CUDA_CALL(cudaEventRecord(start_, CUDAThreadEntry::ThreadLocal()->stream));
}
virtual void Stop() { CUDA_CALL(cudaEventRecord(stop_, CUDAThreadEntry::ThreadLocal()->stream)); }
@@ -264,27 +266,27 @@ class GPUTimerNode : public TimerNode {
CUDA_CALL(cudaEventElapsedTime(&milliseconds, start_, stop_));
return milliseconds * 1e6;
}
virtual ~GPUTimerNode() {
virtual ~CUDATimerNode() {
CUDA_CALL(cudaEventDestroy(start_));
CUDA_CALL(cudaEventDestroy(stop_));
}
GPUTimerNode() {
CUDATimerNode() {
CUDA_CALL(cudaEventCreate(&start_));
CUDA_CALL(cudaEventCreate(&stop_));
}

static constexpr const char* _type_key = "GPUTimerNode";
TVM_DECLARE_FINAL_OBJECT_INFO(GPUTimerNode, TimerNode);
static constexpr const char* _type_key = "CUDATimerNode";
TVM_DECLARE_FINAL_OBJECT_INFO(CUDATimerNode, TimerNode);

private:
cudaEvent_t start_;
cudaEvent_t stop_;
};

TVM_REGISTER_OBJECT_TYPE(GPUTimerNode);
TVM_REGISTER_OBJECT_TYPE(CUDATimerNode);

TVM_REGISTER_GLOBAL("profiling.timer.gpu").set_body_typed([](Device dev) {
return Timer(make_object<GPUTimerNode>());
TVM_REGISTER_GLOBAL("profiling.timer.cuda").set_body_typed([](Device dev) {
return Timer(make_object<CUDATimerNode>());
});

TVM_DLL String GetCudaFreeMemory() {
13 changes: 13 additions & 0 deletions src/runtime/profiling.cc
Original file line number Diff line number Diff line change
@@ -89,9 +89,22 @@ TVM_REGISTER_GLOBAL("profiling.timer.cpu").set_body_typed([](Device dev) {
return Timer(make_object<CPUTimerNode>());
});

// Device types for which Timer::Start has already logged the "No timer
// implementation" warning, so that the warning is emitted only once per
// device type rather than on every call that falls back to the default timer.
std::set<DLDeviceType> seen_devices;
// Guards seen_devices: Timer::Start holds this lock while it checks for and
// inserts a device type.
std::mutex seen_devices_lock;

Timer Timer::Start(Device dev) {
auto f = Registry::Get(std::string("profiling.timer.") + DeviceName(dev.device_type));
if (f == nullptr) {
{
std::lock_guard<std::mutex> lock(seen_devices_lock);
if (seen_devices.find(dev.device_type) == seen_devices.end()) {
LOG(WARNING)
<< "No timer implementation for " << DeviceName(dev.device_type)
<< ", using default timer instead. It may be inaccurate or have extra overhead.";
seen_devices.insert(dev.device_type);
}
}
Timer t = DefaultTimer(dev);
t->Start();
return t;

0 comments on commit b051cad

Please sign in to comment.