diff --git a/include/tvm/runtime/profiling.h b/include/tvm/runtime/profiling.h index 606bf502c195..3cfb73f58e80 100644 --- a/include/tvm/runtime/profiling.h +++ b/include/tvm/runtime/profiling.h @@ -511,6 +511,29 @@ String ShapeString(const std::vector& shape, DLDataType dtype); PackedFunc ProfileFunction(Module mod, std::string func_name, int device_type, int device_id, int warmup_iters, Array collectors); +/*! + * \brief Wrap a timer function to measure the time cost of a given packed function. + * \param f The packed function to be timed. + * \param dev The device. + * \param number The number of times to run this function for taking average. + * We call these runs one `repeat` of measurement. + * \param repeat The number of times to repeat the measurement. + * In total, the function will be invoked (1 + number x repeat) times, + * where the first one is warm up and will be discarded. + * The returned result contains `repeat` costs, + * each of which is an average of `number` costs. + * \param min_repeat_ms The minimum duration of one `repeat` in milliseconds. + * By default, one `repeat` contains `number` runs. If this parameter is set, + * the parameter `number` will be dynamically adjusted to meet the + * minimum duration requirement of one `repeat`. + * i.e., when the run time of one `repeat` falls below this time, + * the `number` parameter will be automatically increased. + * \param f_preproc The function to be executed before we execute the time evaluator. + * \return f_timer A timer function. 
+ */ +PackedFunc WrapTimeEvaluator(PackedFunc f, Device dev, int number, int repeat, int min_repeat_ms, + PackedFunc f_preproc = nullptr); + } // namespace profiling } // namespace runtime } // namespace tvm diff --git a/src/runtime/graph_executor/debug/graph_executor_debug.cc b/src/runtime/graph_executor/debug/graph_executor_debug.cc index cf7a4cd04984..97d89206f5dc 100644 --- a/src/runtime/graph_executor/debug/graph_executor_debug.cc +++ b/src/runtime/graph_executor/debug/graph_executor_debug.cc @@ -113,7 +113,7 @@ class GraphExecutorDebug : public GraphExecutor { // assume host runs things which is first device Device& d = devices_[0]; - PackedFunc time_evaluator = WrapTimeEvaluator( + PackedFunc time_evaluator = profiling::WrapTimeEvaluator( TypedPackedFunc([this, node_index]() { this->RunOpHost(node_index); }), d, number, repeat, min_repeat_ms); std::string result = time_evaluator(); diff --git a/src/runtime/profiling.cc b/src/runtime/profiling.cc index 037cd1ce79a7..6d95a0fbd212 100644 --- a/src/runtime/profiling.cc +++ b/src/runtime/profiling.cc @@ -739,6 +739,61 @@ TVM_REGISTER_GLOBAL("runtime.profiling.ProfileFunction") } }); +PackedFunc WrapTimeEvaluator(PackedFunc pf, Device dev, int number, int repeat, int min_repeat_ms, + PackedFunc f_preproc) { + ICHECK(pf != nullptr); + + if (static_cast(dev.device_type) == static_cast(kDLMicroDev)) { + auto get_micro_time_evaluator = runtime::Registry::Get("micro._GetMicroTimeEvaluator"); + ICHECK(get_micro_time_evaluator != nullptr) << "micro backend not enabled"; + return (*get_micro_time_evaluator)(pf, dev, number, repeat); + } + + auto ftimer = [pf, dev, number, repeat, min_repeat_ms, f_preproc](TVMArgs args, + TVMRetValue* rv) mutable { + TVMRetValue temp; + std::ostringstream os; + // warm-up call (result discarded), to activate lazy compilation components. 
+ pf.CallPacked(args, &temp); + + DeviceAPI::Get(dev)->StreamSync(dev, nullptr); + + for (int i = 0; i < repeat; ++i) { + if (f_preproc != nullptr) { + f_preproc.CallPacked(args, &temp); + } + double duration_ms = 0.0; + + do { + if (duration_ms > 0.0) { + number = static_cast(std::max((min_repeat_ms / (duration_ms / number) + 1), + number * 1.618)); // growth factor 1.618 is an arbitrary choice + } + + Timer t = Timer::Start(dev); + // timed region: invoke pf `number` times + for (int i = 0; i < number; ++i) { + pf.CallPacked(args, &temp); + } + t->Stop(); + int64_t t_nanos = t->SyncAndGetElapsedNanos(); + duration_ms = t_nanos / 1e6; + } while (duration_ms < min_repeat_ms); + + double speed = duration_ms / 1e3 / number; + os.write(reinterpret_cast(&speed), sizeof(speed)); + } + + std::string blob = os.str(); + TVMByteArray arr; + arr.size = blob.length(); + arr.data = blob.data(); + // return the per-repeat average costs (seconds, raw doubles) as a byte array. + *rv = arr; + }; + return PackedFunc(ftimer); +} + } // namespace profiling } // namespace runtime } // namespace tvm diff --git a/src/runtime/rpc/rpc_module.cc b/src/runtime/rpc/rpc_module.cc index a13921195721..8e558fb6278e 100644 --- a/src/runtime/rpc/rpc_module.cc +++ b/src/runtime/rpc/rpc_module.cc @@ -357,61 +357,6 @@ inline void CPUCacheFlush(int begin_index, const TVMArgs& args) { } } -PackedFunc WrapTimeEvaluator(PackedFunc pf, Device dev, int number, int repeat, int min_repeat_ms, - PackedFunc f_preproc) { - ICHECK(pf != nullptr); - - if (static_cast(dev.device_type) == static_cast(kDLMicroDev)) { - auto get_micro_time_evaluator = runtime::Registry::Get("micro._GetMicroTimeEvaluator"); - ICHECK(get_micro_time_evaluator != nullptr) << "micro backend not enabled"; - return (*get_micro_time_evaluator)(pf, dev, number, repeat); - } - - auto ftimer = [pf, dev, number, repeat, min_repeat_ms, f_preproc](TVMArgs args, - TVMRetValue* rv) mutable { - TVMRetValue temp; - std::ostringstream os; - // skip first time call, to activate lazy compilation components. 
- pf.CallPacked(args, &temp); - - DeviceAPI::Get(dev)->StreamSync(dev, nullptr); - - for (int i = 0; i < repeat; ++i) { - if (f_preproc != nullptr) { - f_preproc.CallPacked(args, &temp); - } - double duration_ms = 0.0; - - do { - if (duration_ms > 0.0) { - number = static_cast(std::max((min_repeat_ms / (duration_ms / number) + 1), - number * 1.618)); // 1.618 is chosen by random - } - - Timer t = Timer::Start(dev); - // start timing - for (int i = 0; i < number; ++i) { - pf.CallPacked(args, &temp); - } - t->Stop(); - int64_t t_nanos = t->SyncAndGetElapsedNanos(); - duration_ms = t_nanos / 1e6; - } while (duration_ms < min_repeat_ms); - - double speed = duration_ms / 1e3 / number; - os.write(reinterpret_cast(&speed), sizeof(speed)); - } - - std::string blob = os.str(); - TVMByteArray arr; - arr.size = blob.length(); - arr.data = blob.data(); - // return the time. - *rv = arr; - }; - return PackedFunc(ftimer); -} - TVM_REGISTER_GLOBAL("runtime.RPCTimeEvaluator") .set_body_typed([](Optional opt_mod, std::string name, int device_type, int device_id, int number, int repeat, int min_repeat_ms, std::string f_preproc_name) { @@ -434,7 +379,7 @@ TVM_REGISTER_GLOBAL("runtime.RPCTimeEvaluator") } PackedFunc pf = m.GetFunction(name, true); CHECK(pf != nullptr) << "Cannot find " << name << " in the global registry"; - return WrapTimeEvaluator(pf, dev, number, repeat, min_repeat_ms, f_preproc); + return profiling::WrapTimeEvaluator(pf, dev, number, repeat, min_repeat_ms, f_preproc); } } else { auto* pf = runtime::Registry::Get(name); @@ -446,7 +391,7 @@ TVM_REGISTER_GLOBAL("runtime.RPCTimeEvaluator") << "Cannot find " << f_preproc_name << " in the global function"; f_preproc = *pf_preproc; } - return WrapTimeEvaluator(*pf, dev, number, repeat, min_repeat_ms, f_preproc); + return profiling::WrapTimeEvaluator(*pf, dev, number, repeat, min_repeat_ms, f_preproc); } }); diff --git a/src/runtime/rpc/rpc_session.h b/src/runtime/rpc/rpc_session.h index 8923103157d5..d78b3219bf3d 100644 
--- a/src/runtime/rpc/rpc_session.h +++ b/src/runtime/rpc/rpc_session.h @@ -282,29 +282,6 @@ struct RemoteSpace { std::shared_ptr sess; }; -/*! - * \brief Wrap a timer function to measure the time cost of a given packed function. - * \param f The function argument. - * \param dev The device. - * \param number The number of times to run this function for taking average. - * We call these runs as one `repeat` of measurement. - * \param repeat The number of times to repeat the measurement. - * In total, the function will be invoked (1 + number x repeat) times, - * where the first one is warm up and will be discarded. - * The returned result contains `repeat` costs, - * each of which is an average of `number` costs. - * \param min_repeat_ms The minimum duration of one `repeat` in milliseconds. - * By default, one `repeat` contains `number` runs. If this parameter is set, - * the parameters `number` will be dynamically adjusted to meet the - * minimum duration requirement of one `repeat`. - * i.e., When the run time of one `repeat` falls below this time, - * the `number` parameter will be automatically increased. - * \param f_preproc The function to be executed before we excetute time evaluator. - * \return f_timer A timer function. - */ -PackedFunc WrapTimeEvaluator(PackedFunc f, Device dev, int number, int repeat, int min_repeat_ms, - PackedFunc f_preproc = nullptr); - /*! * \brief Create a Global RPC module that refers to the session. * \param sess The RPC session of the global module.