Skip to content

Commit

Permalink
Move WrapTimeEvaluator from RPC to profiling, NFC (apache#11172)
Browse files Browse the repository at this point in the history
  • Loading branch information
Krzysztof Parzyszek authored and Sergey Shtin committed May 17, 2022
1 parent d17ba5e commit f588d87
Show file tree
Hide file tree
Showing 5 changed files with 81 additions and 81 deletions.
23 changes: 23 additions & 0 deletions include/tvm/runtime/profiling.h
Original file line number Diff line number Diff line change
Expand Up @@ -511,6 +511,29 @@ String ShapeString(const std::vector<int64_t>& shape, DLDataType dtype);
PackedFunc ProfileFunction(Module mod, std::string func_name, int device_type, int device_id,
int warmup_iters, Array<MetricCollector> collectors);

/*!
 * \brief Wrap a packed function in a timer function that measures its execution time.
 *
 * For devices of type kDLMicroDev the evaluator registered as
 * "micro._GetMicroTimeEvaluator" is returned instead; only `f`, `dev`, `number` and
 * `repeat` are forwarded to it.
 *
 * \param f The packed function to be timed. Must not be null.
 * \param dev The device on which the timer is started and synchronized.
 * \param number The number of times to run this function for taking average.
 *        We call these runs as one `repeat` of measurement.
 * \param repeat The number of times to repeat the measurement.
 *        In total, the function will be invoked (1 + number x repeat) times,
 *        where the first one is warm up and will be discarded. More invocations
 *        happen when `min_repeat_ms` forces a `repeat` to be measured again.
 *        The returned result contains `repeat` costs,
 *        each of which is an average of `number` costs.
 * \param min_repeat_ms The minimum duration of one `repeat` in milliseconds.
 *        By default, one `repeat` contains `number` runs. If this parameter is set,
 *        the parameter `number` will be dynamically adjusted to meet the
 *        minimum duration requirement of one `repeat`.
 *        i.e., When the run time of one `repeat` falls below this time,
 *        the `number` parameter will be automatically increased.
 * \param f_preproc Optional function that is called with the same arguments at the
 *        start of every `repeat`, before the timed runs begin. Skipped when null.
 * \return f_timer A timer function. Calling it returns a byte array that holds
 *         `repeat` raw `double` values, each the average time in seconds of one run.
 */
PackedFunc WrapTimeEvaluator(PackedFunc f, Device dev, int number, int repeat, int min_repeat_ms,
PackedFunc f_preproc = nullptr);

} // namespace profiling
} // namespace runtime
} // namespace tvm
Expand Down
2 changes: 1 addition & 1 deletion src/runtime/graph_executor/debug/graph_executor_debug.cc
Original file line number Diff line number Diff line change
Expand Up @@ -113,7 +113,7 @@ class GraphExecutorDebug : public GraphExecutor {

// assume host runs things which is first device
Device& d = devices_[0];
PackedFunc time_evaluator = WrapTimeEvaluator(
PackedFunc time_evaluator = profiling::WrapTimeEvaluator(
TypedPackedFunc<void()>([this, node_index]() { this->RunOpHost(node_index); }), d, number,
repeat, min_repeat_ms);
std::string result = time_evaluator();
Expand Down
55 changes: 55 additions & 0 deletions src/runtime/profiling.cc
Original file line number Diff line number Diff line change
Expand Up @@ -739,6 +739,61 @@ TVM_REGISTER_GLOBAL("runtime.profiling.ProfileFunction")
}
});

/*!
 * \brief Build a timer function around `pf`.
 *
 * The returned function runs `pf` once as a discarded warm-up, then performs `repeat`
 * measurements of `number` back-to-back calls each. A measurement that finishes in less
 * than `min_repeat_ms` is redone with a larger `number`. The enlarged `number` is kept
 * for the following repeats. The result is a byte array of `repeat` raw doubles, each
 * the average time of one call in seconds.
 *
 * Devices of type kDLMicroDev are handed to "micro._GetMicroTimeEvaluator" instead.
 */
PackedFunc WrapTimeEvaluator(PackedFunc pf, Device dev, int number, int repeat, int min_repeat_ms,
                             PackedFunc f_preproc) {
  ICHECK(pf != nullptr);

  // Micro devices are timed by the evaluator that the micro backend registers.
  const bool is_micro_dev = static_cast<int>(dev.device_type) == static_cast<int>(kDLMicroDev);
  if (is_micro_dev) {
    auto get_micro_time_evaluator = runtime::Registry::Get("micro._GetMicroTimeEvaluator");
    ICHECK(get_micro_time_evaluator != nullptr) << "micro backend not enabled";
    return (*get_micro_time_evaluator)(pf, dev, number, repeat);
  }

  // `mutable` because the closure's own copy of `number` grows during calibration.
  auto ftimer = [pf, dev, number, repeat, min_repeat_ms, f_preproc](TVMArgs args,
                                                                    TVMRetValue* rv) mutable {
    TVMRetValue scratch;
    std::ostringstream results;

    // Warm-up call: lets lazily compiled components initialize; its cost is not recorded.
    pf.CallPacked(args, &scratch);
    DeviceAPI::Get(dev)->StreamSync(dev, nullptr);

    for (int rep = 0; rep < repeat; ++rep) {
      // Optional pre-processing hook runs outside the timed region.
      if (f_preproc != nullptr) {
        f_preproc.CallPacked(args, &scratch);
      }

      double elapsed_ms = 0.0;
      do {
        // Only true on a retry: the previous attempt was shorter than min_repeat_ms.
        if (elapsed_ms > 0.0) {
          // Runs estimated to reach min_repeat_ms at the per-run time just observed.
          const double runs_for_min_duration = min_repeat_ms / (elapsed_ms / number) + 1;
          // Always grow by at least this factor; 1.618 is an arbitrary choice.
          const double grown_runs = number * 1.618;
          number = static_cast<int>(std::max(runs_for_min_duration, grown_runs));
        }

        Timer timer = Timer::Start(dev);
        // Timed region: `number` consecutive calls.
        for (int run = 0; run < number; ++run) {
          pf.CallPacked(args, &scratch);
        }
        timer->Stop();
        const int64_t elapsed_nanos = timer->SyncAndGetElapsedNanos();
        elapsed_ms = elapsed_nanos / 1e6;
      } while (elapsed_ms < min_repeat_ms);

      // Average cost of one call, in seconds, appended as raw bytes.
      const double seconds_per_run = elapsed_ms / 1e3 / number;
      results.write(reinterpret_cast<const char*>(&seconds_per_run), sizeof(seconds_per_run));
    }

    // Hand the packed doubles back as a byte array.
    std::string blob = results.str();
    TVMByteArray arr;
    arr.data = blob.data();
    arr.size = blob.length();
    *rv = arr;
  };
  return PackedFunc(ftimer);
}

} // namespace profiling
} // namespace runtime
} // namespace tvm
59 changes: 2 additions & 57 deletions src/runtime/rpc/rpc_module.cc
Original file line number Diff line number Diff line change
Expand Up @@ -357,61 +357,6 @@ inline void CPUCacheFlush(int begin_index, const TVMArgs& args) {
}
}

/*!
 * \brief Build a timer function around `pf`.
 *
 * The returned function runs `pf` once as a discarded warm-up, then performs `repeat`
 * measurements of `number` back-to-back calls each. A measurement that finishes in less
 * than `min_repeat_ms` is redone with a larger `number`. The enlarged `number` is kept
 * for the following repeats. The result is a byte array of `repeat` raw doubles, each
 * the average time of one call in seconds.
 *
 * Devices of type kDLMicroDev are handed to "micro._GetMicroTimeEvaluator" instead.
 */
PackedFunc WrapTimeEvaluator(PackedFunc pf, Device dev, int number, int repeat, int min_repeat_ms,
                             PackedFunc f_preproc) {
  ICHECK(pf != nullptr);

  // Micro devices are timed by the evaluator that the micro backend registers.
  const bool is_micro_dev = static_cast<int>(dev.device_type) == static_cast<int>(kDLMicroDev);
  if (is_micro_dev) {
    auto get_micro_time_evaluator = runtime::Registry::Get("micro._GetMicroTimeEvaluator");
    ICHECK(get_micro_time_evaluator != nullptr) << "micro backend not enabled";
    return (*get_micro_time_evaluator)(pf, dev, number, repeat);
  }

  // `mutable` because the closure's own copy of `number` grows during calibration.
  auto ftimer = [pf, dev, number, repeat, min_repeat_ms, f_preproc](TVMArgs args,
                                                                    TVMRetValue* rv) mutable {
    TVMRetValue scratch;
    std::ostringstream results;

    // Warm-up call: lets lazily compiled components initialize; its cost is not recorded.
    pf.CallPacked(args, &scratch);
    DeviceAPI::Get(dev)->StreamSync(dev, nullptr);

    for (int rep = 0; rep < repeat; ++rep) {
      // Optional pre-processing hook runs outside the timed region.
      if (f_preproc != nullptr) {
        f_preproc.CallPacked(args, &scratch);
      }

      double elapsed_ms = 0.0;
      do {
        // Only true on a retry: the previous attempt was shorter than min_repeat_ms.
        if (elapsed_ms > 0.0) {
          // Runs estimated to reach min_repeat_ms at the per-run time just observed.
          const double runs_for_min_duration = min_repeat_ms / (elapsed_ms / number) + 1;
          // Always grow by at least this factor; 1.618 is an arbitrary choice.
          const double grown_runs = number * 1.618;
          number = static_cast<int>(std::max(runs_for_min_duration, grown_runs));
        }

        Timer timer = Timer::Start(dev);
        // Timed region: `number` consecutive calls.
        for (int run = 0; run < number; ++run) {
          pf.CallPacked(args, &scratch);
        }
        timer->Stop();
        const int64_t elapsed_nanos = timer->SyncAndGetElapsedNanos();
        elapsed_ms = elapsed_nanos / 1e6;
      } while (elapsed_ms < min_repeat_ms);

      // Average cost of one call, in seconds, appended as raw bytes.
      const double seconds_per_run = elapsed_ms / 1e3 / number;
      results.write(reinterpret_cast<const char*>(&seconds_per_run), sizeof(seconds_per_run));
    }

    // Hand the packed doubles back as a byte array.
    std::string blob = results.str();
    TVMByteArray arr;
    arr.data = blob.data();
    arr.size = blob.length();
    *rv = arr;
  };
  return PackedFunc(ftimer);
}

TVM_REGISTER_GLOBAL("runtime.RPCTimeEvaluator")
.set_body_typed([](Optional<Module> opt_mod, std::string name, int device_type, int device_id,
int number, int repeat, int min_repeat_ms, std::string f_preproc_name) {
Expand All @@ -434,7 +379,7 @@ TVM_REGISTER_GLOBAL("runtime.RPCTimeEvaluator")
}
PackedFunc pf = m.GetFunction(name, true);
CHECK(pf != nullptr) << "Cannot find " << name << " in the global registry";
return WrapTimeEvaluator(pf, dev, number, repeat, min_repeat_ms, f_preproc);
return profiling::WrapTimeEvaluator(pf, dev, number, repeat, min_repeat_ms, f_preproc);
}
} else {
auto* pf = runtime::Registry::Get(name);
Expand All @@ -446,7 +391,7 @@ TVM_REGISTER_GLOBAL("runtime.RPCTimeEvaluator")
<< "Cannot find " << f_preproc_name << " in the global function";
f_preproc = *pf_preproc;
}
return WrapTimeEvaluator(*pf, dev, number, repeat, min_repeat_ms, f_preproc);
return profiling::WrapTimeEvaluator(*pf, dev, number, repeat, min_repeat_ms, f_preproc);
}
});

Expand Down
23 changes: 0 additions & 23 deletions src/runtime/rpc/rpc_session.h
Original file line number Diff line number Diff line change
Expand Up @@ -282,29 +282,6 @@ struct RemoteSpace {
std::shared_ptr<RPCSession> sess;
};

/*!
 * \brief Wrap a packed function in a timer function that measures its execution time.
 *
 * For devices of type kDLMicroDev the evaluator registered as
 * "micro._GetMicroTimeEvaluator" is returned instead; only `f`, `dev`, `number` and
 * `repeat` are forwarded to it.
 *
 * \param f The packed function to be timed. Must not be null.
 * \param dev The device on which the timer is started and synchronized.
 * \param number The number of times to run this function for taking average.
 *        We call these runs as one `repeat` of measurement.
 * \param repeat The number of times to repeat the measurement.
 *        In total, the function will be invoked (1 + number x repeat) times,
 *        where the first one is warm up and will be discarded. More invocations
 *        happen when `min_repeat_ms` forces a `repeat` to be measured again.
 *        The returned result contains `repeat` costs,
 *        each of which is an average of `number` costs.
 * \param min_repeat_ms The minimum duration of one `repeat` in milliseconds.
 *        By default, one `repeat` contains `number` runs. If this parameter is set,
 *        the parameter `number` will be dynamically adjusted to meet the
 *        minimum duration requirement of one `repeat`.
 *        i.e., When the run time of one `repeat` falls below this time,
 *        the `number` parameter will be automatically increased.
 * \param f_preproc Optional function that is called with the same arguments at the
 *        start of every `repeat`, before the timed runs begin. Skipped when null.
 * \return f_timer A timer function. Calling it returns a byte array that holds
 *         `repeat` raw `double` values, each the average time in seconds of one run.
 */
PackedFunc WrapTimeEvaluator(PackedFunc f, Device dev, int number, int repeat, int min_repeat_ms,
PackedFunc f_preproc = nullptr);

/*!
* \brief Create a Global RPC module that refers to the session.
* \param sess The RPC session of the global module.
Expand Down

0 comments on commit f588d87

Please sign in to comment.