Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Move WrapTimeEvaluator from RPC to profiling, NFC #11172

Merged
merged 1 commit into from
Apr 29, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 23 additions & 0 deletions include/tvm/runtime/profiling.h
Original file line number Diff line number Diff line change
Expand Up @@ -511,6 +511,29 @@ String ShapeString(const std::vector<int64_t>& shape, DLDataType dtype);
PackedFunc ProfileFunction(Module mod, std::string func_name, int device_type, int device_id,
int warmup_iters, Array<MetricCollector> collectors);

/*!
 * \brief Wrap a packed function in a timer function that measures its time cost.
 * \param f The packed function to be timed.
 * \param dev The device on which the timing is performed.
 * \param number The number of times to run this function for taking average.
 *        We call these runs as one `repeat` of measurement.
 * \param repeat The number of times to repeat the measurement.
 *        In total, the function will be invoked (1 + number x repeat) times
 *        when `number` is not adjusted by `min_repeat_ms`,
 *        where the first one is warm up and will be discarded.
 *        The returned result contains `repeat` costs,
 *        each of which is an average of `number` costs, in seconds.
 * \param min_repeat_ms The minimum duration of one `repeat` in milliseconds.
 *        By default, one `repeat` contains `number` runs. If this parameter is set,
 *        the parameter `number` will be dynamically adjusted to meet the
 *        minimum duration requirement of one `repeat`.
 *        i.e., when the run time of one `repeat` falls below this time,
 *        the `number` parameter will be automatically increased and the
 *        `repeat` is measured again.
 * \param f_preproc Optional function that is executed before each `repeat`
 *        of the time evaluator; it is skipped when null.
 * \return f_timer A timer function. Its result is a byte array that packs the
 *         `repeat` costs as raw doubles.
 */
PackedFunc WrapTimeEvaluator(PackedFunc f, Device dev, int number, int repeat, int min_repeat_ms,
PackedFunc f_preproc = nullptr);

} // namespace profiling
} // namespace runtime
} // namespace tvm
Expand Down
2 changes: 1 addition & 1 deletion src/runtime/graph_executor/debug/graph_executor_debug.cc
Original file line number Diff line number Diff line change
Expand Up @@ -113,7 +113,7 @@ class GraphExecutorDebug : public GraphExecutor {

// assume host runs things which is first device
Device& d = devices_[0];
PackedFunc time_evaluator = WrapTimeEvaluator(
PackedFunc time_evaluator = profiling::WrapTimeEvaluator(
TypedPackedFunc<void()>([this, node_index]() { this->RunOpHost(node_index); }), d, number,
repeat, min_repeat_ms);
std::string result = time_evaluator();
Expand Down
55 changes: 55 additions & 0 deletions src/runtime/profiling.cc
Original file line number Diff line number Diff line change
Expand Up @@ -739,6 +739,61 @@ TVM_REGISTER_GLOBAL("runtime.profiling.ProfileFunction")
}
});

/*!
 * \brief Wrap a packed function in a timer that reports its average running cost.
 *
 * The returned function first calls `pf` once as an untimed warm-up and synchronizes the
 * device, then takes `repeat` measurements. Each measurement times `number` back-to-back
 * calls of `pf`; while a measurement is shorter than `min_repeat_ms`, `number` is increased
 * and the measurement is retried. The result is a byte array holding `repeat` raw doubles,
 * each being the mean cost of one call in seconds.
 *
 * \param pf The packed function to be timed; must not be null.
 * \param dev The device used for timing and synchronization.
 * \param number Initial number of calls per measurement.
 * \param repeat Number of measurements to take.
 * \param min_repeat_ms Minimum duration of one measurement in milliseconds.
 * \param f_preproc Optional function invoked with the same arguments before each measurement.
 * \return The timer function.
 *
 * \note For kDLMicroDev devices the request is forwarded to "micro._GetMicroTimeEvaluator"
 *       with only (pf, dev, number, repeat).
 * \note `number` is captured by value in a mutable closure, so an increased value carries
 *       over to later measurements and to later calls of the returned function.
 */
PackedFunc WrapTimeEvaluator(PackedFunc pf, Device dev, int number, int repeat, int min_repeat_ms,
                             PackedFunc f_preproc) {
  ICHECK(pf != nullptr);

  if (static_cast<int>(dev.device_type) == static_cast<int>(kDLMicroDev)) {
    auto get_micro_time_evaluator = runtime::Registry::Get("micro._GetMicroTimeEvaluator");
    ICHECK(get_micro_time_evaluator != nullptr) << "micro backend not enabled";
    return (*get_micro_time_evaluator)(pf, dev, number, repeat);
  }

  auto ftimer = [pf, dev, number, repeat, min_repeat_ms, f_preproc](TVMArgs args,
                                                                    TVMRetValue* rv) mutable {
    // Minimum factor by which `number` grows between retries. The exact value is
    // arbitrary; it only has to be > 1 so that the retry loop makes progress.
    const double kGrowthFactor = 1.618;

    TVMRetValue temp;
    std::ostringstream os;
    // skip first time call, to activate lazy compilation components.
    pf.CallPacked(args, &temp);

    DeviceAPI::Get(dev)->StreamSync(dev, nullptr);

    for (int i = 0; i < repeat; ++i) {
      if (f_preproc != nullptr) {
        f_preproc.CallPacked(args, &temp);
      }
      double duration_ms = 0.0;
      bool measured = false;  // distinguishes "not timed yet" from "timed as exactly zero"

      do {
        if (duration_ms > 0.0) {
          // Estimate how many runs are needed to reach min_repeat_ms from the observed
          // per-run cost, but always grow by at least kGrowthFactor.
          number = static_cast<int>(std::max((min_repeat_ms / (duration_ms / number) + 1),
                                             number * kGrowthFactor));
        } else if (measured) {
          // The previous attempt reported zero elapsed time, so no per-run estimate is
          // available. Still grow `number`; otherwise the same too-short measurement would
          // be retried forever.
          number = static_cast<int>(std::max(number * kGrowthFactor, number + 1.0));
        }

        Timer t = Timer::Start(dev);
        // start timing
        for (int run = 0; run < number; ++run) {
          pf.CallPacked(args, &temp);
        }
        t->Stop();
        int64_t t_nanos = t->SyncAndGetElapsedNanos();
        duration_ms = t_nanos / 1e6;
        measured = true;
      } while (duration_ms < min_repeat_ms);

      // Mean cost of a single call, in seconds.
      double speed = duration_ms / 1e3 / number;
      os.write(reinterpret_cast<char*>(&speed), sizeof(speed));
    }

    std::string blob = os.str();
    TVMByteArray arr;
    arr.size = blob.length();
    arr.data = blob.data();
    // return the time.
    *rv = arr;
  };
  return PackedFunc(ftimer);
}

} // namespace profiling
} // namespace runtime
} // namespace tvm
59 changes: 2 additions & 57 deletions src/runtime/rpc/rpc_module.cc
Original file line number Diff line number Diff line change
Expand Up @@ -357,61 +357,6 @@ inline void CPUCacheFlush(int begin_index, const TVMArgs& args) {
}
}

/*!
 * \brief Wrap a packed function in a timer that reports its average running cost.
 *
 * The returned function first calls `pf` once as an untimed warm-up and synchronizes the
 * device, then takes `repeat` measurements. Each measurement times `number` back-to-back
 * calls of `pf`; while a measurement is shorter than `min_repeat_ms`, `number` is increased
 * and the measurement is retried. The result is a byte array holding `repeat` raw doubles,
 * each being the mean cost of one call in seconds.
 *
 * \param pf The packed function to be timed; must not be null.
 * \param dev The device used for timing and synchronization.
 * \param number Initial number of calls per measurement.
 * \param repeat Number of measurements to take.
 * \param min_repeat_ms Minimum duration of one measurement in milliseconds.
 * \param f_preproc Optional function invoked with the same arguments before each measurement.
 * \return The timer function.
 *
 * \note For kDLMicroDev devices the request is forwarded to "micro._GetMicroTimeEvaluator"
 *       with only (pf, dev, number, repeat).
 * \note `number` is captured by value in a mutable closure, so an increased value carries
 *       over to later measurements and to later calls of the returned function.
 */
PackedFunc WrapTimeEvaluator(PackedFunc pf, Device dev, int number, int repeat, int min_repeat_ms,
                             PackedFunc f_preproc) {
  ICHECK(pf != nullptr);

  if (static_cast<int>(dev.device_type) == static_cast<int>(kDLMicroDev)) {
    auto get_micro_time_evaluator = runtime::Registry::Get("micro._GetMicroTimeEvaluator");
    ICHECK(get_micro_time_evaluator != nullptr) << "micro backend not enabled";
    return (*get_micro_time_evaluator)(pf, dev, number, repeat);
  }

  auto ftimer = [pf, dev, number, repeat, min_repeat_ms, f_preproc](TVMArgs args,
                                                                    TVMRetValue* rv) mutable {
    // Minimum factor by which `number` grows between retries. The exact value is
    // arbitrary; it only has to be > 1 so that the retry loop makes progress.
    const double kGrowthFactor = 1.618;

    TVMRetValue temp;
    std::ostringstream os;
    // skip first time call, to activate lazy compilation components.
    pf.CallPacked(args, &temp);

    DeviceAPI::Get(dev)->StreamSync(dev, nullptr);

    for (int i = 0; i < repeat; ++i) {
      if (f_preproc != nullptr) {
        f_preproc.CallPacked(args, &temp);
      }
      double duration_ms = 0.0;
      bool measured = false;  // distinguishes "not timed yet" from "timed as exactly zero"

      do {
        if (duration_ms > 0.0) {
          // Estimate how many runs are needed to reach min_repeat_ms from the observed
          // per-run cost, but always grow by at least kGrowthFactor.
          number = static_cast<int>(std::max((min_repeat_ms / (duration_ms / number) + 1),
                                             number * kGrowthFactor));
        } else if (measured) {
          // The previous attempt reported zero elapsed time, so no per-run estimate is
          // available. Still grow `number`; otherwise the same too-short measurement would
          // be retried forever.
          number = static_cast<int>(std::max(number * kGrowthFactor, number + 1.0));
        }

        Timer t = Timer::Start(dev);
        // start timing
        for (int run = 0; run < number; ++run) {
          pf.CallPacked(args, &temp);
        }
        t->Stop();
        int64_t t_nanos = t->SyncAndGetElapsedNanos();
        duration_ms = t_nanos / 1e6;
        measured = true;
      } while (duration_ms < min_repeat_ms);

      // Mean cost of a single call, in seconds.
      double speed = duration_ms / 1e3 / number;
      os.write(reinterpret_cast<char*>(&speed), sizeof(speed));
    }

    std::string blob = os.str();
    TVMByteArray arr;
    arr.size = blob.length();
    arr.data = blob.data();
    // return the time.
    *rv = arr;
  };
  return PackedFunc(ftimer);
}

TVM_REGISTER_GLOBAL("runtime.RPCTimeEvaluator")
.set_body_typed([](Optional<Module> opt_mod, std::string name, int device_type, int device_id,
int number, int repeat, int min_repeat_ms, std::string f_preproc_name) {
Expand All @@ -434,7 +379,7 @@ TVM_REGISTER_GLOBAL("runtime.RPCTimeEvaluator")
}
PackedFunc pf = m.GetFunction(name, true);
CHECK(pf != nullptr) << "Cannot find " << name << " in the global registry";
return WrapTimeEvaluator(pf, dev, number, repeat, min_repeat_ms, f_preproc);
return profiling::WrapTimeEvaluator(pf, dev, number, repeat, min_repeat_ms, f_preproc);
}
} else {
auto* pf = runtime::Registry::Get(name);
Expand All @@ -446,7 +391,7 @@ TVM_REGISTER_GLOBAL("runtime.RPCTimeEvaluator")
<< "Cannot find " << f_preproc_name << " in the global function";
f_preproc = *pf_preproc;
}
return WrapTimeEvaluator(*pf, dev, number, repeat, min_repeat_ms, f_preproc);
return profiling::WrapTimeEvaluator(*pf, dev, number, repeat, min_repeat_ms, f_preproc);
}
});

Expand Down
23 changes: 0 additions & 23 deletions src/runtime/rpc/rpc_session.h
Original file line number Diff line number Diff line change
Expand Up @@ -282,29 +282,6 @@ struct RemoteSpace {
std::shared_ptr<RPCSession> sess;
};

/*!
 * \brief Wrap a packed function in a timer function that measures its time cost.
 * \param f The packed function to be timed.
 * \param dev The device on which the timing is performed.
 * \param number The number of times to run this function for taking average.
 *        We call these runs as one `repeat` of measurement.
 * \param repeat The number of times to repeat the measurement.
 *        In total, the function will be invoked (1 + number x repeat) times
 *        when `number` is not adjusted by `min_repeat_ms`,
 *        where the first one is warm up and will be discarded.
 *        The returned result contains `repeat` costs,
 *        each of which is an average of `number` costs, in seconds.
 * \param min_repeat_ms The minimum duration of one `repeat` in milliseconds.
 *        By default, one `repeat` contains `number` runs. If this parameter is set,
 *        the parameter `number` will be dynamically adjusted to meet the
 *        minimum duration requirement of one `repeat`.
 *        i.e., when the run time of one `repeat` falls below this time,
 *        the `number` parameter will be automatically increased and the
 *        `repeat` is measured again.
 * \param f_preproc Optional function that is executed before each `repeat`
 *        of the time evaluator; it is skipped when null.
 * \return f_timer A timer function. Its result is a byte array that packs the
 *         `repeat` costs as raw doubles.
 */
PackedFunc WrapTimeEvaluator(PackedFunc f, Device dev, int number, int repeat, int min_repeat_ms,
PackedFunc f_preproc = nullptr);

/*!
* \brief Create a Global RPC module that refers to the session.
* \param sess The RPC session of the global module.
Expand Down