[GRAPH EXECUTOR,VM] Add benchmarking function to graph executor and vm #8807
Changes from 3 commits
@@ -46,14 +46,15 @@
 import os
 import tarfile
 import json
-from typing import Optional, Union, List, Dict, Callable, TextIO
+from typing import Optional, Union, Dict, Callable, TextIO
 import numpy as np

 import tvm
 import tvm.contrib.cc
 from tvm import relay
 from tvm.contrib import utils
 from tvm.relay.backend.executor_factory import GraphExecutorFactoryModule
+from tvm.runtime.module import BenchmarkResult

 try:
     from tvm.micro import export_model_library_format
@@ -371,14 +372,14 @@ def import_package(self, package_path: str):
 class TVMCResult(object):
     """A class that stores the results of tvmc.run and provides helper utilities."""

-    def __init__(self, outputs: Dict[str, np.ndarray], times: List[float]):
+    def __init__(self, outputs: Dict[str, np.ndarray], times: BenchmarkResult):
         """Create a convenience wrapper around the output of tvmc.run

         Parameters
         ----------
         outputs : dict
             Outputs dictionary mapping the name of the output to its numpy value.
-        times : list of float
+        times : BenchmarkResult
             The execution times measured by the time evaluator in seconds to produce outputs.
         """
         self.outputs = outputs
@@ -390,29 +391,15 @@ def format_times(self):
         This has the effect of producing a small table that looks like:
         .. code-block::
             Execution time summary:
-             mean (ms)   max (ms)    min (ms)    std (ms)
-               0.14310    0.16161    0.12933    0.01004
+             mean (ms)   median (ms)   max (ms)    min (ms)    std (ms)
+               0.14310      0.14310    0.16161    0.12933     0.01004

         Returns
         -------
         str
             A formatted string containing the statistics.
         """
-        # timestamps
-        mean_ts = np.mean(self.times) * 1000
-        std_ts = np.std(self.times) * 1000
-        max_ts = np.max(self.times) * 1000
-        min_ts = np.min(self.times) * 1000
-
-        header = "Execution time summary:\n{0:^10} {1:^10} {2:^10} {3:^10}".format(
-            "mean (ms)", "max (ms)", "min (ms)", "std (ms)"
-        )
-        stats = "{0:^10.2f} {1:^10.2f} {2:^10.2f} {3:^10.2f}".format(
-            mean_ts, max_ts, min_ts, std_ts
-        )
-
-        return "%s\n%s\n" % (header, stats)
+        return str(times)
Review comment: I think it's probably a bad idea to change the semantics of time_evaluator, which is no doubt used in many places where it's expected to return the raw list of times. I would recommend moving the BenchmarkResult to the benchmark() functions and not changing time_evaluator itself.

Reply: This is just the printing code in TVMC; it is not modifying time_evaluator at all. The only change to the printing code is that it now outputs the median.
     def get_output(self, name: str):
         """A helper function to grab one of the outputs by name.
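For orientation, here is a rough usage sketch of the updated TVMCResult (not part of the diff; the values and output name are invented, and it assumes TVMCResult lives in tvm.driver.tvmc.model, since the file path is not shown in this extraction):

# Hypothetical sketch: construct a TVMCResult the way tvmc.run now does,
# with times carried as a BenchmarkResult rather than a list of floats.
import numpy as np

from tvm.runtime.module import BenchmarkResult
from tvm.driver.tvmc.model import TVMCResult

times = BenchmarkResult([0.12933, 0.14310, 0.16161])        # raw runtimes in seconds (invented)
outputs = {"output_0": np.zeros((1, 10), dtype="float32")}  # placeholder output tensor

result = TVMCResult(outputs, times)
print(times)  # prints the "Execution time summary" table that format_times now returns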
@@ -20,7 +20,8 @@
 import os
 import ctypes
 import struct
-from collections import namedtuple
+from typing import Sequence
+import numpy as np

 import tvm._ffi
 from tvm._ffi.base import _LIB, check_call, c_str, string_types, _RUNTIME_ONLY
@@ -30,8 +31,65 @@ | |
from . import _ffi_api | ||
|
||
|
||
# profile result of time evaluator | ||
ProfileResult = namedtuple("ProfileResult", ["mean", "results"]) | ||
class BenchmarkResult: | ||
"""Runtimes from benchmarking""" | ||
|
||
def __init__(self, results: Sequence[float]): | ||
"""Construct a new BenchmarkResult from a sequence of runtimes. | ||
|
||
Parameters | ||
---------- | ||
results : Sequence[float] | ||
Raw times from benchmarking | ||
|
||
Attributes | ||
---------- | ||
min : float | ||
Minimum runtime in seconds of all results. | ||
mean : float | ||
Mean runtime in seconds of all results. Note that this mean is not | ||
necessarily statistically correct as it is the mean of mean | ||
runtimes. | ||
median : float | ||
Median runtime in seconds of all results. Note that this is not necessarily | ||
statistically correct as it is the median of mean runtimes. | ||
max : float | ||
Maximum runtime in seconds of all results. | ||
std : float | ||
Standard deviation in seconds of runtimes. Note that this is not necessarily | ||
correct as it is the std of mean runtimes. | ||
results : Sequence[float] | ||
The collected runtimes (in seconds). This may be a series of mean runtimes if | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think this needs more explanation. Currently a BenchmarkResult object contains no information on the benchmark parameters that were used, and it would be best (IMHO) to avoid there being surprises in terms of the interpretation of the results based on how the object was created. My recommendation would be to either fully document the behavior of what it means to benchmark with 'number > 1' or ensure that the BenchmarkResult object itself contains the benchmark parameters used. |
||
the benchmark was run with `number` > 1. | ||
""" | ||
self.results = results | ||
self.mean = np.mean(self.results) | ||
self.std = np.std(self.results) | ||
self.median = np.median(self.results) | ||
self.min = np.min(self.results) | ||
self.max = np.max(self.results) | ||
|
||
def __repr__(self): | ||
return "BenchmarkResult(min={}, mean={}, median={}, max={}, std={}, results={})".format( | ||
self.min, self.mean, self.median, self.max, self.std, self.results | ||
) | ||
|
||
def __str__(self): | ||
return """Execution time summary: | ||
{:^12} {:^12} {:^12} {:^12} {:^12} | ||
{:^12.2f} {:^12.2f} {:^12.2f} {:^12.2f} {:^12.2f} | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 2 significant digits is not really enough for models that run quite fast. Would recommend using .4 instead. |
||
""".format( | ||
"mean (ms)", | ||
"median (ms)", | ||
"max (ms)", | ||
"min (ms)", | ||
"std (ms)", | ||
self.mean * 1000, | ||
self.median * 1000, | ||
self.max * 1000, | ||
self.min * 1000, | ||
self.std * 1000, | ||
) | ||
|
||
|
||
class Module(object): | ||
|
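As a concrete illustration of the `number` > 1 caveat discussed above (a sketch with invented numbers, not part of the diff): if the timer runs with, say, repeat=3 and number=5, each reported value is already an average over five inner runs, so BenchmarkResult computes statistics over per-repeat means.

from tvm.runtime.module import BenchmarkResult

# Invented values: three per-repeat measurements, each already a mean over
# `number` inner runs, as produced by the underlying timer.
per_repeat_means = [0.00139, 0.00142, 0.00147]

res = BenchmarkResult(per_repeat_means)
print(res.mean)     # mean of the per-repeat means (a "mean of means")
print(res.median)   # median of the per-repeat means
print(res.results)  # the raw per-repeat values are kept for further analysis
print(res)          # multi-line "Execution time summary" table from __str__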
@@ -209,7 +267,7 @@ def time_evaluator(self, func_name, dev, number=10, repeat=1, min_repeat_ms=0, f
         Returns
         -------
         ftimer : function
-            The function that takes same argument as func and returns a ProfileResult.
+            The function that takes same argument as func and returns a BenchmarkResult.
             The ProfileResult reports `repeat` time costs in seconds.
         """
         try:
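A minimal sketch of how the returned ftimer is now consumed (assumes an already-loaded runtime module `rt_mod` with an entry function named "main" and a device `dev`; both names are placeholders, not from the PR):

# ftimer now yields a BenchmarkResult instead of the old ProfileResult namedtuple.
ftimer = rt_mod.time_evaluator("main", dev, number=5, repeat=3)
bench = ftimer()               # runs the function and collects timings
print(bench.mean, bench.std)   # statistics over the 3 repeat measurements
print(bench.results)           # the 3 raw values, each averaged over 5 inner runs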
@@ -230,8 +288,7 @@ def evaluator(*args):
                 blob = feval(*args)
                 fmt = "@" + ("d" * repeat)
                 results = struct.unpack(fmt, blob)
-                mean = sum(results) / float(repeat)
-                return ProfileResult(mean=mean, results=results)
+                return BenchmarkResult(results)
Review comment: Does time_evaluator itself have a unit test that needs to be updated? If not, would it make sense to add a quick one?

Reply: time_evaluator was not changed, so it makes no sense to create/modify a unit test for it.

Review comment: Um, unless I'm reading this diff incorrectly (sorry if I'm getting confused by GitHub diffs!), this is a change to time_evaluator. Am I confused?

Reply: Oh yes, this is the Python binding for time_evaluator. The output is essentially the same, so there is no need to modify the tests.

Review comment: Well, OK, but the existing tests for time_evaluator seem kind of weak. Here we're adding a new class (BenchmarkResult) which calculates a number of new statistics, but there are no tests for it ... meaning ... a regression in this behavior (either a change to time_evaluator or BenchmarkResult) would not be caught by CI. At minimum I'd recommend a unit test for BenchmarkResult() itself, which is easy to write. Not trying to be too nitpicky, but I do feel like we should be constantly improving our test coverage, and adding a new class without corresponding tests tends to be a red flag for me :-) WDYT?

Reply: To clarify
             return evaluator
         except NameError:
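Along the lines suggested above, a unit test for BenchmarkResult itself could look something like this (a sketch; the test name and placement are assumptions, not part of the PR):

import numpy as np

from tvm.runtime.module import BenchmarkResult


def test_benchmark_result_statistics():
    # Fixed, exactly representable inputs make the expected statistics easy to check.
    raw = [0.25, 0.5, 0.75, 1.0]
    res = BenchmarkResult(raw)

    assert res.min == 0.25
    assert res.max == 1.0
    assert res.mean == 0.625
    assert res.median == 0.625
    assert np.isclose(res.std, np.std(raw))
    assert list(res.results) == raw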
@@ -23,6 +23,7 @@
 from tvm.contrib import graph_executor
 from tvm.relay.op import add
 import tvm.testing
+from tvm.relay.testing import mlp

 # @tq, @jr should we put this in testing ns?
 def check_rts(expr, args, expected_result, mod=None):

@@ -322,5 +323,17 @@ def test_graph_executor_api():
     assert mod.get_input_index("Invalid") == -1


+@tvm.testing.requires_llvm
+def test_benchmark():
+    mod, params = mlp.get_workload(1)
+    lib = relay.build(mod, target="llvm", params=params)
+    exe = graph_executor.create(lib.get_graph_json(), lib.lib, tvm.cpu())
+    data = tvm.nd.array(np.random.rand(1, 1, 28, 28).astype("float32"))
+    result = exe.benchmark(tvm.cpu(), data=data, func_name="run", repeat=2, number=1)
+    assert result.mean == result.median
+    assert result.mean > 0
+    assert len(result.results) == 2
+
+
 if __name__ == "__main__":
     pytest.main([__file__])

Review comment (on the exe.benchmark call): A better test here would be to use unittest.patch() on the time_evaluator function itself so it always returns deterministic times, and hence you can fully test the behavior of the benchmark() function by itself, independent of the actual module or time_evaluator behavior.
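One way to act on the patching suggestion above would be along these lines (a sketch under assumptions: it presumes benchmark() obtains its timings through the wrapped module's time_evaluator and that the wrapper exposes that module as exe.module; the test name and fixed values are invented):

from unittest import mock

import numpy as np
import tvm
import tvm.testing
from tvm import relay
from tvm.contrib import graph_executor
from tvm.relay.testing import mlp
from tvm.runtime.module import BenchmarkResult


@tvm.testing.requires_llvm
def test_benchmark_with_mocked_timer():
    # Same setup as test_benchmark above.
    mod, params = mlp.get_workload(1)
    lib = relay.build(mod, target="llvm", params=params)
    exe = graph_executor.create(lib.get_graph_json(), lib.lib, tvm.cpu())
    data = tvm.nd.array(np.random.rand(1, 1, 28, 28).astype("float32"))

    fixed = BenchmarkResult([0.25, 0.5, 0.75])
    # Replace the timer with one returning deterministic results, so the assertions
    # exercise benchmark() itself rather than real hardware measurements
    # (assumes benchmark() routes through the module's time_evaluator).
    with mock.patch.object(
        exe.module, "time_evaluator", return_value=lambda *args, **kwargs: fixed
    ):
        result = exe.benchmark(tvm.cpu(), data=data, func_name="run", repeat=3, number=1)

    assert result.mean == 0.5
    assert result.results == [0.25, 0.5, 0.75]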
@@ -30,6 +30,7 @@
 from tvm import rpc
 import tvm.testing
 from tvm.relay.transform import InferType
+from tvm.relay.testing import mlp


 def check_result(args, expected_result, mod=None):

@@ -955,5 +956,17 @@ def test_get_input_index():
     assert vm_factory.get_input_index("invalid") == -1


+@tvm.testing.requires_llvm
+def test_benchmark():
+    mod, params = mlp.get_workload(1)
+    lib = vm.compile(mod, target="llvm", params=params)
+    exe = runtime.vm.VirtualMachine(lib, tvm.cpu())
+    data = tvm.nd.array(np.random.rand(1, 1, 28, 28).astype("float32"))
+    result = exe.benchmark(tvm.cpu(), data, func_name="main", repeat=2, number=1)
+    assert result.mean == result.median
+    assert result.mean > 0
+    assert len(result.results) == 2
+
+
 if __name__ == "__main__":
     pytest.main([__file__])

Review comment (on test_benchmark): See comment above on using unittest.patch() so you can test the benchmark() function directly.
Review comment: I think this should be BenchmarkResult.