Skip to content

Commit

Permalink
[runtime] AOTExecutor implementation and c target code-generator (apache#10283)
Browse files Browse the repository at this point in the history

* Add memory pools to Metadata classes.

* Move ShapeToJSON to utils.

* Track returned TensorType from AOTExecutorCodegen.

* Support calling Relay functions with Tuple.

* Expand supported TIR calling conventions to work with C++ runtime.

* Rename MetadataModule to ConstLoaderModule.

* Add runtime AOT executor module.

* Add AOT code-generation.

* Add a runtime Module to mux between .text Metadata and live Metadata.

* Move launch_param to namespace

* Add test of c++ AOT.

* Fix incongruity between kTvmRuntimeCrt constant

* Expand ExecutorCodegenMetadata to include AOT runtime metadata.

* commit cpp test

* Make Metadata compile under C.

* Ignore ephemeral metadata_module export_model_library_format.

 * This module does not need to be exported, since it is merely a C++
   wrapper around get_c_metadata, and get_metadata is not used in C.

* address manupa, kparszsyc, masahi comments.

* further address comments

* clang and python format

* Fix broken test

* Address lingering comments from masahi, kparszyzc
  • Loading branch information
areusch authored and pfk-beta committed Apr 11, 2022
1 parent 1be4baf commit 3e0a823
Show file tree
Hide file tree
Showing 39 changed files with 2,020 additions and 171 deletions.
8 changes: 8 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ tvm_option(USE_LLVM "Build with LLVM, can be set to specific llvm-config path" O
tvm_option(USE_STACKVM_RUNTIME "Include stackvm into the runtime" OFF)
tvm_option(USE_GRAPH_EXECUTOR "Build with tiny graph executor" ON)
tvm_option(USE_GRAPH_EXECUTOR_CUDA_GRAPH "Build with tiny graph executor with CUDA Graph for GPUs" OFF)
tvm_option(USE_AOT_EXECUTOR "Build with AOT executor" ON)
tvm_option(USE_PROFILER "Build profiler for the VM and graph executor" ON)
tvm_option(USE_OPENMP "Build with OpenMP thread pool implementation" OFF)
tvm_option(USE_RELAY_DEBUG "Building Relay in debug mode..." OFF)
Expand Down Expand Up @@ -395,6 +396,13 @@ if(USE_PROFILER)
list(APPEND RUNTIME_SRCS ${RUNTIME_VM_PROFILER_SRCS})
endif(USE_PROFILER)

if(USE_AOT_EXECUTOR)
# Compile the C++ AOT executor runtime (src/runtime/aot_executor) into the
# TVM runtime library alongside the other runtime sources.
message(STATUS "Build with AOT Executor support...")
file(GLOB RUNTIME_AOT_EXECUTOR_SRCS src/runtime/aot_executor/*.cc)
list(APPEND RUNTIME_SRCS ${RUNTIME_AOT_EXECUTOR_SRCS})

endif(USE_AOT_EXECUTOR)

# Enable ctest if gtest is available
if(USE_GTEST)
# Check env var for backward compatibility. A better way to specify package
Expand Down
6 changes: 6 additions & 0 deletions include/tvm/relay/runtime.h
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,12 @@ class AttrRegistry;

namespace relay {

/*! \brief Value used with Runtime::name to indicate the C++ runtime. */
static constexpr const char* kTvmRuntimeCpp = "cpp";

/*! \brief Value used with Runtime::name to indicate the C runtime. */
static constexpr const char* kTvmRuntimeCrt = "crt";

/*!
* \brief Runtime information.
*
Expand Down
13 changes: 11 additions & 2 deletions include/tvm/runtime/metadata.h
Original file line number Diff line number Diff line change
Expand Up @@ -33,12 +33,13 @@
#include <tvm/runtime/c_runtime_api.h>
#ifdef __cplusplus
#include <tvm/runtime/metadata_base.h>
#endif
#include <tvm/support/span.h>
#endif

// Version number recorded in emitted artifacts for runtime checking.
#define TVM_METADATA_VERSION 1

#ifdef __cplusplus
namespace tvm {
namespace runtime {
namespace metadata {
Expand All @@ -51,7 +52,6 @@ static const constexpr int64_t kMetadataVersion = TVM_METADATA_VERSION;
} // namespace runtime
} // namespace tvm

#ifdef __cplusplus
extern "C" {
#endif

Expand All @@ -75,6 +75,13 @@ struct TVMMetadata {
const struct TVMTensorInfo* outputs;
/*! \brief Number of elements in `outputs` array. */
int64_t num_outputs;
/*! \brief Memory Pools needed by the AOT main function.
* The order of the elements is the same as in the arguments to run_model. That is to say,
* this array specifies the last `num_pools` arguments to run_model.
*/
const struct TVMTensorInfo* pools;
/*! \brief Number of elements in `pools` array. */
int64_t num_pools;
/*! \brief Name of the model, as passed to tvm.relay.build. */
const char* mod_name;
};
Expand Down Expand Up @@ -114,6 +121,8 @@ class MetadataNode : public MetadataBaseNode {
ArrayAccessor<struct TVMTensorInfo, TensorInfo> inputs();
inline int64_t num_outputs() const { return data_->num_outputs; }
ArrayAccessor<struct TVMTensorInfo, TensorInfo> outputs();
inline int64_t num_pools() const { return data_->num_pools; }
ArrayAccessor<struct TVMTensorInfo, TensorInfo> pools();
inline ::tvm::runtime::String mod_name() const { return ::tvm::runtime::String(data_->mod_name); }
const struct ::TVMMetadata* data() const { return data_; }
TVM_DECLARE_FINAL_OBJECT_INFO(MetadataNode, MetadataBaseNode);
Expand Down
2 changes: 2 additions & 0 deletions include/tvm/runtime/module.h
Original file line number Diff line number Diff line change
Expand Up @@ -217,6 +217,8 @@ TVM_DLL bool RuntimeEnabled(const std::string& target);

/*! \brief namespace for constant symbols */
namespace symbol {
/*! \brief A PackedFunc that retrieves exported metadata. */
constexpr const char* tvm_get_c_metadata = "get_c_metadata";
/*! \brief Global variable to store module context. */
constexpr const char* tvm_module_ctx = "__tvm_module_ctx";
/*! \brief Global variable to store device module blob */
Expand Down
2 changes: 1 addition & 1 deletion python/tvm/contrib/graph_executor.py
Original file line number Diff line number Diff line change
Expand Up @@ -189,7 +189,7 @@ def set_input(self, key=None, value=None, **params):
keys.sort(key=lambda x: -np.prod(params[x].shape))
for k in keys:
# TODO(zhiics) Skip the weights for submodule in a better way.
# We should use MetadataModule for initialization and remove
# We should use ConstLoaderModule for initialization and remove
# params from set_input
val = self._get_input(k)
if val:
Expand Down
9 changes: 9 additions & 0 deletions python/tvm/micro/model_library_format.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,10 @@ def generate_c_interface_header(
return metadata_header


# List of type_key for modules which are ephemeral and do not need to be exported.
EPHEMERAL_MODULE_TYPE_KEYS = ("metadata_module",)


def _populate_codegen_dir(mod, codegen_dir: str, module_name: str = None):
"""Populate the codegen sub-directory as part of a Model Library Format export.
Expand All @@ -79,6 +83,11 @@ def _populate_codegen_dir(mod, codegen_dir: str, module_name: str = None):
"""
dso_modules = mod._collect_dso_modules()
non_dso_modules = mod._collect_from_import_tree(lambda m: m not in dso_modules)

# Filter ephemeral modules which cannot be exported.
dso_modules = [m for m in dso_modules if m.type_key not in EPHEMERAL_MODULE_TYPE_KEYS]
non_dso_modules = [m for m in non_dso_modules if m.type_key not in EPHEMERAL_MODULE_TYPE_KEYS]

if non_dso_modules:
raise UnsupportedInModelLibraryFormatError(
f"Don't know how to export non-c or non-llvm modules; found: {non_dso_modules!r}"
Expand Down
10 changes: 10 additions & 0 deletions python/tvm/relay/backend/executor_factory.py
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,13 @@ def __init__(
executor_codegen_metadata,
devices,
):
fcreate = get_global_func("tvm.aot_executor_factory.create")
args = []
for k, v in params.items():
args.append(k)
args.append(ndarray.array(v))

self.module = fcreate(libmod, libmod_name, *args)
self.ir_mod = ir_mod
self.lowered_ir_mods = lowered_ir_mods
self.target = target
Expand All @@ -134,6 +141,9 @@ def get_executor_config(self):
def get_lib(self):
    """Return the compiled runtime library (tvm.runtime.Module) held by this factory."""
    return self.lib

def export_library(self, file_name, fcompile=None, addons=None, **kwargs):
    """Export the underlying executor factory module as a shared library.

    Parameters
    ----------
    file_name : str
        Path of the shared library file to write.
    fcompile : callable, optional
        Compilation function used to build the library; passed through unchanged.
    addons : optional
        Extra sources/objects forwarded to the underlying export; passed through unchanged.
    **kwargs
        Additional options forwarded to the runtime module's export_library.

    Returns
    -------
    The value returned by the wrapped module's export_library call.
    """
    # Delegate directly to the runtime factory module created in __init__.
    exported = self.module.export_library(file_name, fcompile, addons, **kwargs)
    return exported


class GraphExecutorFactoryModule(ExecutorFactoryModule):
"""Graph executor factory module.
Expand Down
82 changes: 77 additions & 5 deletions python/tvm/relay/build_module.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,11 @@
from tvm.tir import expr as tvm_expr
from tvm.target import Target
from .. import nd as _nd, autotvm, register_func
from ..runtime import load_module
from ..runtime.executor import aot_executor as _aot_executor
from ..target import Target
from ..contrib import graph_executor as _graph_rt
from ..contrib import graph_executor as _graph_executor
from ..contrib import utils as contrib_utils
from . import _build_module
from . import ty as _ty
from . import expr as _expr
Expand Down Expand Up @@ -612,7 +615,7 @@ def _make_executor(self, expr=None):
"Graph Executor only supports static graphs, got output type", ret_type
)
mod = build(self.mod, target=self.target)
gmodule = _graph_rt.GraphModule(mod["default"](self.device))
gmodule = _graph_executor.GraphModule(mod["default"](self.device))

def _unflatten(flat_iter, cur_type):
if isinstance(cur_type, _ty.TensorType):
Expand Down Expand Up @@ -641,6 +644,74 @@ def _graph_wrapper(*args, **kwargs):
return _graph_wrapper


class AotExecutor(_interpreter.Executor):
    """Implements the Executor interface for AOT.

    Parameters
    ----------
    mod : :py:class:`~tvm.IRModule`
        The module to support the execution.

    device : :py:class:`Device`
        The runtime device to run the code on.

    target : :py:class:`Target`
        The target option to build the function.
    """

    def __init__(self, mod, device, target):
        assert mod is not None
        self.mod = mod
        self.device = device
        self.target = target
        # AOT code-generation only applies when the target selects the "aot"
        # executor; "graph" is the default when the attribute is absent.
        assert target.attrs.get("executor", "graph") == "aot"

    def _make_executor(self, expr=None):
        """Build self.mod (optionally with `expr` installed as "main") and
        return a Python callable that runs it through the AOT runtime.
        """
        if expr:
            self.mod["main"] = expr
        self.mod = InferType()(self.mod)
        ret_type = self.mod["main"].checked_type.ret_type
        # AOT compiles a fixed entry point, so dynamic output shapes are rejected.
        if _ty.is_dynamic(ret_type):
            raise ValueError("AOT Executor only supports static graphs, got output type", ret_type)
        mod = build(self.mod, target=self.target)

        # NOTE: Given AOT requires use of the "c" backend, must export/import to compile the
        # generated code.
        temp_so_dir = contrib_utils.TempDirectory()
        temp_so = temp_so_dir / "temp.so"
        mod.export_library(temp_so, cc="gcc", options=["-std=c11"])

        # Reload the freshly-compiled shared library and wrap its default
        # device module with the Python AOT executor interface.
        mod = load_module(temp_so)
        aot_mod = mod["default"](self.device)
        gmodule = _aot_executor.AotModule(aot_mod)

        def _unflatten(flat_iter, cur_type):
            # Rebuild the (possibly nested) tuple return value from the flat
            # sequence of output tensors produced by the AOT module.
            if isinstance(cur_type, _ty.TensorType):
                return next(flat_iter)
            if isinstance(cur_type, _ty.TupleType):
                fields = []
                for field_type in cur_type.fields:
                    field = _unflatten(flat_iter, field_type)
                    fields.append(field)
                return fields
            raise ValueError("Return type", ret_type, "contains unsupported type", cur_type)

        def _aot_wrapper(*args, **kwargs):
            args = self._convert_args(self.mod["main"], args, kwargs)
            # Create map of inputs.
            for i, arg in enumerate(args):
                gmodule.set_input(i, arg)
            # Run the module, and fetch the output.
            gmodule.run()
            flattened = []
            for i in range(gmodule.get_num_outputs()):
                flattened.append(gmodule.get_output(i).copyto(_nd.cpu(0)))
            unflattened = _unflatten(iter(flattened), ret_type)
            return unflattened

        return _aot_wrapper


# TODO(mbs): Collapse the create_executor/evaluate phases together since a) most callers don't
# reuse the executor for multiple expressions and b) any preparation necessary for the expression
# evaluation needs to (currently) be done along with preparation for the module.
Expand All @@ -664,9 +735,8 @@ def create_executor(kind="debug", mod=None, device=None, target="llvm", params=N
Parameters
----------
kind : str
The type of executor. Avaliable options are `debug` for the
interpreter, `graph` for the graph executor, and `vm` for the virtual
machine.
The type of executor. Available options are `debug` for the interpreter, `graph` for the
graph executor, `aot` for the AOT executor, and `vm` for the virtual machine.
mod : :py:class:`~tvm.IRModule`
The Relay module containing collection of functions
Expand Down Expand Up @@ -703,4 +773,6 @@ def create_executor(kind="debug", mod=None, device=None, target="llvm", params=N
return GraphExecutor(mod, device, target)
if kind == "vm":
return VMExecutor(mod, device, target)
if kind == "aot":
return AotExecutor(mod, device, target)
raise RuntimeError("unknown execution strategy: {0}".format(kind))
2 changes: 2 additions & 0 deletions python/tvm/runtime/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,3 +31,5 @@
from .module import load_module, enabled, system_lib
from .container import String, ShapeTuple
from .params import save_param_dict, load_param_dict

from . import executor
26 changes: 26 additions & 0 deletions python/tvm/runtime/executor/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

"""This module contains Python wrappers for the TVM C++ Executor implementations.
NOTE: at present, only AOT Executor is contained here. The others are:
- GraphExecutor, in python/tvm/contrib/graph_executor.py
- VM Executor, in python/tvm/runtime/vm.py
TODO(areusch): Consolidate these into this module.
"""
from .aot_executor import AotModule
Loading

0 comments on commit 3e0a823

Please sign in to comment.