Skip to content

Commit

Permalink
[runtime] AOTExecutor implementation and c target code-generator (apache#10283)
Browse files Browse the repository at this point in the history

* Add memory pools to Metadata classes.

* Move ShapeToJSON to utils.

* Track returned TensorType from AOTExecutorCodegen.

* Support calling Relay functions with Tuple.

* Expand supported TIR calling conventions to work with C++ runtime.

* Rename MetadataModule to ConstLoaderModule.

* Add runtime AOT executor module.

* Add AOT code-generation.

* Add a runtime Module to mux between .text Metadata and live Metadata.

* Move launch_param to namespace

* Add test of c++ AOT.

* Fix incongruity between kTvmRuntimeCrt constant

* Expand ExecutorCodegenMetadata to include AOT runtime metadata.

* commit cpp test

* Make Metadata compile under C.

* Ignore ephemeral metadata_module export_model_library_format.

 * This module does not need to be exported, since it is merely a C++
   wrapper around get_c_metadata, and get_metadata is not used in C.

* address manupa, kparszsyc, masahi comments.

* further address comments

* clang and python format

* Fix broken test

* Address lingering comments from masahi, kparszyzc
  • Loading branch information
areusch authored and pfk-beta committed Apr 11, 2022
1 parent 1be4baf commit 3e0a823
Show file tree
Hide file tree
Showing 39 changed files with 2,020 additions and 171 deletions.
8 changes: 8 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ tvm_option(USE_LLVM "Build with LLVM, can be set to specific llvm-config path" O
tvm_option(USE_STACKVM_RUNTIME "Include stackvm into the runtime" OFF)
tvm_option(USE_GRAPH_EXECUTOR "Build with tiny graph executor" ON)
tvm_option(USE_GRAPH_EXECUTOR_CUDA_GRAPH "Build with tiny graph executor with CUDA Graph for GPUs" OFF)
tvm_option(USE_AOT_EXECUTOR "Build with AOT executor" ON)
tvm_option(USE_PROFILER "Build profiler for the VM and graph executor" ON)
tvm_option(USE_OPENMP "Build with OpenMP thread pool implementation" OFF)
tvm_option(USE_RELAY_DEBUG "Building Relay in debug mode..." OFF)
Expand Down Expand Up @@ -395,6 +396,13 @@ if(USE_PROFILER)
list(APPEND RUNTIME_SRCS ${RUNTIME_VM_PROFILER_SRCS})
endif(USE_PROFILER)

if(USE_AOT_EXECUTOR)
# Compile the C++ AOT executor runtime (src/runtime/aot_executor) into the
# TVM runtime library alongside the other runtime sources.
message(STATUS "Build with AOT Executor support...")
file(GLOB RUNTIME_AOT_EXECUTOR_SRCS src/runtime/aot_executor/*.cc)
list(APPEND RUNTIME_SRCS ${RUNTIME_AOT_EXECUTOR_SRCS})

endif(USE_AOT_EXECUTOR)

# Enable ctest if gtest is available
if(USE_GTEST)
# Check env var for backward compatibility. A better way to specify package
Expand Down
6 changes: 6 additions & 0 deletions include/tvm/relay/runtime.h
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,12 @@ class AttrRegistry;

namespace relay {

/*! \brief Value used with Runtime::name to indicate the C++ runtime. */
static constexpr const char* kTvmRuntimeCpp = "cpp";

/*! \brief Value used with Runtime::name to indicate the C runtime. */
static constexpr const char* kTvmRuntimeCrt = "crt";

/*!
* \brief Runtime information.
*
Expand Down
13 changes: 11 additions & 2 deletions include/tvm/runtime/metadata.h
Original file line number Diff line number Diff line change
Expand Up @@ -33,12 +33,13 @@
#include <tvm/runtime/c_runtime_api.h>
#ifdef __cplusplus
#include <tvm/runtime/metadata_base.h>
#endif
#include <tvm/support/span.h>
#endif

// Version number recorded in emitted artifacts for runtime checking.
#define TVM_METADATA_VERSION 1

#ifdef __cplusplus
namespace tvm {
namespace runtime {
namespace metadata {
Expand All @@ -51,7 +52,6 @@ static const constexpr int64_t kMetadataVersion = TVM_METADATA_VERSION;
} // namespace runtime
} // namespace tvm

#ifdef __cplusplus
extern "C" {
#endif

Expand All @@ -75,6 +75,13 @@ struct TVMMetadata {
const struct TVMTensorInfo* outputs;
/*! \brief Number of elements in `outputs` array. */
int64_t num_outputs;
/*! \brief Memory Pools needed by the AOT main function.
* The order of the elements is the same as in the arguments to run_model. That is to say,
* this array specifies the last `num_pools` arguments to run_model.
*/
const struct TVMTensorInfo* pools;
/*! \brief Number of elements in `pools` array. */
int64_t num_pools;
/*! \brief Name of the model, as passed to tvm.relay.build. */
const char* mod_name;
};
Expand Down Expand Up @@ -114,6 +121,8 @@ class MetadataNode : public MetadataBaseNode {
ArrayAccessor<struct TVMTensorInfo, TensorInfo> inputs();
inline int64_t num_outputs() const { return data_->num_outputs; }
ArrayAccessor<struct TVMTensorInfo, TensorInfo> outputs();
inline int64_t num_pools() const { return data_->num_pools; }
ArrayAccessor<struct TVMTensorInfo, TensorInfo> pools();
inline ::tvm::runtime::String mod_name() const { return ::tvm::runtime::String(data_->mod_name); }
const struct ::TVMMetadata* data() const { return data_; }
TVM_DECLARE_FINAL_OBJECT_INFO(MetadataNode, MetadataBaseNode);
Expand Down
2 changes: 2 additions & 0 deletions include/tvm/runtime/module.h
Original file line number Diff line number Diff line change
Expand Up @@ -217,6 +217,8 @@ TVM_DLL bool RuntimeEnabled(const std::string& target);

/*! \brief namespace for constant symbols */
namespace symbol {
/*! \brief A PackedFunc that retrieves exported metadata. */
constexpr const char* tvm_get_c_metadata = "get_c_metadata";
/*! \brief Global variable to store module context. */
constexpr const char* tvm_module_ctx = "__tvm_module_ctx";
/*! \brief Global variable to store device module blob */
Expand Down
2 changes: 1 addition & 1 deletion python/tvm/contrib/graph_executor.py
Original file line number Diff line number Diff line change
Expand Up @@ -189,7 +189,7 @@ def set_input(self, key=None, value=None, **params):
keys.sort(key=lambda x: -np.prod(params[x].shape))
for k in keys:
# TODO(zhiics) Skip the weights for submodule in a better way.
# We should use MetadataModule for initialization and remove
# We should use ConstLoaderModule for initialization and remove
# params from set_input
val = self._get_input(k)
if val:
Expand Down
9 changes: 9 additions & 0 deletions python/tvm/micro/model_library_format.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,10 @@ def generate_c_interface_header(
return metadata_header


# List of type_key for modules which are ephemeral and do not need to be exported.
EPHEMERAL_MODULE_TYPE_KEYS = ("metadata_module",)


def _populate_codegen_dir(mod, codegen_dir: str, module_name: str = None):
"""Populate the codegen sub-directory as part of a Model Library Format export.
Expand All @@ -79,6 +83,11 @@ def _populate_codegen_dir(mod, codegen_dir: str, module_name: str = None):
"""
dso_modules = mod._collect_dso_modules()
non_dso_modules = mod._collect_from_import_tree(lambda m: m not in dso_modules)

# Filter ephemeral modules which cannot be exported.
dso_modules = [m for m in dso_modules if m.type_key not in EPHEMERAL_MODULE_TYPE_KEYS]
non_dso_modules = [m for m in non_dso_modules if m.type_key not in EPHEMERAL_MODULE_TYPE_KEYS]

if non_dso_modules:
raise UnsupportedInModelLibraryFormatError(
f"Don't know how to export non-c or non-llvm modules; found: {non_dso_modules!r}"
Expand Down
10 changes: 10 additions & 0 deletions python/tvm/relay/backend/executor_factory.py
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,13 @@ def __init__(
executor_codegen_metadata,
devices,
):
fcreate = get_global_func("tvm.aot_executor_factory.create")
args = []
for k, v in params.items():
args.append(k)
args.append(ndarray.array(v))

self.module = fcreate(libmod, libmod_name, *args)
self.ir_mod = ir_mod
self.lowered_ir_mods = lowered_ir_mods
self.target = target
Expand All @@ -134,6 +141,9 @@ def get_executor_config(self):
def get_lib(self):
    """Return the compiled runtime library (tvm.runtime.Module) held by this factory."""
    return self.lib

def export_library(self, file_name, fcompile=None, addons=None, **kwargs):
    """Export the underlying executor factory module as a shared library.

    Parameters
    ----------
    file_name : str
        Path of the shared library file to write.
    fcompile : callable, optional
        Compilation function used to build the library; passed through unchanged.
    addons : optional
        Extra sources/objects forwarded to the underlying export; passed through unchanged.
    **kwargs
        Additional options forwarded to the runtime module's export_library.

    Returns
    -------
    The value returned by the wrapped module's export_library call.
    """
    # Delegate directly to the runtime factory module created in __init__.
    exported = self.module.export_library(file_name, fcompile, addons, **kwargs)
    return exported


class GraphExecutorFactoryModule(ExecutorFactoryModule):
"""Graph executor factory module.
Expand Down
82 changes: 77 additions & 5 deletions python/tvm/relay/build_module.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,11 @@
from tvm.tir import expr as tvm_expr
from tvm.target import Target
from .. import nd as _nd, autotvm, register_func
from ..runtime import load_module
from ..runtime.executor import aot_executor as _aot_executor
from ..target import Target
from ..contrib import graph_executor as _graph_rt
from ..contrib import graph_executor as _graph_executor
from ..contrib import utils as contrib_utils
from . import _build_module
from . import ty as _ty
from . import expr as _expr
Expand Down Expand Up @@ -612,7 +615,7 @@ def _make_executor(self, expr=None):
"Graph Executor only supports static graphs, got output type", ret_type
)
mod = build(self.mod, target=self.target)
gmodule = _graph_rt.GraphModule(mod["default"](self.device))
gmodule = _graph_executor.GraphModule(mod["default"](self.device))

def _unflatten(flat_iter, cur_type):
if isinstance(cur_type, _ty.TensorType):
Expand Down Expand Up @@ -641,6 +644,74 @@ def _graph_wrapper(*args, **kwargs):
return _graph_wrapper


class AotExecutor(_interpreter.Executor):
    """Implements the Executor interface for AOT.

    Parameters
    ----------
    mod : :py:class:`~tvm.IRModule`
        The module to support the execution.

    device : :py:class:`Device`
        The runtime device to run the code on.

    target : :py:class:`Target`
        The target option to build the function.
    """

    def __init__(self, mod, device, target):
        assert mod is not None
        self.mod = mod
        self.device = device
        self.target = target
        # AOT code-generation only applies when the target selects the "aot"
        # executor; "graph" is the default when the attribute is absent.
        assert target.attrs.get("executor", "graph") == "aot"

    def _make_executor(self, expr=None):
        """Build self.mod (optionally with `expr` installed as "main") and
        return a Python callable that runs it through the AOT runtime.
        """
        if expr:
            self.mod["main"] = expr
        self.mod = InferType()(self.mod)
        ret_type = self.mod["main"].checked_type.ret_type
        # AOT compiles a fixed entry point, so dynamic output shapes are rejected.
        if _ty.is_dynamic(ret_type):
            raise ValueError("AOT Executor only supports static graphs, got output type", ret_type)
        mod = build(self.mod, target=self.target)

        # NOTE: Given AOT requires use of the "c" backend, must export/import to compile the
        # generated code.
        temp_so_dir = contrib_utils.TempDirectory()
        temp_so = temp_so_dir / "temp.so"
        mod.export_library(temp_so, cc="gcc", options=["-std=c11"])

        # Reload the freshly-compiled shared library and wrap its default
        # device module with the Python AOT executor interface.
        mod = load_module(temp_so)
        aot_mod = mod["default"](self.device)
        gmodule = _aot_executor.AotModule(aot_mod)

        def _unflatten(flat_iter, cur_type):
            # Rebuild the (possibly nested) tuple return value from the flat
            # sequence of output tensors produced by the AOT module.
            if isinstance(cur_type, _ty.TensorType):
                return next(flat_iter)
            if isinstance(cur_type, _ty.TupleType):
                fields = []
                for field_type in cur_type.fields:
                    field = _unflatten(flat_iter, field_type)
                    fields.append(field)
                return fields
            raise ValueError("Return type", ret_type, "contains unsupported type", cur_type)

        def _aot_wrapper(*args, **kwargs):
            args = self._convert_args(self.mod["main"], args, kwargs)
            # Create map of inputs.
            for i, arg in enumerate(args):
                gmodule.set_input(i, arg)
            # Run the module, and fetch the output.
            gmodule.run()
            flattened = []
            for i in range(gmodule.get_num_outputs()):
                flattened.append(gmodule.get_output(i).copyto(_nd.cpu(0)))
            unflattened = _unflatten(iter(flattened), ret_type)
            return unflattened

        return _aot_wrapper


# TODO(mbs): Collapse the create_executor/evaluate phases together since a) most callers don't
# reuse the executor for multiple expressions and b) any preparation necessary for the expression
# evaluation needs to (currently) be done along with preparation for the module.
Expand All @@ -664,9 +735,8 @@ def create_executor(kind="debug", mod=None, device=None, target="llvm", params=N
Parameters
----------
kind : str
The type of executor. Avaliable options are `debug` for the
interpreter, `graph` for the graph executor, and `vm` for the virtual
machine.
The type of executor. Available options are `debug` for the interpreter, `graph` for the
graph executor, `aot` for the AOT executor, and `vm` for the virtual machine.
mod : :py:class:`~tvm.IRModule`
The Relay module containing collection of functions
Expand Down Expand Up @@ -703,4 +773,6 @@ def create_executor(kind="debug", mod=None, device=None, target="llvm", params=N
return GraphExecutor(mod, device, target)
if kind == "vm":
return VMExecutor(mod, device, target)
if kind == "aot":
return AotExecutor(mod, device, target)
raise RuntimeError("unknown execution strategy: {0}".format(kind))
2 changes: 2 additions & 0 deletions python/tvm/runtime/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,3 +31,5 @@
from .module import load_module, enabled, system_lib
from .container import String, ShapeTuple
from .params import save_param_dict, load_param_dict

from . import executor
26 changes: 26 additions & 0 deletions python/tvm/runtime/executor/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

"""This module contains Python wrappers for the TVM C++ Executor implementations.
NOTE: at present, only AOT Executor is contained here. The others are:
- GraphExecutor, in python/tvm/contrib/graph_executor.py
- VM Executor, in python/tvm/runtime/vm.py
TODO(areusch): Consolidate these into this module.
"""
from .aot_executor import AotModule
Loading

0 comments on commit 3e0a823

Please sign in to comment.