Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[runtime] AOTExecutor implementation and c target code-generator #10283

Merged
merged 21 commits into from
Mar 3, 2022
Merged
Show file tree
Hide file tree
Changes from 16 commits
Commits
Show all changes
21 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ tvm_option(USE_LLVM "Build with LLVM, can be set to specific llvm-config path" O
tvm_option(USE_STACKVM_RUNTIME "Include stackvm into the runtime" OFF)
tvm_option(USE_GRAPH_EXECUTOR "Build with tiny graph executor" ON)
tvm_option(USE_GRAPH_EXECUTOR_CUDA_GRAPH "Build with tiny graph executor with CUDA Graph for GPUs" OFF)
tvm_option(USE_AOT_EXECUTOR "Build with AOT executor" ON)
tvm_option(USE_PROFILER "Build profiler for the VM and graph executor" ON)
tvm_option(USE_OPENMP "Build with OpenMP thread pool implementation" OFF)
tvm_option(USE_RELAY_DEBUG "Building Relay in debug mode..." OFF)
Expand Down Expand Up @@ -395,6 +396,13 @@ if(USE_PROFILER)
list(APPEND RUNTIME_SRCS ${RUNTIME_VM_PROFILER_SRCS})
endif(USE_PROFILER)

if(USE_AOT_EXECUTOR)
# Compile the C++ AOT executor runtime sources into the main TVM runtime.
message(STATUS "Build with AOT Executor support...")
file(GLOB RUNTIME_AOT_EXECUTOR_SRCS src/runtime/aot_executor/*.cc)
list(APPEND RUNTIME_SRCS ${RUNTIME_AOT_EXECUTOR_SRCS})

endif(USE_AOT_EXECUTOR)

# Enable ctest if gtest is available
if(USE_GTEST)
# Check env var for backward compatibility. A better way to specify package
Expand Down
6 changes: 6 additions & 0 deletions include/tvm/relay/runtime.h
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,12 @@ class AttrRegistry;

namespace relay {

/*! \brief Value used with Runtime::name to indicate the C++ runtime. */
static constexpr const char* kTvmRuntimeCpp = "cpp";

/*! \brief Value used with Runtime::name to indicate the C runtime. */
static constexpr const char* kTvmRuntimeCrt = "crt";

/*!
* \brief Runtime information.
*
Expand Down
13 changes: 11 additions & 2 deletions include/tvm/runtime/metadata.h
Original file line number Diff line number Diff line change
Expand Up @@ -33,12 +33,13 @@
#include <tvm/runtime/c_runtime_api.h>
#ifdef __cplusplus
#include <tvm/runtime/metadata_base.h>
#endif
#include <tvm/support/span.h>
#endif

// Version number recorded in emitted artifacts for runtime checking.
#define TVM_METADATA_VERSION 1

#ifdef __cplusplus
namespace tvm {
namespace runtime {
namespace metadata {
Expand All @@ -51,7 +52,6 @@ static const constexpr int64_t kMetadataVersion = TVM_METADATA_VERSION;
} // namespace runtime
} // namespace tvm

#ifdef __cplusplus
extern "C" {
#endif

Expand All @@ -75,6 +75,13 @@ struct TVMMetadata {
const struct TVMTensorInfo* outputs;
/*! \brief Number of elements in `outputs` array. */
int64_t num_outputs;
/*! \brief Memory Pools needed by the AOT run_model function.
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You are not using run_model anymore, are you? In your previous branch indeed I'm seeing tvmgen_default_run_model generated, but after rebase it is replaced with tvmgen_default___tvm_main__.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That is right!

We had run (entry_point), run_model and tvm_main.
The entry_point is supposed to call run_model, however, run_model is identical to tvm_main -- therefore, it was removed due to no need of maintaining two symbols for relay and tir versions of main.

So I think it needs to be tvm_main now.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

changed to just "AOT main" since the function should probably eventually be renamed based on mod_name.

* The order of the elements is the same as in the arguments to run_model. That is to say,
* this array specifies the last `num_pools` arguments to run_model.
*/
const struct TVMTensorInfo* pools;
/*! \brief Number of elements in `pools` array. */
int64_t num_pools;
/*! \brief Name of the model, as passed to tvm.relay.build. */
const char* mod_name;
};
Expand Down Expand Up @@ -114,6 +121,8 @@ class MetadataNode : public MetadataBaseNode {
ArrayAccessor<struct TVMTensorInfo, TensorInfo> inputs();
inline int64_t num_outputs() const { return data_->num_outputs; }
ArrayAccessor<struct TVMTensorInfo, TensorInfo> outputs();
inline int64_t num_pools() const { return data_->num_pools; }
ArrayAccessor<struct TVMTensorInfo, TensorInfo> pools();
inline ::tvm::runtime::String mod_name() const { return ::tvm::runtime::String(data_->mod_name); }
const struct ::TVMMetadata* data() const { return data_; }
TVM_DECLARE_FINAL_OBJECT_INFO(MetadataNode, MetadataBaseNode);
Expand Down
2 changes: 1 addition & 1 deletion python/tvm/contrib/graph_executor.py
Original file line number Diff line number Diff line change
Expand Up @@ -189,7 +189,7 @@ def set_input(self, key=None, value=None, **params):
keys.sort(key=lambda x: -np.prod(params[x].shape))
for k in keys:
# TODO(zhiics) Skip the weights for submodule in a better way.
# We should use MetadataModule for initialization and remove
# We should use ConstLoaderModule for initialization and remove
# params from set_input
val = self._get_input(k)
if val:
Expand Down
9 changes: 9 additions & 0 deletions python/tvm/micro/model_library_format.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,10 @@ def generate_c_interface_header(
return metadata_header


# List of type_key for modules which are ephemeral and do not need to be exported.
EPHEMERAL_MODULE_TYPE_KEYS = ("metadata_module",)


def _populate_codegen_dir(mod, codegen_dir: str, module_name: str = None):
"""Populate the codegen sub-directory as part of a Model Library Format export.
Expand All @@ -79,6 +83,11 @@ def _populate_codegen_dir(mod, codegen_dir: str, module_name: str = None):
"""
dso_modules = mod._collect_dso_modules()
non_dso_modules = mod._collect_from_import_tree(lambda m: m not in dso_modules)

# Filter ephemeral modules which cannot be exported.
dso_modules = [m for m in dso_modules if m.type_key not in EPHEMERAL_MODULE_TYPE_KEYS]
non_dso_modules = [m for m in non_dso_modules if m.type_key not in EPHEMERAL_MODULE_TYPE_KEYS]

if non_dso_modules:
raise UnsupportedInModelLibraryFormatError(
f"Don't know how to export non-c or non-llvm modules; found: {non_dso_modules!r}"
Expand Down
10 changes: 10 additions & 0 deletions python/tvm/relay/backend/executor_factory.py
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,13 @@ def __init__(
executor_codegen_metadata,
devices,
):
fcreate = get_global_func("tvm.aot_executor_factory.create")
args = []
for k, v in params.items():
args.append(k)
args.append(ndarray.array(v))

self.module = fcreate(libmod, libmod_name, *args)
self.ir_mod = ir_mod
self.lowered_ir_mods = lowered_ir_mods
self.target = target
Expand All @@ -134,6 +141,9 @@ def get_executor_config(self):
def get_lib(self):
    """Return the compiled runtime library module held by this factory."""
    return self.lib

def export_library(self, file_name, fcompile=None, addons=None, **kwargs):
    """Export the underlying factory module as a shared library.

    Parameters
    ----------
    file_name : str
        Path to the output library file.

    fcompile : callable, optional
        Compilation driver, forwarded to Module.export_library.

    addons : optional
        Extra files to link, forwarded to Module.export_library.

    kwargs : dict
        Additional arguments forwarded to Module.export_library.
    """
    return self.module.export_library(file_name, fcompile, addons, **kwargs)


class GraphExecutorFactoryModule(ExecutorFactoryModule):
"""Graph executor factory module.
Expand Down
2 changes: 2 additions & 0 deletions python/tvm/runtime/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,3 +31,5 @@
from .module import load_module, enabled, system_lib
from .container import String, ShapeTuple
from .params import save_param_dict, load_param_dict

from . import executor
26 changes: 26 additions & 0 deletions python/tvm/runtime/executor/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

"""This module contains Python wrappers for the TVM C++ Executor implementations.
NOTE: at present, only AOT Executor is contained here. The others are:
- GraphExecutor, in python/tvm/contrib/graph_executor.py
- VM Executor, in python/tvm/runtime/vm.py
TODO(areusch): Consolidate these into this module.
Copy link
Member

@masahi masahi Mar 1, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I just realized that, we have two notions of executor. One is the runtime one above, the other is

def create_executor(kind="debug", mod=None, device=None, target="llvm", params=None):

which is used a lot in the test cases.

Do we intend to support create_executor(kind="aot", ...), given that we can now run things via the cpp runtime?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yeah that is a good point. added support here.

"""
from .aot_executor import AotModule
182 changes: 182 additions & 0 deletions python/tvm/runtime/executor/aot_executor.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,182 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
"""A Python wrapper for the Module-based Model Runtime Interface for Ahead-of-Time compilation."""

import numpy as np


class AotModule(object):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

(stylistic) : AotModule --> AOTModule or AoTModule ?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

i kind of find it easier to read the acronyms if we use CapWords, this also follows https://www.python.org/dev/peps/pep-0008/#class-names which is linked from numpydoc

"""Wraps the AOT executor runtime.Module.

This is a thin wrapper of the underlying TVM module.
you can also directly call set_input, run, and get_output
of underlying module functions

Parameters
----------
module : tvm.runtime.Module
The internal tvm module that holds the actual graph functions.

Attributes
----------
module : tvm.runtime.Module
The internal tvm module that holds the actual graph functions.

Examples
--------

.. code-block:: python

import tvm
from tvm import relay
from tvm.contrib import graph_executor

# build the library using graph executor
lib = relay.build(...)
lib.export_library("compiled_lib.so")
# load it back as a runtime
lib: tvm.runtime.Module = tvm.runtime.load_module("compiled_lib.so")
# Call the library factory function for default and create
# a new runtime.Module, wrap with graph module.
gmod = graph_executor.GraphModule(lib["default"](dev))
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Need update?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

fixed, thanks!

# use the graph module.
gmod.set_input("x", data)
gmod.run()
"""

def __init__(self, module):
self.module = module
self._set_input = module["set_input"]
self._run = module["run"]
self._get_output = module["get_output"]
self._get_input = module["get_input"]
self._get_num_outputs = module["get_num_outputs"]
self._get_input_index = module["get_input_index"]
self._get_num_inputs = module["get_num_inputs"]

def set_input(self, key=None, value=None, **params):
"""Set inputs to the module via kwargs

Parameters
----------
key : int or str
The input key

value : the input value.
The input key

params : dict of str to NDArray
Additional arguments
"""
if key is not None:
v = self._get_input(key)
if v is None:
raise RuntimeError("Could not find '%s' in graph's inputs" % key)
v.copyfrom(value)

if params:
# upload big arrays first to avoid memory issue in rpc mode
keys = list(params.keys())
keys.sort(key=lambda x: -np.prod(params[x].shape))
for k in keys:
# TODO(zhiics) Skip the weights for submodule in a better way.
# We should use MetadataModule for initialization and remove
# params from set_input
val = self._get_input(k)
if val:
self._get_input(k).copyfrom(params[k])

def run(self, **input_dict):
"""Run forward execution of the graph

Parameters
----------
input_dict: dict of str to NDArray
List of input values to be feed to
"""
if input_dict:
self.set_input(**input_dict)
self._run()

def get_num_outputs(self):
"""Get the number of outputs from the graph

Returns
-------
count : int
The number of outputs.
"""
return self._get_num_outputs()

def get_num_inputs(self):
"""Get the number of inputs to the graph

Returns
-------
count : int
The number of inputs.
"""
return self._get_num_inputs()

def get_input(self, index, out=None):
"""Get index-th input to out

Parameters
----------
index : int
The input index

out : NDArray
The output array container
"""
if out:
self._get_input(index).copyto(out)
return out

return self._get_input(index)

def get_input_index(self, name):
"""Get inputs index via input name.

Parameters
----------
name : str
The input key name

Returns
-------
index: int
The input index. -1 will be returned if the given input name is not found.
"""
return self._get_input_index(name)

def get_output(self, index, out=None):
"""Get index-th output to out

Parameters
----------
index : int
The output index

out : NDArray
The output array container
"""
if out:
self._get_output(index, out)
return out

return self._get_output(index)
Loading