
[TVMC] Support compiling and running with VM #10722

Merged 6 commits on Mar 31, 2022 · Changes from 3 commits
79 changes: 65 additions & 14 deletions python/tvm/driver/tvmc/compiler.py
@@ -196,6 +196,7 @@ def compile_model(
disabled_pass: Optional[str] = None,
pass_context_configs: Optional[List[str]] = None,
additional_target_options: Optional[Dict[str, Dict[str, Any]]] = None,
use_vm: bool = False,
):
"""Compile a model from a supported framework into a TVM module.

@@ -243,7 +244,8 @@
PassContext.
additional_target_options: Optional[Dict[str, Dict[str, Any]]]
Additional target options in a dictionary to combine with initial Target arguments

use_vm: bool
Whether to use the VM to compile the model as opposed to the graph executor

Returns
-------
@@ -286,25 +288,40 @@
opt_level=opt_level, config=config, disabled_pass=disabled_pass
):
logger.debug("building relay graph with autoscheduler")
graph_module = relay.build(
mod, target=tvm_target, executor=executor, runtime=runtime, params=params
graph_module = build(
mod,
tvm_target=tvm_target,
executor=executor,
runtime=runtime,
params=params,
use_vm=use_vm,
)
else:
with autotvm.apply_history_best(tuning_records):
with tvm.transform.PassContext(
opt_level=opt_level, config=config, disabled_pass=disabled_pass
):
logger.debug("building relay graph with tuning records")
graph_module = relay.build(
mod, target=tvm_target, executor=executor, runtime=runtime, params=params
graph_module = build(
mod,
tvm_target=tvm_target,
executor=executor,
runtime=runtime,
params=params,
use_vm=use_vm,
)
else:
with tvm.transform.PassContext(
opt_level=opt_level, config=config, disabled_pass=disabled_pass
):
logger.debug("building relay graph (no tuning records provided)")
graph_module = relay.build(
mod, target=tvm_target, executor=executor, runtime=runtime, params=params
graph_module = build(
mod,
tvm_target=tvm_target,
executor=executor,
runtime=runtime,
params=params,
use_vm=use_vm,
)

# Generate output dump files with sources
@@ -314,26 +331,60 @@
dump_code = [dump_code]
dumps = {}
for source_type in dump_code:
lib = graph_module.get_lib()
if use_vm:
_, lib = graph_module.save()
else:
lib = graph_module.get_lib()
# TODO lib.get_source calls have inconsistent behavior for unsupported
# formats (@leandron).
source = str(mod) if source_type == "relay" else lib.get_source(source_type)
dumps[source_type] = source

# Create a new tvmc model package object from the graph definition.
package_path = tvmc_model.export_package(
graph_module,
package_path,
cross,
cross_options,
output_format,
graph_module, package_path, cross, cross_options, output_format, use_vm=use_vm
)

# Write dumps to file.
if dumps:
save_dumps(package_path, dumps)

return TVMCPackage(package_path)
return TVMCPackage(package_path, use_vm=use_vm)


def build(
mod: tvm.IRModule,
tvm_target: str,
executor: Executor,
runtime: Runtime,
params: Dict[str, tvm.nd.NDArray],
use_vm: bool,
):
"""
Build the relay module with either the graph executor or the VM, depending on use_vm.

Parameters
----------
mod : tvm.IRModule
The relay module corresponding to this model.
tvm_target : str
The target for which to compile. Can be a plain string or
a path.
executor : Executor
The executor to use for building the model when use_vm is not True.
runtime : Runtime
The runtime configuration.
params : dict
A parameter dictionary for the model.
use_vm: bool
Whether to use the VM to compile the model as opposed to the graph executor

"""
if use_vm:
logger.debug("building with vm compile")
return relay.vm.compile(mod, target=tvm_target, params=params)
logger.debug("building with relay build")
return relay.build(mod, target=tvm_target, executor=executor, runtime=runtime, params=params)
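
As a usage illustration (not part of the diff), the new flag can be passed through the TVMC Python API. This is a minimal sketch: the ONNX file name and package path are hypothetical, and everything other than `use_vm=True` follows the existing `compile_model` signature.

```python
# Hedged sketch: compiling through the TVMC Python API with the new flag.
# The model file and package path are illustrative.
from tvm.driver.tvmc import frontends
from tvm.driver.tvmc.compiler import compile_model

tvmc_model = frontends.load_model("resnet50-v2-7.onnx")  # hypothetical model file
package = compile_model(
    tvmc_model,
    target="llvm",
    package_path="resnet50_vm.tar",
    use_vm=True,  # dispatches to relay.vm.compile via the new build() helper
)
```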


def save_dumps(module_name: str, dumps: Dict[str, str], dump_root: str = "."):
77 changes: 71 additions & 6 deletions python/tvm/driver/tvmc/model.py
@@ -182,6 +182,42 @@ def default_package_path(self):
"""
return self._tmp_dir.relpath("model_package.tar")

def export_vm_format(
self,
vm_exec: tvm.runtime.vm.Executable,
package_path: Optional[str] = None,
lib_format: str = "so",
):
"""Save this TVMCModel compiled via vm to file.
Parameters
----------
vm_exec : vm.Executable
The VM Executable containing the compiled artifacts needed to run this model.
package_path : str, None
Where the model should be saved. Note that it will be packaged as a .tar file.
If not provided, the package will be saved to a generically named file in tmp.
lib_format : str
How to export the module's function library. Must be one of "so" or "tar".

Returns
-------
package_path : str
The path that the package was saved to.
"""
lib_name = "lib." + lib_format
temp = self._tmp_dir
if package_path is None:
package_path = self.default_package_path()

path_lib = temp.relpath(lib_name)
vm_exec.mod.export_library(path_lib)
self.lib_path = path_lib
# Package up all the temp files into a tar file.
with tarfile.open(package_path, "w") as tar:
tar.add(path_lib, lib_name)

return package_path
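
For orientation, a minimal sketch of the round trip this method enables, under the assumption that a relay module `mod` and `params` are already in hand; file names are illustrative and mirror what `export_vm_format` stores inside the .tar package.

```python
# Hedged sketch of exporting and reloading a VM executable.
import tvm
from tvm import relay
from tvm.runtime import vm

exe = relay.vm.compile(mod, target="llvm", params=params)  # mod/params assumed
exe.mod.export_library("lib.so")                           # what export_vm_format saves

loaded_lib = tvm.runtime.load_module("lib.so")
dev = tvm.cpu()
vm_runner = vm.VirtualMachine(loaded_lib, dev)             # same pattern run_module uses below
```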

def export_classic_format(
self,
executor_factory: GraphExecutorFactoryModule,
@@ -248,11 +284,12 @@ def export_classic_format(

def export_package(
self,
executor_factory: GraphExecutorFactoryModule,
executor_factory: Union[GraphExecutorFactoryModule, tvm.runtime.vm.Executable],
package_path: Optional[str] = None,
cross: Optional[Union[str, Callable]] = None,
cross_options: Optional[str] = None,
output_format: str = "so",
use_vm: bool = False,
):
"""Save this TVMCModel to file.
Parameters
@@ -281,7 +318,9 @@
if output_format == "mlf" and cross:
raise TVMCException("Specifying the MLF output and a cross compiler is not supported.")

if output_format in ["so", "tar"]:
if use_vm:
package_path = self.export_vm_format(executor_factory, package_path, output_format)
elif output_format in ["so", "tar"]:
package_path = self.export_classic_format(
executor_factory, package_path, cross, cross_options, output_format
)
@@ -314,11 +353,20 @@ class TVMCPackage(object):

project_dir : Path, str
If given and loading a MLF file, the path to the project directory that contains the file.

use_vm : bool
Whether the module was compiled with the VM rather than the graph executor.
"""

def __init__(self, package_path: str, project_dir: Optional[Union[Path, str]] = None):
def __init__(
self,
package_path: str,
project_dir: Optional[Union[Path, str]] = None,
use_vm: bool = False,
):
self._tmp_dir = utils.tempdir()
self.package_path = package_path
self.use_vm = use_vm
self.import_package(self.package_path)

if project_dir and self.type != "mlf":
@@ -337,7 +385,21 @@ def import_package(self, package_path: str):
t = tarfile.open(package_path)
t.extractall(temp.relpath("."))

if os.path.exists(temp.relpath("metadata.json")):
if self.use_vm:
self.type = "vm"
graph = None
params = None
lib_name_so = "lib.so"
lib_name_tar = "lib.tar"
if os.path.exists(temp.relpath(lib_name_so)):
self.lib_name = lib_name_so
elif os.path.exists(temp.relpath(lib_name_tar)):
self.lib_name = lib_name_tar
else:
raise TVMCException("Couldn't find exported library in the package.")

self.lib_path = temp.relpath(self.lib_name)
elif os.path.exists(temp.relpath("metadata.json")):
# Model Library Format (MLF)
self.lib_name = None
self.lib_path = None
@@ -366,8 +428,11 @@ def import_package(self, package_path: str):

self.type = "classic"

with open(params, "rb") as param_file:
self.params = bytearray(param_file.read())
if params is not None:
with open(params, "rb") as param_file:
self.params = bytearray(param_file.read())
else:
self.params = None

if graph is not None:
with open(graph) as graph_file:
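
A short illustration of what a VM package looks like once imported; the package path is hypothetical, and the attribute values follow the `import_package` logic shown above.

```python
# Hedged sketch: inspecting a VM package produced with use_vm=True.
from tvm.driver.tvmc.model import TVMCPackage

pkg = TVMCPackage("resnet50_vm.tar", use_vm=True)
print(pkg.type)      # "vm" -- set by import_package when use_vm is True
print(pkg.lib_path)  # path to the extracted lib.so (or lib.tar) in a temp dir
print(pkg.params)    # None -- a VM package carries no separate params blob
```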
117 changes: 71 additions & 46 deletions python/tvm/driver/tvmc/runner.py
@@ -28,8 +28,9 @@

import tvm
from tvm import rpc
from tvm.runtime import vm
from tvm.autotvm.measure import request_remote
from tvm.contrib import graph_executor as runtime
from tvm.contrib import graph_executor
from tvm.contrib.debugger import debug_executor
from . import TVMCException
from .arguments import TVMCSuppressedArgumentParser
@@ -530,58 +531,82 @@ def run_module(
assert device == "cpu"
dev = session.cpu()

# TODO(gromero): Adjust for micro targets.
if profile:
logger.debug("Creating runtime with profiling enabled.")
module = debug_executor.create(tvmc_package.graph, lib, dev, dump_root="./prof")
if tvmc_package.use_vm:
assert inputs is not None and isinstance(
inputs, dict
), "vm runner requires inputs to be provided as a dict"
exe = vm.VirtualMachine(lib, dev)
input_tensor = {}
for e, i in inputs.items():
input_tensor[e] = tvm.nd.array(i, dev)
exe.set_input("main", **input_tensor)
exe.invoke_stateful("main")
times = exe.benchmark(
dev,
**input_tensor,
func_name="main",
repeat=repeat,
number=number,
end_to_end=end_to_end,
)
exe_outputs = exe.get_outputs()
outputs = {}
for i, val in enumerate(exe_outputs):
output_name = "output_{}".format(i)
outputs[output_name] = val
else:
if device == "micro":
logger.debug("Creating runtime (micro) with profiling disabled.")
module = tvm.micro.create_local_graph_executor(tvmc_package.graph, lib, dev)
# TODO(gromero): Adjust for micro targets.
if profile:
logger.debug("Creating runtime with profiling enabled.")
module = debug_executor.create(tvmc_package.graph, lib, dev, dump_root="./prof")
else:
logger.debug("Creating runtime with profiling disabled.")
module = runtime.create(tvmc_package.graph, lib, dev)
if device == "micro":
logger.debug("Creating runtime (micro) with profiling disabled.")
module = tvm.micro.create_local_graph_executor(tvmc_package.graph, lib, dev)
else:
logger.debug("Creating runtime with profiling disabled.")
module = graph_executor.create(tvmc_package.graph, lib, dev)

logger.debug("Loading params into the runtime module.")
module.load_params(tvmc_package.params)
logger.debug("Loading params into the runtime module.")
module.load_params(tvmc_package.params)

logger.debug("Collecting graph input shape and type:")
shape_dict, dtype_dict = module.get_input_info()
logger.debug("Graph input shape: %s", shape_dict)
logger.debug("Graph input type: %s", dtype_dict)
logger.debug("Collecting graph input shape and type:")
shape_dict, dtype_dict = module.get_input_info()
logger.debug("Graph input shape: %s", shape_dict)
logger.debug("Graph input type: %s", dtype_dict)

inputs_dict = make_inputs_dict(shape_dict, dtype_dict, inputs, fill_mode)
inputs_dict = make_inputs_dict(shape_dict, dtype_dict, inputs, fill_mode)

logger.debug("Setting inputs to the module.")
module.set_input(**inputs_dict)
logger.debug("Setting inputs to the module.")
module.set_input(**inputs_dict)

# Run must be called explicitly if profiling
if profile:
logger.info("Running the module with profiling enabled.")
report = module.profile()
# This print is intentional
print(report)
# Run must be called explicitly if profiling
if profile:
logger.info("Running the module with profiling enabled.")
report = module.profile()
# This print is intentional
print(report)

if device == "micro":
# TODO(gromero): Fix time_evaluator() for micro targets. Once it's
# fixed module.benchmark() can be used instead and this if/else can
# be removed.
module.run()
times = []
else:
# Call the benchmarking function of the executor.
# Optionally measure e2e data transfers from the
# CPU to device memory overheads (e.g. PCIE
# overheads if the device is a discrete GPU).
if end_to_end:
dev = session.cpu()
times = module.benchmark(dev, number=number, repeat=repeat, end_to_end=end_to_end)

logger.debug("Collecting the output tensors.")
num_outputs = module.get_num_outputs()
outputs = {}
for i in range(num_outputs):
output_name = "output_{}".format(i)
outputs[output_name] = module.get_output(i).numpy()
if device == "micro":
# TODO(gromero): Fix time_evaluator() for micro targets. Once it's
# fixed module.benchmark() can be used instead and this if/else can
# be removed.
module.run()
times = []
else:
# Call the benchmarking function of the executor.
# Optionally measure e2e data transfers from the
# CPU to device memory overheads (e.g. PCIE
# overheads if the device is a discrete GPU).
if end_to_end:
dev = session.cpu()
times = module.benchmark(dev, number=number, repeat=repeat, end_to_end=end_to_end)

logger.debug("Collecting the output tensors.")
num_outputs = module.get_num_outputs()
outputs = {}
for i in range(num_outputs):
output_name = "output_{}".format(i)
outputs[output_name] = module.get_output(i).numpy()

return TVMCResult(outputs, times)
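
To tie the pieces together, a minimal sketch of running a VM-compiled package end to end; the package path, input name, and shape are hypothetical. Note that the VM path asserts that inputs are supplied as a dict, unlike the graph-executor path, which can fall back to generated fill data.

```python
# Hedged sketch: running a VM package with run_module. Input name/shape are illustrative.
import numpy as np
from tvm.driver.tvmc.model import TVMCPackage
from tvm.driver.tvmc.runner import run_module

pkg = TVMCPackage("resnet50_vm.tar", use_vm=True)
inputs = {"data": np.random.uniform(size=(1, 3, 224, 224)).astype("float32")}
result = run_module(pkg, device="cpu", inputs=inputs, repeat=3, number=10)
print(result.outputs["output_0"].shape)  # outputs are keyed output_0, output_1, ...
```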