From 2d66f23bdc8e8a7cfbf8d36a4f400f39f6004807 Mon Sep 17 00:00:00 2001 From: Logan Weber <36520469+weberlo@users.noreply.github.com> Date: Thu, 25 Jul 2019 10:12:57 -0700 Subject: [PATCH] Implementation of uTVM (#3227) * uTVM interfaces (#14) * some minor interface changes * implemented HostLowLevelDevice * added MicroDeviceAPI * implemented micro_common and added Python interfaces * current status, semi implemented micro session * added micro_common implementation and python interfaces (#18) * added micro_common implementation and python interfaces (#18) * current status, semi implemented * host test working * updated interfaces for MicroSession arguments allocation * make somewhat lint compatible * fix based on comments * added rounding macro * fix minor bug * improvements based on comments * Clean up `binutil.py` and make Python-3-compatible * Change argument allocation design * Address feedback and lint errors * Improve binutil tests * Simplify allocator (per @tqchen's suggestions) * Doc/style fixes * farts * mcgee * rodata section werks (and so does `test_runtime_micro_workspace.py`) * simple graph runtime werk * TEMP * ResNet works, yo * First round of cleanup * More cleanup * runs a dyson over the code * Another pass * Fix `make lint` issues * ready to pr... probably * final * Undo change * Fix rebase resolution * Minor fixes * Undo changes to C codegen tests * Add `obj_path` in `create_micro_lib` * TEMP * Address feedback * Add missing TODO * Partially address feedback * Fix headers * Switch to enum class for `SectionKind` * Add missing ASF header * Fix lint * Fix lint again * Fix lint * Kill lint warnings * Address feedback * Change Python interface to MicroTVM All interaction with the device is now through `Session` objects, which are used through Python's `with` blocks. 
* Reorder LowLevelDevice interface * Store shared ptr to session in all alloced objects * Move helper functions out of `tvm.micro` * Switch static char arr to vector * Improve general infra and code quality Does not yet address all of tqchen's feedback * Forgot a rename * Fix lint * Add ASF header * Fix lint * Partially address MarisaKirisame's feedback * Lint * Expose `MicroSession` as a node to Python * Revert to using `Session` constructor * Fix compiler error * (Maybe) fix CI error * Debugging * Remove * Quell lint * Switch to stack-based session contexts * Make uTVM less intrusive to host codegen And use SSA for operands of generated ternary operators * Inline UTVMArgs into UTVMTask struct * Remove `HostLowLevelDevice` header * Remove `BaseAddr` class * Address feedback * Add "utvm" prefix to global vars in runtime * Fix lint * Fix CI * Fix `test_binutil.py` * Fix submodules * Remove ResNet tests * Make `test_binutil.py` work with nose * Fix CI * I swear this actually fixes the binutil tests * lint * lint * Add fcompile-compatible cross-compile func * Add docs for uTVM runtime files * Move pointer patching into `MicroSession` * Fix lint * First attempt at unifying cross-compile APIs * Fix lint * Rename `cross_compile` back to `cc` * Address feedback * Remove commented code * Lint * Figure out failing function * Remove debugging code * Change "micro_dev" target to "micro" * Add checks in tests for whether uTVM is enabled * Add TODO for 32-bit support * Rename more "micro_dev" to "micro" * Undo rename We already have `tvm.micro` as a namespace. Can't have it as a method as well. * Fix failing CI Thanks to @tqchen for finding this bug. Emitting ternary operators for `min` and `max` causes concurrency bugs in CUDA, so we're moving the ternary op emissions from `CodeGenC` to `CodeGenCHost`. 
* Address feedback * Fix lint --- CMakeLists.txt | 2 + cmake/config.cmake | 3 + cmake/modules/Micro.cmake | 22 + include/tvm/runtime/c_runtime_api.h | 1 + include/tvm/runtime/device_api.h | 1 + python/tvm/__init__.py | 2 +- python/tvm/_ffi/runtime_ctypes.py | 2 + python/tvm/contrib/binutil.py | 258 ++++++++++++ python/tvm/contrib/cc.py | 53 +-- python/tvm/micro/__init__.py | 9 + python/tvm/micro/base.py | 204 ++++++++++ python/tvm/ndarray.py | 16 + src/api/api_pass.cc | 4 +- src/codegen/codegen_c.cc | 12 +- src/codegen/codegen_c.h | 4 +- src/codegen/codegen_c_host.cc | 34 +- src/codegen/codegen_c_host.h | 24 +- .../micro/device/utvm_device_dylib_redirect.c | 57 +++ src/runtime/micro/device/utvm_runtime.c | 102 +++++ src/runtime/micro/device/utvm_runtime.h | 53 +++ src/runtime/micro/host_low_level_device.cc | 98 +++++ src/runtime/micro/low_level_device.h | 107 +++++ src/runtime/micro/micro_common.cc | 136 +++++++ src/runtime/micro/micro_common.h | 314 +++++++++++++++ src/runtime/micro/micro_device_api.cc | 173 ++++++++ src/runtime/micro/micro_module.cc | 126 ++++++ src/runtime/micro/micro_section_allocator.h | 122 ++++++ src/runtime/micro/micro_session.cc | 381 ++++++++++++++++++ src/runtime/micro/micro_session.h | 252 ++++++++++++ .../micro/target_data_layout_encoder.h | 190 +++++++++ src/runtime/module.cc | 6 +- tests/python/contrib/test_binutil.py | 151 +++++++ tests/python/unittest/test_codegen_c_host.py | 25 -- .../unittest/test_codegen_c_host_fadd.py | 140 +++++++ tests/python/unittest/test_runtime_micro.py | 306 ++++++++++++++ topi/python/topi/generic/nn.py | 2 +- topi/python/topi/testing/pool_grad_python.py | 2 +- 37 files changed, 3321 insertions(+), 73 deletions(-) create mode 100644 cmake/modules/Micro.cmake create mode 100644 python/tvm/contrib/binutil.py create mode 100644 python/tvm/micro/__init__.py create mode 100644 python/tvm/micro/base.py create mode 100644 src/runtime/micro/device/utvm_device_dylib_redirect.c create mode 100644 
src/runtime/micro/device/utvm_runtime.c create mode 100644 src/runtime/micro/device/utvm_runtime.h create mode 100644 src/runtime/micro/host_low_level_device.cc create mode 100644 src/runtime/micro/low_level_device.h create mode 100644 src/runtime/micro/micro_common.cc create mode 100644 src/runtime/micro/micro_common.h create mode 100644 src/runtime/micro/micro_device_api.cc create mode 100644 src/runtime/micro/micro_module.cc create mode 100644 src/runtime/micro/micro_section_allocator.h create mode 100644 src/runtime/micro/micro_session.cc create mode 100644 src/runtime/micro/micro_session.h create mode 100644 src/runtime/micro/target_data_layout_encoder.h create mode 100644 tests/python/contrib/test_binutil.py create mode 100644 tests/python/unittest/test_codegen_c_host_fadd.py create mode 100644 tests/python/unittest/test_runtime_micro.py diff --git a/CMakeLists.txt b/CMakeLists.txt index df04da06022b..9efce53c985a 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -36,6 +36,7 @@ tvm_option(USE_RELAY_DEBUG "Building Relay in debug mode..." OFF) tvm_option(USE_SGX "Build with SGX" OFF) tvm_option(USE_RTTI "Build with RTTI" ON) tvm_option(USE_MSVC_MT "Build with MT" OFF) +tvm_option(USE_MICRO "Build with Micro" OFF) tvm_option(INSTALL_DEV "Install compiler infrastructure" OFF) tvm_option(HIDE_PRIVATE_SYMBOLS "Compile with -fvisibility=hidden." 
OFF) @@ -208,6 +209,7 @@ include(cmake/modules/Metal.cmake) include(cmake/modules/ROCM.cmake) include(cmake/modules/SGX.cmake) include(cmake/modules/LLVM.cmake) +include(cmake/modules/Micro.cmake) include(cmake/modules/ANTLR.cmake) include(cmake/modules/contrib/BLAS.cmake) include(cmake/modules/contrib/Random.cmake) diff --git a/cmake/config.cmake b/cmake/config.cmake index 679de8d7e752..e2d49ddc3dfb 100644 --- a/cmake/config.cmake +++ b/cmake/config.cmake @@ -62,6 +62,9 @@ set(USE_VULKAN OFF) # Whether enable OpenGL runtime set(USE_OPENGL OFF) +# Whether enable MicroTVM runtime +set(USE_MICRO OFF) + # Whether to enable SGX runtime # # Possible values for USE_SGX: diff --git a/cmake/modules/Micro.cmake b/cmake/modules/Micro.cmake new file mode 100644 index 000000000000..edb5063fe68c --- /dev/null +++ b/cmake/modules/Micro.cmake @@ -0,0 +1,22 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +if(USE_MICRO) + message(STATUS "Build with Micro support") + file(GLOB RUNTIME_MICRO_SRCS src/runtime/micro/*.cc) + list(APPEND RUNTIME_SRCS ${RUNTIME_MICRO_SRCS}) +endif(USE_MICRO) diff --git a/include/tvm/runtime/c_runtime_api.h b/include/tvm/runtime/c_runtime_api.h index 2ae8e3afee1d..54e6f98e8ee5 100644 --- a/include/tvm/runtime/c_runtime_api.h +++ b/include/tvm/runtime/c_runtime_api.h @@ -81,6 +81,7 @@ typedef enum { kDLAOCL = 5, kDLSDAccel = 6, kOpenGL = 11, + kDLMicroDev = 13, // AddExtraTVMType which is not in DLPack here } TVMDeviceExtType; diff --git a/include/tvm/runtime/device_api.h b/include/tvm/runtime/device_api.h index 6986e62475fd..68029c13cb93 100644 --- a/include/tvm/runtime/device_api.h +++ b/include/tvm/runtime/device_api.h @@ -215,6 +215,7 @@ inline const char* DeviceName(int type) { case kDLROCM: return "rocm"; case kOpenGL: return "opengl"; case kDLExtDev: return "ext_dev"; + case kDLMicroDev: return "micro_dev"; default: LOG(FATAL) << "unknown type =" << type; return "Unknown"; } } diff --git a/python/tvm/__init__.py b/python/tvm/__init__.py index 5765eed0ad8b..56b8b3d9d298 100644 --- a/python/tvm/__init__.py +++ b/python/tvm/__init__.py @@ -42,7 +42,7 @@ from . 
import ndarray as nd from .ndarray import context, cpu, gpu, opencl, cl, vulkan, metal, mtl -from .ndarray import vpi, rocm, opengl, ext_dev +from .ndarray import vpi, rocm, opengl, ext_dev, micro_dev from ._ffi.runtime_ctypes import TypeCode, TVMType from ._ffi.ndarray import TVMContext diff --git a/python/tvm/_ffi/runtime_ctypes.py b/python/tvm/_ffi/runtime_ctypes.py index 54e0b8c85fdb..0d28abd46cb2 100644 --- a/python/tvm/_ffi/runtime_ctypes.py +++ b/python/tvm/_ffi/runtime_ctypes.py @@ -143,6 +143,7 @@ class TVMContext(ctypes.Structure): 10: 'rocm', 11: 'opengl', 12: 'ext_dev', + 13: 'micro_dev', } STR2MASK = { 'llvm': 1, @@ -163,6 +164,7 @@ class TVMContext(ctypes.Structure): 'rocm': 10, 'opengl': 11, 'ext_dev': 12, + 'micro_dev': 13, } def __init__(self, device_type, device_id): super(TVMContext, self).__init__() diff --git a/python/tvm/contrib/binutil.py b/python/tvm/contrib/binutil.py new file mode 100644 index 000000000000..a444cdc0495e --- /dev/null +++ b/python/tvm/contrib/binutil.py @@ -0,0 +1,258 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +"""Utilities for binary file manipulation""" +import os +import subprocess +from . 
import util +from .._ffi.base import py_str +from ..api import register_func + +@register_func("tvm_callback_get_section_size") +def tvm_callback_get_section_size(binary_path, section_name, toolchain_prefix): + """Finds size of the section in the binary. + Assumes `size` shell command exists (typically works only on Linux machines) + + Parameters + ---------- + binary_path : str + path of the binary file + + section_name : str + name of section + + toolchain_prefix : str + prefix for binary names in target compiler toolchain + + Returns + ------- + size : integer + size of the section in bytes + """ + if not os.path.isfile(binary_path): + raise RuntimeError("no such file \"{}\"".format(binary_path)) + # We use the "-A" flag here to get the ".rodata" section's size, which is + # not included by default. + size_proc = subprocess.Popen( + ["{}size".format(toolchain_prefix), "-A", binary_path], stdout=subprocess.PIPE) + (size_output, _) = size_proc.communicate() + size_output = size_output.decode("utf-8") + if size_proc.returncode != 0: + msg = "error in finding section size:\n" + msg += py_str(out) + raise RuntimeError(msg) + + # TODO(weberlo): Refactor this method and `*relocate_binary` so they are + # both aware of [".bss", ".sbss", ".sdata"] being relocated to ".bss". + section_mapping = { + ".text": [".text"], + ".rodata": [".rodata"], + ".data": [".data", ".sdata"], + ".bss": [".bss", ".sbss"], + } + sections_to_sum = section_mapping["." + section_name] + section_size = 0 + # Skip the first two header lines in the `size` output. 
+ for line in size_output.split("\n")[2:]: + tokens = list(filter(lambda s: len(s) != 0, line.split(" "))) + if len(tokens) != 3: + continue + entry_name = tokens[0] + entry_size = int(tokens[1]) + if entry_name in sections_to_sum: + section_size += entry_size + return section_size + + +@register_func("tvm_callback_relocate_binary") +def tvm_callback_relocate_binary( + binary_path, text_addr, rodata_addr, data_addr, bss_addr, toolchain_prefix): + """Relocates sections in the binary to new addresses + + Parameters + ---------- + binary_path : str + path of the binary file + + text_addr : str + text section absolute address + + rodata_addr : str + rodata section absolute address + + data_addr : str + data section absolute address + + bss_addr : str + bss section absolute address + + toolchain_prefix : str + prefix for binary names in target compiler toolchain + + Returns + ------- + rel_bin : bytearray + the relocated binary + """ + tmp_dir = util.tempdir() + rel_obj_path = tmp_dir.relpath("relocated.o") + ld_script_contents = "" + # TODO(weberlo): There should be a better way to configure this for different archs. + if "riscv" in toolchain_prefix: + ld_script_contents += "OUTPUT_ARCH( \"riscv\" )\n\n" + # TODO(weberlo): Generate the script in a more procedural manner. + ld_script_contents += """ +SECTIONS +{ + . = %s; + . = ALIGN(8); + .text : + { + *(.text) + . = ALIGN(8); + *(.text*) + } + . = %s; + . = ALIGN(8); + .rodata : + { + *(.rodata) + . = ALIGN(8); + *(.rodata*) + } + . = %s; + . = ALIGN(8); + .data : + { + *(.data) + . = ALIGN(8); + *(.data*) + . = ALIGN(8); + *(.sdata) + } + . = %s; + . = ALIGN(8); + .bss : + { + *(.bss) + . = ALIGN(8); + *(.bss*) + . 
= ALIGN(8); + *(.sbss) + } +} + """ % (text_addr, rodata_addr, data_addr, bss_addr) + rel_ld_script_path = tmp_dir.relpath("relocated.lds") + with open(rel_ld_script_path, "w") as f: + f.write(ld_script_contents) + ld_proc = subprocess.Popen(["{}ld".format(toolchain_prefix), binary_path, + "-T", rel_ld_script_path, + "-o", rel_obj_path], + stdout=subprocess.PIPE, + stderr=subprocess.STDOUT) + (out, _) = ld_proc.communicate() + if ld_proc.returncode != 0: + msg = "linking error using ld:\n" + msg += py_str(out) + raise RuntimeError(msg) + with open(rel_obj_path, "rb") as f: + rel_bin = bytearray(f.read()) + return rel_bin + + +@register_func("tvm_callback_read_binary_section") +def tvm_callback_read_binary_section(binary, section, toolchain_prefix): + """Returns the contents of the specified section in the binary byte array + + Parameters + ---------- + binary : bytearray + contents of the binary + + section : str + type of section + + toolchain_prefix : str + prefix for binary names in target compiler toolchain + + Returns + ------- + section_bin : bytearray + contents of the read section + """ + tmp_dir = util.tempdir() + tmp_bin = tmp_dir.relpath("temp.bin") + tmp_section = tmp_dir.relpath("tmp_section.bin") + with open(tmp_bin, "wb") as out_file: + out_file.write(bytes(binary)) + objcopy_proc = subprocess.Popen(["{}objcopy".format(toolchain_prefix), "--dump-section", + ".{}={}".format(section, tmp_section), + tmp_bin], + stdout=subprocess.PIPE, + stderr=subprocess.STDOUT) + (out, _) = objcopy_proc.communicate() + if objcopy_proc.returncode != 0: + msg = "error in using objcopy:\n" + msg += py_str(out) + raise RuntimeError(msg) + if os.path.isfile(tmp_section): + # Get section content if it exists. + with open(tmp_section, "rb") as f: + section_bin = bytearray(f.read()) + else: + # Return empty bytearray if the section does not exist. 
+ section_bin = bytearray("", "utf-8") + return section_bin + + +@register_func("tvm_callback_get_symbol_map") +def tvm_callback_get_symbol_map(binary, toolchain_prefix): + """Obtains a map of symbols to addresses in the passed binary + + Parameters + ---------- + binary : bytearray + contents of the binary + + toolchain_prefix : str + prefix for binary names in target compiler toolchain + + Returns + ------- + map_str : str + map of defined symbols to addresses, encoded as a series of + alternating newline-separated keys and values + """ + tmp_dir = util.tempdir() + tmp_obj = tmp_dir.relpath("tmp_obj.bin") + with open(tmp_obj, "wb") as out_file: + out_file.write(bytes(binary)) + nm_proc = subprocess.Popen(["{}nm".format(toolchain_prefix), "-C", "--defined-only", tmp_obj], + stdout=subprocess.PIPE, + stderr=subprocess.STDOUT) + (nm_output, _) = nm_proc.communicate() + if nm_proc.returncode != 0: + msg = "error in using nm:\n" + msg += py_str(nm_output) + raise RuntimeError(msg) + nm_output = nm_output.decode("utf8").splitlines() + map_str = "" + for line in nm_output: + line = line.split() + map_str += line[2] + "\n" + map_str += line[0] + "\n" + return map_str diff --git a/python/tvm/contrib/cc.py b/python/tvm/contrib/cc.py index 26ac672880a9..43d653255ded 100644 --- a/python/tvm/contrib/cc.py +++ b/python/tvm/contrib/cc.py @@ -14,7 +14,7 @@ # KIND, either express or implied. See the License for the # specific language governing permissions and limitations # under the License. -"""Util to invoke c++ compilers in the system.""" +"""Util to invoke C/C++ compilers in the system.""" # pylint: disable=invalid-name from __future__ import absolute_import as _abs import sys @@ -24,11 +24,10 @@ from .._ffi.base import py_str from .util import tempdir - def create_shared(output, objects, options=None, - cc="g++"): + compile_cmd="g++"): """Create shared library. Parameters @@ -36,17 +35,17 @@ def create_shared(output, output : str The target shared library. 
- objects : list + objects : List[str] List of object files. - options : list + options : List[str] The list of additional options string. - cc : str, optional - The compile string. + compile_cmd : Optional[str] + The compiler command. """ if sys.platform == "darwin" or sys.platform.startswith("linux"): - _linux_shared(output, objects, options, cc) + _linux_compile(output, objects, options, compile_cmd) elif sys.platform == "win32": _windows_shared(output, objects, options) else: @@ -56,40 +55,44 @@ def create_shared(output, # assign so as default output format create_shared.output_format = "so" if sys.platform != "win32" else "dll" - -def cross_compiler(cc, options=None, output_format="so"): +def cross_compiler(compile_func, base_options=None, output_format="so"): """Create a cross compiler function. Parameters ---------- - cc : str - The cross compiler name. + compile_func : Callable[[str, str, Optional[str]], None] + Function that performs the actual compilation - options : list, optional + options : Optional[List[str]] List of additional optional string. - output_format : str, optional + output_format : Optional[str] Library output format. Returns ------- - fcompile : function + fcompile : Callable[[str, str, Optional[str]], None] A compilation function that can be passed to export_library. 
""" - def _fcompile(outputs, objects, opts=None): - opts = opts if opts else [] - if options: - opts += options - _linux_shared(outputs, objects, opts, cc=cc) + if base_options is None: + base_options = [] + def _fcompile(outputs, objects, options=None): + all_options = base_options + if options is not None: + all_options += options + compile_func(outputs, objects, options=all_options) _fcompile.output_format = output_format return _fcompile -def _linux_shared(output, objects, options, cc="g++"): - cmd = [cc] - cmd += ["-shared", "-fPIC"] - if sys.platform == "darwin": - cmd += ["-undefined", "dynamic_lookup"] +def _linux_compile(output, objects, options, compile_cmd="g++"): + cmd = [compile_cmd] + if output.endswith(".so") or output.endswith(".dylib"): + cmd += ["-shared", "-fPIC"] + if sys.platform == "darwin": + cmd += ["-undefined", "dynamic_lookup"] + elif output.endswith(".obj"): + cmd += ["-c"] cmd += ["-o", output] if isinstance(objects, str): cmd += [objects] diff --git a/python/tvm/micro/__init__.py b/python/tvm/micro/__init__.py new file mode 100644 index 000000000000..d69edfa1cbaf --- /dev/null +++ b/python/tvm/micro/__init__.py @@ -0,0 +1,9 @@ +"""uTVM module for bare-metal backends. + +uTVM (or the micro backend) enables provides support for bare-metal devices. +Its targets currently include a host-emulated device which is used for testing, +and JTAG-based openocd device which allows actual interfacing with microdevices. +""" + +from ..contrib import binutil +from .base import Session, cross_compiler, create_micro_lib diff --git a/python/tvm/micro/base.py b/python/tvm/micro/base.py new file mode 100644 index 000000000000..7cb13c4fa2f5 --- /dev/null +++ b/python/tvm/micro/base.py @@ -0,0 +1,204 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. 
The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +"""Base definitions for micro.""" + +from __future__ import absolute_import + +import logging +import os +import sys + +from tvm.contrib import util as _util +from tvm.contrib import cc as _cc + +from .._ffi.function import _init_api +from .._ffi.libinfo import find_include_path + +SUPPORTED_DEVICE_TYPES = ["host"] + +class Session: + """MicroTVM Device Session + + Parameters + ---------- + device_type : str + type of low-level device + + toolchain_prefix : str + toolchain prefix to be used. For example, a prefix of + "riscv64-unknown-elf-" means "riscv64-unknown-elf-gcc" is used as + the compiler and "riscv64-unknown-elf-ld" is used as the linker, + etc. + + Example + -------- + .. code-block:: python + + c_mod = ... # some module generated with "c" as the target + device_type = "host" + with tvm.micro.Session(device_type) as sess: + sess.create_micro_mod(c_mod) + """ + + def __init__(self, device_type, toolchain_prefix): + if device_type not in SUPPORTED_DEVICE_TYPES: + raise RuntimeError("unknown micro device type \"{}\"".format(device_type)) + self._check_system() + + # First, find and compile runtime library. 
+ runtime_src_path = os.path.join(_get_micro_device_dir(), "utvm_runtime.c") + tmp_dir = _util.tempdir() + runtime_obj_path = tmp_dir.relpath("utvm_runtime.obj") + create_micro_lib( + runtime_obj_path, runtime_src_path, toolchain_prefix, include_dev_lib_header=False) + + self.module = _CreateSession(device_type, runtime_obj_path, toolchain_prefix) + self._enter = self.module["enter"] + self._exit = self.module["exit"] + + def _check_system(self): + """Check if the user's system is supported by MicroTVM. + + Raises error if not supported. + """ + if not sys.platform.startswith("linux"): + raise RuntimeError("microTVM is currently only supported on Linux") + # TODO(weberlo): Add 32-bit support. + # It's primarily the compilation pipeline that isn't compatible. + if sys.maxsize <= 2**32: + raise RuntimeError("microTVM is currently only supported on 64-bit platforms") + + def __enter__(self): + self._enter() + + def __exit__(self, exc_type, exc_value, exc_traceback): + self._exit() + + +def _get_micro_device_dir(): + """Get directory path for uTVM runtime source files. + + Return + ------ + micro_device_dir : str + directory path + """ + micro_dir = os.path.dirname(os.path.realpath(os.path.expanduser(__file__))) + micro_device_dir = os.path.join(micro_dir, "..", "..", "..", + "src", "runtime", "micro", "device") + return micro_device_dir + + +def cross_compiler(toolchain_prefix, include_dev_lib_header=True): + """Creates a cross compile function that wraps `create_micro_lib`. + + For use in `tvm.module.Module.export_library`. + + Parameters + ---------- + toolchain_prefix : str + toolchain prefix to be used + + include_dev_lib_header : Optional[bool] + whether to include the device library header containing definitions of + library functions. + + Return + ------ + func : Callable[[str, str, Optional[str]], None] + cross compile function taking a destination path for the object file + and a path for the input source file. + + Example + -------- + .. 
code-block:: python + + c_mod = ... # some module generated with "c" as the target + fcompile = tvm.micro.cross_compiler(toolchain_prefix="") + c_mod.export_library("dev_lib.obj", fcompile=fcompile) + """ + def compile_func(obj_path, src_path, **kwargs): + if isinstance(obj_path, list): + obj_path = obj_path[0] + if isinstance(src_path, list): + src_path = src_path[0] + create_micro_lib(obj_path, src_path, toolchain_prefix, + kwargs.get("options", None), include_dev_lib_header) + return _cc.cross_compiler(compile_func) + + +def create_micro_lib( + obj_path, src_path, toolchain_prefix, options=None, include_dev_lib_header=True): + """Compiles code into a binary for the target micro device. + + Parameters + ---------- + obj_path : Optional[str] + path to generated object file (defaults to same directory as `src_path`) + + src_path : str + path to source file + + toolchain_prefix : str + toolchain prefix to be used + + include_dev_lib_header : bool + whether to include the device library header containing definitions of + library functions. + """ + def replace_suffix(s, new_suffix): + if "." in os.path.basename(s): + # There already exists an extension. + return os.path.join( + os.path.dirname(s), + ".".join(os.path.basename(s).split(".")[:-1] + [new_suffix])) + # No existing extension; we can just append. + return s + "." + new_suffix + + # uTVM object files cannot have an ".o" suffix, because it triggers the + # code path for creating shared objects in `tvm.module.load`. So we replace + # ".o" suffixes with ".obj". + if obj_path.endswith(".o"): + logging.warning( + "\".o\" suffix in \"%s\" has been replaced with \".obj\"", obj_path) + obj_path = replace_suffix(obj_path, "obj") + + options = ["-I" + path for path in find_include_path()] + options += ["-I{}".format(_get_micro_device_dir())] + options += ["-fno-stack-protector"] + if sys.maxsize > 2**32 and sys.platform.startswith("linux"): + # Only add this option if the host is a 64-bit Linux. 
+ options += ["-mcmodel=large"] + compile_cmd = "{}gcc".format(toolchain_prefix) + + if include_dev_lib_header: + # Create a temporary copy of the source, so we can inject the dev lib + # header without modifying the original. + tmp_dir = _util.tempdir() + temp_src_path = tmp_dir.relpath("temp.c") + with open(src_path, "r") as f: + src_lines = f.read().splitlines() + src_lines.insert(0, "#include \"utvm_device_dylib_redirect.c\"") + with open(temp_src_path, "w") as f: + f.write("\n".join(src_lines)) + src_path = temp_src_path + + _cc.create_shared(obj_path, src_path, options, compile_cmd) + + +_init_api("tvm.micro", "tvm.micro.base") diff --git a/python/tvm/ndarray.py b/python/tvm/ndarray.py index c24a189b78d3..f9c7cc6c5403 100644 --- a/python/tvm/ndarray.py +++ b/python/tvm/ndarray.py @@ -190,6 +190,22 @@ def ext_dev(dev_id=0): return TVMContext(12, dev_id) +def micro_dev(dev_id=0): + """Construct a micro device + + Parameters + ---------- + dev_id : int, optional + The integer device id + + Returns + ------- + ctx : TVMContext + The created context + """ + return TVMContext(13, dev_id) + + cl = opencl mtl = metal diff --git a/src/api/api_pass.cc b/src/api/api_pass.cc index e5b003cafb87..5a81d6fb5e10 100644 --- a/src/api/api_pass.cc +++ b/src/api/api_pass.cc @@ -6,9 +6,9 @@ * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. 
You may obtain a copy of the License at - * + * * http://www.apache.org/licenses/LICENSE-2.0 - * + * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY diff --git a/src/codegen/codegen_c.cc b/src/codegen/codegen_c.cc index bbd28baea9b5..395d3f3178c6 100644 --- a/src/codegen/codegen_c.cc +++ b/src/codegen/codegen_c.cc @@ -443,7 +443,7 @@ inline void PrintBinaryExpr(const T* op, } } -inline void PrintBinaryIntrinsitc(const Call* op, +inline void PrintBinaryIntrinsic(const Call* op, const char *opstr, std::ostream& os, // NOLINT(*) CodeGenC* p) { @@ -528,20 +528,20 @@ void CodeGenC::VisitExpr_(const Call *op, std::ostream& os) { // NOLINT(*) } os << ")"; } else if (op->is_intrinsic(Call::bitwise_and)) { - PrintBinaryIntrinsitc(op, " & ", os, this); + PrintBinaryIntrinsic(op, " & ", os, this); } else if (op->is_intrinsic(Call::bitwise_xor)) { - PrintBinaryIntrinsitc(op, " ^ ", os, this); + PrintBinaryIntrinsic(op, " ^ ", os, this); } else if (op->is_intrinsic(Call::bitwise_or)) { - PrintBinaryIntrinsitc(op, " | ", os, this); + PrintBinaryIntrinsic(op, " | ", os, this); } else if (op->is_intrinsic(Call::bitwise_not)) { CHECK_EQ(op->args.size(), 1U); os << "(~"; this->PrintExpr(op->args[0], os); os << ')'; } else if (op->is_intrinsic(Call::shift_left)) { - PrintBinaryIntrinsitc(op, " << ", os, this); + PrintBinaryIntrinsic(op, " << ", os, this); } else if (op->is_intrinsic(Call::shift_right)) { - PrintBinaryIntrinsitc(op, " >> ", os, this); + PrintBinaryIntrinsic(op, " >> ", os, this); } else if (op->is_intrinsic(intrinsic::tvm_if_then_else)) { os << "("; PrintExpr(op->args[0], os); diff --git a/src/codegen/codegen_c.h b/src/codegen/codegen_c.h index a591b571f662..5e84cd945bc5 100644 --- a/src/codegen/codegen_c.h +++ b/src/codegen/codegen_c.h @@ -6,9 +6,9 @@ * to you under the Apache License, Version 2.0 (the * "License"); you may not use this 
file except in compliance * with the License. You may obtain a copy of the License at - * + * * http://www.apache.org/licenses/LICENSE-2.0 - * + * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY diff --git a/src/codegen/codegen_c_host.cc b/src/codegen/codegen_c_host.cc index ca7b070a97c7..ef010ee050f2 100644 --- a/src/codegen/codegen_c_host.cc +++ b/src/codegen/codegen_c_host.cc @@ -31,13 +31,13 @@ namespace tvm { namespace codegen { CodeGenCHost::CodeGenCHost() { - module_name = GetUniqueName("__tvm_module_ctx"); + module_name_ = GetUniqueName("__tvm_module_ctx"); } void CodeGenCHost::Init(bool output_ssa) { decl_stream << "#include \"tvm/runtime/c_runtime_api.h\"\n"; decl_stream << "#include \"tvm/runtime/c_backend_api.h\"\n"; - decl_stream << "extern void* " << module_name << " = NULL;\n"; + decl_stream << "extern void* " << module_name_ << " = NULL;\n"; CodeGenC::Init(output_ssa); } @@ -154,12 +154,13 @@ void CodeGenCHost::VisitExpr_(const Broadcast* op, std::ostream& os) { // NOLI os << "))"; } -void CodeGenCHost::PrintGetFuncFromBackend(std::string func_name, std::string packed_func_name) { +void CodeGenCHost::PrintGetFuncFromBackend(const std::string& func_name, + const std::string& packed_func_name) { this->PrintIndent(); this->stream << "if (" << packed_func_name << " == NULL) {\n"; int packed_func_if_scope = this->BeginScope(); this->PrintIndent(); - this->stream << "if (TVMBackendGetFuncFromEnv(" << module_name + this->stream << "if (TVMBackendGetFuncFromEnv(" << module_name_ << ", \"" << func_name << "\"" << ", &" << packed_func_name << ") != 0) {\n"; int get_func_env_scope = this->BeginScope(); @@ -173,7 +174,7 @@ void CodeGenCHost::PrintGetFuncFromBackend(std::string func_name, std::string pa this->stream << "}\n"; } -void CodeGenCHost::PrintFuncCall(std::string packed_func_name, int num_args) { +void 
CodeGenCHost::PrintFuncCall(const std::string& packed_func_name, int num_args) { this->PrintIndent(); std::string ret_val = GetUniqueName("ret_val"); std::string ret_type_code = GetUniqueName("ret_type_code"); @@ -251,6 +252,29 @@ void CodeGenCHost::VisitStmt_(const AssertStmt *op) { // NOLINT(*) this->PrintStmt(op->body); } +void CodeGenCHost::VisitExpr_(const Min *op, std::ostream& os) { // NOLINT(*) + PrintTernaryCondExpr(op, "<", os); +} + +void CodeGenCHost::VisitExpr_(const Max *op, std::ostream& os) { // NOLINT(*) + PrintTernaryCondExpr(op, ">", os); +} + +template +inline void CodeGenCHost::PrintTernaryCondExpr(const T* op, + const char* compare, + std::ostream& os) { // NOLINT(*) + std::ostringstream temp_a; + VisitExpr(op->a, temp_a); + std::string a_id = SSAGetID(temp_a.str(), op->a.type()); + std::ostringstream temp_b; + VisitExpr(op->b, temp_b); + std::string b_id = SSAGetID(temp_b.str(), op->b.type()); + + os << "((" << a_id << ") " << compare << " (" << b_id << ") " + << "? 
(" << a_id << ") : (" << b_id << "))"; +} + runtime::Module BuildCHost(Array funcs) { using tvm::runtime::Registry; bool output_ssa = false; diff --git a/src/codegen/codegen_c_host.h b/src/codegen/codegen_c_host.h index 23ae185512e1..ad18383f98c4 100644 --- a/src/codegen/codegen_c_host.h +++ b/src/codegen/codegen_c_host.h @@ -45,12 +45,30 @@ class CodeGenCHost final : public CodeGenC { // overload visitor functions void VisitExpr_(const Broadcast* op, std::ostream& os) final; // NOLINT(*) void VisitExpr_(const Call *op, std::ostream& os) final; // NOLINT(*) + // overload min and max to use the ternary operator, so we don't rely on the + // standard library implementations + void VisitExpr_(const Min *op, std::ostream& os) final; // NOLINT(*) + void VisitExpr_(const Max *op, std::ostream& os) final; // NOLINT(*) + void VisitStmt_(const AssertStmt *op) final; // NOLINT(*) private: - std::string module_name; - void PrintGetFuncFromBackend(std::string func_name, std::string packed_func_name); - void PrintFuncCall(std::string packed_func_name, int num_args); + std::string module_name_; + + void PrintGetFuncFromBackend(const std::string& func_name, const std::string& packed_func_name); + void PrintFuncCall(const std::string& packed_func_name, int num_args); + + /*! + * \brief Print ternary conditional operator implementing binary `op` + * Forces the operands to be in SSA form. 
+ * \param op binary operator being expressed + * \param compare string representation of comparison operator + * \param os stream reference to print into + */ + template + inline void PrintTernaryCondExpr(const T* op, + const char* compare, + std::ostream& os); // NOLINT(*) }; } // namespace codegen diff --git a/src/runtime/micro/device/utvm_device_dylib_redirect.c b/src/runtime/micro/device/utvm_device_dylib_redirect.c new file mode 100644 index 000000000000..7919afa37eb5 --- /dev/null +++ b/src/runtime/micro/device/utvm_device_dylib_redirect.c @@ -0,0 +1,57 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +/*! + * Copyright (c) 2019 by Contributors + * \file utvm_device_dylib_redirect.cc + * \brief uTVM dynamic linking stubs + * + * This is a library that gets included in each uTVM library. We redirect + * each library call into a pre-defined global function pointer, and we patch + * the correct addresses of each function into the pointers when we load the + * library. 
#ifdef __cplusplus
extern "C" {
#endif
#include <stdint.h>
#include <stddef.h>

// Signatures of the runtime entry points that are redirected below.
typedef void* (*UTVMAllocWorkspaceFn)(int, int, uint64_t, int, int);
typedef int (*UTVMFreeWorkspaceFn)(int, int, void*);
typedef void (*UTVMSetLastErrorFn)(const char*);

// Redirection slots. They start out null; per the file header, the real
// function addresses are patched into them when the library is loaded.
UTVMAllocWorkspaceFn TVMBackendAllocWorkspace_ = NULL;
UTVMFreeWorkspaceFn TVMBackendFreeWorkspace_ = NULL;
UTVMSetLastErrorFn TVMAPISetLastError_ = NULL;

// Forwards a workspace allocation request through the patched slot.
void* TVMBackendAllocWorkspace(int device_type, int device_id, uint64_t size,
                               int dtype_code_hint, int dtype_bits_hint) {
  return TVMBackendAllocWorkspace_(device_type, device_id, size,
                                   dtype_code_hint, dtype_bits_hint);
}

// Forwards a workspace free request through the patched slot.
int TVMBackendFreeWorkspace(int device_type, int device_id, void* ptr) {
  return TVMBackendFreeWorkspace_(device_type, device_id, ptr);
}

// Forwards an error message through the patched slot.
void TVMAPISetLastError(const char* msg) {
  TVMAPISetLastError_(msg);
}

#ifdef __cplusplus
}  // TVM_EXTERN_C
#endif
+ * Copyright (c) 2019 by Contributors + * \file utvm_runtime.cc + * \brief uTVM runtime + * + * All function calls go through `UTVMMain`, which reads from the current + * `UTVMTask` and calls the appropriate function with the arguments from the + * task. + * + * Additionally included in this file are definitions for some of the most + * common functions used in the C runtime API. + */ +#ifdef __cplusplus +extern "C" { +#endif + +#include "utvm_runtime.h" + +// Task pointers must be patched before calling a function. +UTVMTask task; + +// These pointers are patched at load time to point to the workspace section. +char* utvm_workspace_begin = NULL; // NOLINT(*) +char* utvm_workspace_end = NULL; // NOLINT(*) +char* utvm_workspace_curr = NULL; // NOLINT(*) +// Keep track of how many active allocations there are on the workspace. +size_t utvm_num_active_allocs = 0; + +const char* utvm_last_error = NULL; // NOLINT(*) +int32_t utvm_return_code = 0; // NOLINT(*) + +// We use a dummy function to signal execution is finished for device +// backends which require breakpoints. +void UTVMDone() { } + +void UTVMMain() { + utvm_workspace_curr = utvm_workspace_begin; + utvm_num_active_allocs = 0; + utvm_last_error = NULL; // NOLINT(*) + utvm_return_code = 0; + utvm_return_code = task.func((void*) task.arg_values, (void*) task.arg_type_codes, // NOLINT(*) + task.num_args); + UTVMDone(); +} + +void* TVMBackendAllocWorkspace(int device_type, int device_id, uint64_t size, + int dtype_code_hint, int dtype_bits_hint) { + // Align up to 8 bytes. + utvm_workspace_curr += (8 - ((uintptr_t) utvm_workspace_curr % 8)) % 8; // NOLINT(*) + if (utvm_workspace_curr + size > utvm_workspace_end) { + // Out of space in workspace. 
+ return NULL; + } + void* ret_ptr = (void*) utvm_workspace_curr; // NOLINT(*) + utvm_workspace_curr += size; + utvm_num_active_allocs++; + return ret_ptr; +} + +int TVMBackendFreeWorkspace(int device_type, int device_id, void* ptr) { + utvm_num_active_allocs--; + if (utvm_num_active_allocs < 0) { + TVMAPISetLastError("free called with no active workspace allocations"); + // Reset allocations and workspace (for future task executions). + utvm_num_active_allocs = 0; + utvm_workspace_curr = utvm_workspace_begin; + return -1; + } else if (utvm_num_active_allocs == 0) { + // No more allocations. Reset workspace. + utvm_workspace_curr = utvm_workspace_begin; + return 0; + } else { + return 0; + } +} + +void TVMAPISetLastError(const char* msg) { + utvm_last_error = msg; +} + +#ifdef __cplusplus +} // TVM_EXTERN_C +#endif diff --git a/src/runtime/micro/device/utvm_runtime.h b/src/runtime/micro/device/utvm_runtime.h new file mode 100644 index 000000000000..526726d1e0d8 --- /dev/null +++ b/src/runtime/micro/device/utvm_runtime.h @@ -0,0 +1,53 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +/*! 
/*!
 * \brief Task structure for uTVM
 *
 * Describes a single function invocation. `UTVMMain` in utvm_runtime.c calls
 * `func(arg_values, arg_type_codes, num_args)` on the global task and stores
 * the result as the task's return code.
 */
typedef struct {
  /*! \brief Pointer to function to call for this task */
  int32_t (*func)(void*, void*, int32_t);
  /*! \brief Array of argument values (passed to `func` as its first argument) */
  TVMValue* arg_values;
  /*! \brief Array of type codes for each argument value (second argument of `func`) */
  int* arg_type_codes;
  /*! \brief Number of arguments (third argument of `func`) */
  int32_t num_args;
} UTVMTask;
+ * Copyright (c) 2019 by Contributors + * \file host_low_level_device.cc + * \brief emulated low-level micro device implementation on host machine + */ + +#include +#include +#include +#include "micro_common.h" +#include "low_level_device.h" + +namespace tvm { +namespace runtime { + +/*! + * \brief emulated low-level device on host machine + */ +class HostLowLevelDevice final : public LowLevelDevice { + public: + /*! + * \brief constructor to initialize on-host memory region to act as device + * \param num_bytes size of the emulated on-device memory region + */ + explicit HostLowLevelDevice(size_t num_bytes) : size_(num_bytes) { + size_t size_in_pages = (num_bytes + kPageSize - 1) / kPageSize; + // TODO(weberlo): Set permissions per section (e.g., read-write perms for + // the heap, execute perms for text, etc.). + int mmap_prot = PROT_READ | PROT_WRITE | PROT_EXEC; + int mmap_flags = MAP_ANONYMOUS | MAP_PRIVATE; + base_addr_ = reinterpret_cast( + mmap(nullptr, size_in_pages * kPageSize, mmap_prot, mmap_flags, -1, 0)); + } + + /*! + * \brief destructor to deallocate on-host device region + */ + virtual ~HostLowLevelDevice() { + munmap(reinterpret_cast(base_addr_), size_); + } + + void Read(DevBaseOffset offset, void* buf, size_t num_bytes) { + void* addr = ToDevPtr(offset).cast_to(); + std::memcpy(buf, addr, num_bytes); + } + + void Write(DevBaseOffset offset, const void* buf, size_t num_bytes) { + void* addr = ToDevPtr(offset).cast_to(); + std::memcpy(addr, buf, num_bytes); + } + + void Execute(DevBaseOffset func_offset, DevBaseOffset breakpoint) { + DevPtr func_addr = ToDevPtr(func_offset); + reinterpret_cast(func_addr.value())(); + } + + std::uintptr_t base_addr() const final { + return base_addr_; + } + + const char* device_type() const final { + return "host"; + } + + private: + /*! \brief base address of the micro device memory region */ + std::uintptr_t base_addr_; + /*! 
\brief size of memory region */ + size_t size_; +}; + +const std::shared_ptr HostLowLevelDeviceCreate(size_t num_bytes) { + std::shared_ptr lld = + std::make_shared(num_bytes); + return lld; +} + +} // namespace runtime +} // namespace tvm diff --git a/src/runtime/micro/low_level_device.h b/src/runtime/micro/low_level_device.h new file mode 100644 index 000000000000..a3b2e35ce16a --- /dev/null +++ b/src/runtime/micro/low_level_device.h @@ -0,0 +1,107 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +/*! + * Copyright (c) 2019 by Contributors + * \file low_level_device.h + * \brief Abstract low-level micro device management + */ +#ifndef TVM_RUNTIME_MICRO_LOW_LEVEL_DEVICE_H_ +#define TVM_RUNTIME_MICRO_LOW_LEVEL_DEVICE_H_ + +#include + +#include "micro_common.h" + +namespace tvm { +namespace runtime { +/*! + * \brief virtual interface for low-level micro device management + */ +class LowLevelDevice { + public: + /*! \brief virtual destructor */ + virtual ~LowLevelDevice() {} + + /*! 
+ * \brief reads num_bytes from device memory at base_addr + offset into buffer + * \param offset on-device memory offset pointer to be read from + * \param buffer on-host buffer to be read into + * \param num_bytes number of bytes to be read + */ + virtual void Read(DevBaseOffset offset, + void* buffer, + size_t num_bytes) = 0; + + /*! + * \brief writes num_bytes from buffer to device memory at base_addr + offset + * \param offset on-device memory offset pointer to be written to + * \param buffer on-host buffer to be written + * \param num_bytes number of bytes to be written + */ + virtual void Write(DevBaseOffset offset, + const void* buffer, + size_t num_bytes) = 0; + + /*! + * \brief starts execution of device at offset + * \param func_addr offset of the init stub function + * \param breakpoint breakpoint at which to stop function execution + */ + virtual void Execute(DevBaseOffset func_offset, DevBaseOffset breakpoint) = 0; + + /*! + * \brief convert from base offset to absolute address + * \param offset base offset + */ + DevPtr ToDevPtr(DevBaseOffset offset) { + return DevPtr(base_addr() + offset.value()); + } + + /*! + * \brief convert from absolute address to base offset + * \param ptr absolute address + */ + DevBaseOffset ToDevOffset(DevPtr ptr) { + return DevBaseOffset(ptr.value() - base_addr()); + } + + /*! + * \brief getter function for low-level device type + * \return string containing device type + */ + virtual const char* device_type() const = 0; + + protected: + /*! + * \brief getter function for base_addr + * \return the base address of the device memory region + */ + virtual std::uintptr_t base_addr() const = 0; +}; + +/*! 
+ * \brief create a host low-level device + * \param num_bytes size of the memory region + */ +const std::shared_ptr HostLowLevelDeviceCreate(size_t num_bytes); + +} // namespace runtime +} // namespace tvm +#endif // TVM_RUNTIME_MICRO_LOW_LEVEL_DEVICE_H_ diff --git a/src/runtime/micro/micro_common.cc b/src/runtime/micro/micro_common.cc new file mode 100644 index 000000000000..459d00d419c7 --- /dev/null +++ b/src/runtime/micro/micro_common.cc @@ -0,0 +1,136 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +/*! 
+ * Copyright (c) 2019 by Contributors + * \file micro_common.cc + * \brief common utilties for uTVM + */ + +#include +#include +#include +#include +#include +#include +#include "micro_session.h" +#include "micro_common.h" +#include "low_level_device.h" + +namespace tvm { +namespace runtime { + +size_t GetDefaultSectionSize(SectionKind kind) { + switch (kind) { + case SectionKind::kText: + return 0xF0000; + case SectionKind::kRodata: + return 0xF000; + case SectionKind::kData: + return 0xF00; + case SectionKind::kBss: + return 0xF00; + case SectionKind::kArgs: + return 0xF00000; + case SectionKind::kStack: + return 0xF000; + case SectionKind::kHeap: + return 0xF000000; + case SectionKind::kWorkspace: + return 0xF000000; + default: + LOG(FATAL) << "invalid section " << static_cast(kind); + return 0; + } +} + +const char* SectionToString(SectionKind section) { + switch (section) { + case SectionKind::kText: return "text"; + case SectionKind::kRodata: return "rodata"; + case SectionKind::kData: return "data"; + case SectionKind::kBss: return "bss"; + case SectionKind::kArgs: return "args"; + case SectionKind::kStack: return "stack"; + case SectionKind::kHeap: return "heap"; + case SectionKind::kWorkspace: return "workspace"; + default: return ""; + } +} + +static std::string AddrToString(void* addr) { + std::stringstream stream; + if (addr != nullptr) + stream << addr; + else + stream << "0x0"; + std::string string_addr = stream.str(); + return string_addr; +} + +std::string RelocateBinarySections(const std::string& binary_path, + DevPtr text, + DevPtr rodata, + DevPtr data, + DevPtr bss, + const std::string& toolchain_prefix) { + const auto* f = Registry::Get("tvm_callback_relocate_binary"); + CHECK(f != nullptr) + << "Require tvm_callback_relocate_binary to exist in registry"; + std::string relocated_bin = (*f)(binary_path, + AddrToString(text.cast_to()), + AddrToString(rodata.cast_to()), + AddrToString(data.cast_to()), + AddrToString(bss.cast_to()), + 
toolchain_prefix); + return relocated_bin; +} + +std::string ReadSection(const std::string& binary, + SectionKind section, + const std::string& toolchain_prefix) { + CHECK(section == SectionKind::kText || section == SectionKind::kRodata || + section == SectionKind::kData || section == SectionKind::kBss) + << "ReadSection requires section to be one of text, rodata, data, or bss."; + const auto* f = Registry::Get("tvm_callback_read_binary_section"); + CHECK(f != nullptr) + << "Require tvm_callback_read_binary_section to exist in registry"; + TVMByteArray arr; + arr.data = &binary[0]; + arr.size = binary.length(); + std::string section_contents = (*f)(arr, SectionToString(section), toolchain_prefix); + return section_contents; +} + +size_t GetSectionSize(const std::string& binary_path, + SectionKind section, + const std::string& toolchain_prefix, + size_t align) { + CHECK(section == SectionKind::kText || section == SectionKind::kRodata || + section == SectionKind::kData || section == SectionKind::kBss) + << "GetSectionSize requires section to be one of text, rodata, data, or bss."; + const auto* f = Registry::Get("tvm_callback_get_section_size"); + CHECK(f != nullptr) + << "Require tvm_callback_get_section_size to exist in registry"; + int size = (*f)(binary_path, SectionToString(section), toolchain_prefix); + return UpperAlignValue(size, align); +} + +} // namespace runtime +} // namespace tvm diff --git a/src/runtime/micro/micro_common.h b/src/runtime/micro/micro_common.h new file mode 100644 index 000000000000..67b5349060b4 --- /dev/null +++ b/src/runtime/micro/micro_common.h @@ -0,0 +1,314 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
/*! \brief Base class for interfacing with device locations (pointers/offsets) */
class DeviceLocation {
 public:
  /*! \brief default constructor; the location starts out as zero (null) */
  DeviceLocation() : DeviceLocation(static_cast<std::uintptr_t>(0)) {}

  /*! \brief construct a location holding the raw value `value` */
  explicit DeviceLocation(std::uintptr_t value) : value_(value) {}

  /*! \brief construct a null location from `nullptr` */
  explicit DeviceLocation(std::nullptr_t) : DeviceLocation() {}

  /*! \brief destructor */
  virtual ~DeviceLocation() {}

  /*!
   * \brief get value of location
   * \return raw integer value of the location
   */
  std::uintptr_t value() const {
    return value_;
  }

  /*!
   * \brief reinterpret the raw value as type `T`
   * \return the location's value converted with `reinterpret_cast`
   */
  template <typename T>
  T cast_to() const {
    return reinterpret_cast<T>(value_);
  }

  /*! \brief check if location is null (its raw value is zero) */
  bool operator==(std::nullptr_t) const {
    return value_ == 0;
  }

  /*! \brief check if location is not null */
  bool operator!=(std::nullptr_t) const {
    return !(*this == nullptr);
  }

 protected:
  /*! \brief raw value storing the location */
  std::uintptr_t value_;
};
\brief add an integer to this base offset to get a larger base offset */ + DevBaseOffset operator+(size_t n) const { + return DevBaseOffset(value_ + n); + } + + /*! \brief mutably add an integer to this base offset */ + DevBaseOffset& operator+=(size_t n) { + value_ += n; + return *this; + } + + /*! \brief subtract an integer from this base offset to get a smaller base offset */ + DevBaseOffset operator-(size_t n) const { + return DevBaseOffset(value_ - n); + } + + /*! \brief mutably subtract an integer from this base offset */ + DevBaseOffset& operator-=(size_t n) { + value_ -= n; + return *this; + } +}; + +/*! + * \brief map from symbols to their on-device offsets + */ +class SymbolMap { + public: + /*! + * \brief default constructor + */ + SymbolMap() {} + + /*! + * \brief constructor that builds the mapping + * \param binary contents of binary object file + * \param toolchain_prefix prefix of compiler toolchain to use + */ + SymbolMap(const std::string& binary, + const std::string& toolchain_prefix) { + const auto* f = Registry::Get("tvm_callback_get_symbol_map"); + CHECK(f != nullptr) << "require tvm_callback_get_symbol_map to exist in registry"; + TVMByteArray arr; + arr.data = &binary[0]; + arr.size = binary.length(); + std::string map_str = (*f)(arr, toolchain_prefix); + // Parse symbols and addresses from returned string. + std::stringstream stream; + stream << map_str; + std::string name; + std::uintptr_t addr; + stream >> name; + stream >> std::hex >> addr; + while (stream) { + map_[name] = DevPtr(addr); + stream >> name; + stream >> std::hex >> addr; + } + } + + /*! + * \brief retrieve on-device offset for a symbol name + * \param name name of the symbol + * \return on-device offset of the symbol + */ + DevPtr operator[](const std::string& name) const { + auto result = map_.find(name); + CHECK(result != map_.end()) << "\"" << name << "\" not in symbol map"; + return result->second; + } + + private: + /*! 
/*!
 * \brief upper-aligns value according to specified alignment
 * \param value value to be aligned
 * \param align alignment; 0 is treated as "no alignment requirement"
 * \return smallest multiple of `align` that is >= `value`, or `value` itself
 *         when `align` is 0
 */
inline size_t UpperAlignValue(size_t value, size_t align) {
  // Guard against a modulo by zero below.
  if (align == 0) {
    return value;
  }
  const size_t remainder = value % align;
  return remainder == 0 ? value : value + (align - remainder);
}
+ * \brief links binary by repositioning section addresses + * \param binary_name input binary filename + * \param text new text section address + * \param rodata new rodata section address + * \param data new data section address + * \param bss new bss section address + * \param toolchain_prefix prefix of compiler toolchain to use + * \return relocated binary file contents + */ +std::string RelocateBinarySections(const std::string& binary_name, + DevPtr text, + DevPtr rodata, + DevPtr data, + DevPtr bss, + const std::string& toolchain_prefix); + +/*! + * \brief reads section from binary + * \param binary input binary contents + * \param section section type to be read + * \param toolchain_prefix prefix of compiler toolchain to use + * \return contents of the section + */ +std::string ReadSection(const std::string& binary, + SectionKind section, + const std::string& toolchain_prefix); + +/*! + * \brief finds size of the section in the binary + * \param binary input binary contents + * \param section section type + * \param toolchain_prefix prefix of compiler toolchain to use + * \param align alignment of the returned size (default: 8) + * \return size of the section if it exists, 0 otherwise + */ +size_t GetSectionSize(const std::string& binary_name, + SectionKind section, + const std::string& toolchain_prefix, + size_t align = kDefaultSizeAlignment); + +} // namespace runtime +} // namespace tvm +#endif // TVM_RUNTIME_MICRO_MICRO_COMMON_H_ diff --git a/src/runtime/micro/micro_device_api.cc b/src/runtime/micro/micro_device_api.cc new file mode 100644 index 000000000000..88328a2a4305 --- /dev/null +++ b/src/runtime/micro/micro_device_api.cc @@ -0,0 +1,173 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +/*! + * Copyright (c) 2019 by Contributors + * \file micro_device_api.cc + */ + +#include +#include +#include +#include "../workspace_pool.h" +#include "micro_session.h" + +namespace tvm { +namespace runtime { +/*! + * \brief device API for uTVM micro devices + */ +class MicroDeviceAPI final : public DeviceAPI { + public: + /*! \brief constructor */ + MicroDeviceAPI() { } + + void SetDevice(TVMContext ctx) final {} + + void GetAttr(TVMContext ctx, DeviceAttrKind kind, TVMRetValue* rv) final { + if (kind == kExist) { + *rv = 1; + } + } + + void* AllocDataSpace(TVMContext ctx, + size_t nbytes, + size_t alignment, + TVMType type_hint) final { + std::shared_ptr& session = MicroSession::Current(); + void* data = session->AllocateInSection(SectionKind::kHeap, nbytes).cast_to(); + CHECK(data != nullptr) << "unable to allocate " << nbytes << " bytes on device heap"; + MicroDevSpace* dev_space = new MicroDevSpace(); + dev_space->data = data; + dev_space->session = session; + return static_cast(dev_space); + } + + void FreeDataSpace(TVMContext ctx, void* ptr) final { + MicroDevSpace* dev_space = static_cast(ptr); + dev_space->session->FreeInSection( + SectionKind::kHeap, DevBaseOffset(reinterpret_cast(dev_space->data))); + delete dev_space; + } + + void CopyDataFromTo(const void* from, + size_t from_offset, + void* to, + size_t to_offset, + size_t size, + TVMContext ctx_from, + 
TVMContext ctx_to, + TVMType type_hint, + TVMStreamHandle stream) final { + std::tuple type_from_to(ctx_from.device_type, ctx_to.device_type); + if (type_from_to == std::make_tuple(kDLMicroDev, kDLMicroDev)) { + // Copying from the device to the device. + + MicroDevSpace* from_space = static_cast(const_cast(from)); + MicroDevSpace* to_space = static_cast(const_cast(to)); + CHECK(from_space->session == to_space->session) + << "attempt to copy data between different micro sessions (" << from_space->session + << " != " << to_space->session << ")"; + CHECK(ctx_from.device_id == ctx_to.device_id) + << "can only copy between the same micro device"; + std::shared_ptr& session = from_space->session; + const std::shared_ptr& lld = session->low_level_device(); + + DevBaseOffset from_dev_offset = GetDevLoc(from_space, from_offset); + DevBaseOffset to_dev_offset = GetDevLoc(to_space, to_offset); + + std::vector buffer(size); + lld->Read(from_dev_offset, static_cast(buffer.data()), size); + lld->Write(to_dev_offset, static_cast(buffer.data()), size); + } else if (type_from_to == std::make_tuple(kDLMicroDev, kDLCPU)) { + // Reading from the device. + + MicroDevSpace* from_space = static_cast(const_cast(from)); + std::shared_ptr& session = from_space->session; + const std::shared_ptr& lld = session->low_level_device(); + + DevBaseOffset from_dev_offset = GetDevLoc(from_space, from_offset); + void* to_host_ptr = GetHostLoc(to, to_offset); + lld->Read(from_dev_offset, to_host_ptr, size); + } else if (type_from_to == std::make_tuple(kDLCPU, kDLMicroDev)) { + // Writing to the device. 
+ + MicroDevSpace* to_space = static_cast(const_cast(to)); + std::shared_ptr& session = to_space->session; + const std::shared_ptr& lld = session->low_level_device(); + + void* from_host_ptr = GetHostLoc(from, from_offset); + DevBaseOffset to_dev_offset = GetDevLoc(to_space, to_offset); + lld->Write(to_dev_offset, from_host_ptr, size); + } else { + LOG(FATAL) << "Expect copy from/to micro device or between micro device\n"; + } + } + + void StreamSync(TVMContext ctx, TVMStreamHandle stream) final { + } + + void* AllocWorkspace(TVMContext ctx, size_t size, TVMType type_hint) final { + std::shared_ptr& session = MicroSession::Current(); + + void* data = session->AllocateInSection(SectionKind::kWorkspace, size).cast_to(); + CHECK(data != nullptr) << "unable to allocate " << size << " bytes on device workspace"; + MicroDevSpace* dev_space = new MicroDevSpace(); + dev_space->data = data; + dev_space->session = session; + return static_cast(dev_space); + } + + void FreeWorkspace(TVMContext ctx, void* data) final { + MicroDevSpace* dev_space = static_cast(data); + std::shared_ptr& session = dev_space->session; + session->FreeInSection(SectionKind::kWorkspace, + DevBaseOffset(reinterpret_cast(dev_space->data))); + delete dev_space; + } + + /*! 
+ * \brief obtain a global singleton of MicroDeviceAPI + * \return global shared pointer to MicroDeviceAPI + */ + static const std::shared_ptr& Global() { + static std::shared_ptr inst = std::make_shared(); + return inst; + } + + private: + DevBaseOffset GetDevLoc(MicroDevSpace* dev_space, size_t offset) { + DevBaseOffset dev_offset = + DevBaseOffset(reinterpret_cast(dev_space->data) + offset); + return dev_offset; + } + + void* GetHostLoc(const void* ptr, size_t offset) { + return reinterpret_cast(reinterpret_cast(ptr) + offset); + } +}; + +// register device that can be obtained from Python frontend +TVM_REGISTER_GLOBAL("device_api.micro_dev") +.set_body([](TVMArgs args, TVMRetValue* rv) { + DeviceAPI* ptr = MicroDeviceAPI::Global().get(); + *rv = static_cast(ptr); + }); +} // namespace runtime +} // namespace tvm diff --git a/src/runtime/micro/micro_module.cc b/src/runtime/micro/micro_module.cc new file mode 100644 index 000000000000..85cd35982138 --- /dev/null +++ b/src/runtime/micro/micro_module.cc @@ -0,0 +1,126 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +/*! 
+* Copyright (c) 2019 by Contributors +* \file micro_module.cc +*/ + +#include +#include +#include +#include +#include +#include "micro_session.h" +#include "low_level_device.h" +#include "micro_common.h" +#include "../pack_args.h" + +namespace tvm { +namespace runtime { +/*! + * \brief module for uTVM micro devices + */ +class MicroModuleNode final : public ModuleNode { + public: + MicroModuleNode() {} + + ~MicroModuleNode() {} + + const char* type_key() const final { + return "micro"; + } + + PackedFunc GetFunction(const std::string& name, + const std::shared_ptr& sptr_to_self) final; + + /*! + * \brief initializes module by establishing device connection and loads binary + * \param binary_path path of the binary to be loaded + */ + void InitMicroModule(const std::string& binary_path) { + session_ = MicroSession::Current(); + binary_path_ = binary_path; + binary_info_ = session_->LoadBinary(binary_path_); + } + + /*! + * \brief runs selected function on the micro device + * \param func_name name of the function to be run + * \param func_offset offset of the function to be run + * \param args type-erased arguments passed to the function + */ + void RunFunction(const std::string& func_name, DevBaseOffset func_offset, const TVMArgs& args) { + session_->PushToExecQueue(func_offset, args); + } + + private: + /*! \brief module binary info */ + BinaryInfo binary_info_; + /*! \brief path to module binary */ + std::string binary_path_; + /*! \brief global session pointer */ + std::shared_ptr session_; +}; + +class MicroWrappedFunc { + public: + MicroWrappedFunc(MicroModuleNode* m, + std::shared_ptr session, + const std::string& func_name, + DevBaseOffset func_offset) { + m_ = m; + session_ = session; + func_name_ = func_name; + func_offset_ = func_offset; + } + + void operator()(TVMArgs args, TVMRetValue* rv) const { + m_->RunFunction(func_name_, func_offset_, args); + } + + private: + /*! \brief internal module */ + MicroModuleNode* m_; + /*! 
\brief reference to the session for this function (to keep the session alive) */ + std::shared_ptr session_; + /*! \brief name of the function */ + std::string func_name_; + /*! \brief offset of the function to be called */ + DevBaseOffset func_offset_; +}; + +PackedFunc MicroModuleNode::GetFunction( + const std::string& name, + const std::shared_ptr& sptr_to_self) { + DevBaseOffset func_offset = + session_->low_level_device()->ToDevOffset(binary_info_.symbol_map[name]); + MicroWrappedFunc f(this, session_, name, func_offset); + return PackedFunc(f); +} + +// register loadfile function to load module from Python frontend +TVM_REGISTER_GLOBAL("module.loadfile_micro_dev") +.set_body([](TVMArgs args, TVMRetValue* rv) { + std::shared_ptr n = std::make_shared(); + n->InitMicroModule(args[0]); + *rv = runtime::Module(n); + }); +} // namespace runtime +} // namespace tvm diff --git a/src/runtime/micro/micro_section_allocator.h b/src/runtime/micro/micro_section_allocator.h new file mode 100644 index 000000000000..e2abb477ada1 --- /dev/null +++ b/src/runtime/micro/micro_section_allocator.h @@ -0,0 +1,122 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +/*! 
+ * Copyright (c) 2019 by Contributors + * \file micro_section_allocator.h + */ +#ifndef TVM_RUNTIME_MICRO_MICRO_SECTION_ALLOCATOR_H_ +#define TVM_RUNTIME_MICRO_MICRO_SECTION_ALLOCATOR_H_ + +#include +#include "micro_common.h" + +namespace tvm { +namespace runtime { + +/*! + * \brief allocator for an on-device memory section + */ +class MicroSectionAllocator { + public: + /*! + * \brief constructor that specifies section boundaries + * \param region location and size of the section on the device + */ + explicit MicroSectionAllocator(DevMemRegion region) + : start_offset_(region.start), + size_(0), + capacity_(region.size) { + CHECK_EQ(start_offset_.value() % 8, 0) << "micro section not aligned to 8 bytes"; + } + + /*! + * \brief destructor + */ + ~MicroSectionAllocator() {} + + /*! + * \brief memory allocator + * \param size size of allocated memory in bytes + * \return pointer to allocated memory region in section, nullptr if out of space + */ + DevBaseOffset Allocate(size_t size) { + size_ = UpperAlignValue(size_, 8); + CHECK(size_ + size < capacity_) + << "cannot alloc " << size << " bytes in section with start_addr " << + start_offset_.value(); + DevBaseOffset alloc_ptr = start_offset_ + size_; + size_ += size; + alloc_map_[alloc_ptr.value()] = size; + return alloc_ptr; + } + + /*! + * \brief free prior allocation from section + * \param offs offset to allocated memory + * \note simple allocator scheme, more complex versions will be implemented later + */ + void Free(DevBaseOffset offs) { + std::uintptr_t ptr = offs.value(); + CHECK(alloc_map_.find(ptr) != alloc_map_.end()) << "freed pointer was never allocated"; + alloc_map_.erase(ptr); + if (alloc_map_.empty()) { + size_ = 0; + } + } + + /*! + * \brief start offset of the memory region managed by this allocator + */ + DevBaseOffset start_offset() const { return start_offset_; } + + /*! 
+ * \brief current end offset of the space being used in this memory region + */ + DevBaseOffset curr_end_offset() const { return start_offset_ + size_; } + + /*! + * \brief end offset of the memory region managed by this allocator + */ + DevBaseOffset max_end_offset() const { return start_offset_ + capacity_; } + + /*! + * \brief size of the section + */ + size_t size() const { return size_; } + + /*! + * \brief capacity of the section + */ + size_t capacity() const { return capacity_; } + + private: + /*! \brief start address of the section */ + DevBaseOffset start_offset_; + /*! \brief current size of the section */ + size_t size_; + /*! \brief total storage capacity of the section */ + size_t capacity_; + /*! \brief allocation map for allocation sizes */ + std::unordered_map alloc_map_; +}; + +} // namespace runtime +} // namespace tvm +#endif // TVM_RUNTIME_MICRO_MICRO_SECTION_ALLOCATOR_H_ diff --git a/src/runtime/micro/micro_session.cc b/src/runtime/micro/micro_session.cc new file mode 100644 index 000000000000..ca6f4469d406 --- /dev/null +++ b/src/runtime/micro/micro_session.cc @@ -0,0 +1,381 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +/*! 
+ * Copyright (c) 2019 by Contributors + * \file micro_session.cc + * \brief session to manage multiple micro modules + * + * Each session consists of an interaction with a *single* logical device. + * Within that interaction, multiple TVM modules can be loaded on the logical + * device. + * + * Multiple sessions can exist simultaneously, but there is only ever one + * *active* session. The idea of an active session mainly has implications for + * the frontend, in that one must make a session active in order to allocate + * new TVM objects on it. Aside from that, previously allocated objects can be + * used even if the session which they belong to is not currently active. + */ + +#include +#include +#include +#include +#include +#include +#include "micro_session.h" +#include "low_level_device.h" +#include "target_data_layout_encoder.h" + +namespace tvm { +namespace runtime { + +struct TVMMicroSessionThreadLocalEntry { + std::stack> session_stack; +}; + +typedef dmlc::ThreadLocalStore TVMMicroSessionThreadLocalStore; + +std::shared_ptr& MicroSession::Current() { + TVMMicroSessionThreadLocalEntry *entry = TVMMicroSessionThreadLocalStore::Get(); + CHECK_GT(entry->session_stack.size(), 0) << "No current session"; + return entry->session_stack.top(); +} + +void MicroSession::EnterWithScope(std::shared_ptr session) { + TVMMicroSessionThreadLocalEntry *entry = TVMMicroSessionThreadLocalStore::Get(); + entry->session_stack.push(session); +} + +void MicroSession::ExitWithScope() { + TVMMicroSessionThreadLocalEntry *entry = TVMMicroSessionThreadLocalStore::Get(); + CHECK(!entry->session_stack.empty()); + entry->session_stack.pop(); +} + +MicroSession::MicroSession() { + DevBaseOffset curr_start_offset = kDeviceStart; + for (size_t i = 0; i < static_cast(SectionKind::kNumKinds); i++) { + size_t section_size = GetDefaultSectionSize(static_cast(i)); + section_allocators_[i] = std::make_shared(DevMemRegion { + .start = curr_start_offset, + .size = section_size, + }); + 
curr_start_offset += section_size; + } + memory_size_ = curr_start_offset.cast_to(); +} + +MicroSession::~MicroSession() { + for (size_t i = 0; i < static_cast(SectionKind::kNumKinds); i++) { + section_allocators_[i] = nullptr; + } + + low_level_device_ = nullptr; +} + +void MicroSession::CreateSession(const std::string& device_type, + const std::string& binary_path, + const std::string& toolchain_prefix) { + // TODO(weberlo): make device type enum + if (device_type == "host") { + low_level_device_ = HostLowLevelDeviceCreate(memory_size_); + } else { + LOG(FATAL) << "unsupported micro low-level device"; + } + SetRuntimeBinaryPath(binary_path); + CHECK(!runtime_binary_path_.empty()) << "uTVM runtime not initialized"; + runtime_bin_info_ = LoadBinary(runtime_binary_path_, /* patch_dylib_pointers */ false); + utvm_main_symbol_ = low_level_device()->ToDevOffset(runtime_symbol_map()["UTVMMain"]); + utvm_done_symbol_ = low_level_device()->ToDevOffset(runtime_symbol_map()["UTVMDone"]); + + // Patch workspace pointers to the start of the workspace section. + DevBaseOffset workspace_start_offset = GetAllocator(SectionKind::kWorkspace)->start_offset(); + DevBaseOffset workspace_end_offset = GetAllocator(SectionKind::kWorkspace)->max_end_offset(); + void* workspace_start_addr = + low_level_device_->ToDevPtr(workspace_start_offset).cast_to(); + void* workspace_end_addr = + low_level_device_->ToDevPtr(workspace_end_offset).cast_to(); + DevSymbolWrite(runtime_symbol_map(), "utvm_workspace_begin", workspace_start_addr); + DevSymbolWrite(runtime_symbol_map(), "utvm_workspace_end", workspace_end_addr); +} + +void MicroSession::PushToExecQueue(DevBaseOffset func, const TVMArgs& args) { + int32_t (*func_dev_addr)(void*, void*, int32_t) = + reinterpret_cast( + low_level_device()->ToDevPtr(func).value()); + + // Create an allocator stream for the memory region after the most recent + // allocation in the args section. 
+ DevPtr args_addr = + low_level_device()->ToDevPtr(GetAllocator(SectionKind::kArgs)->curr_end_offset()); + TargetDataLayoutEncoder encoder(args_addr); + + std::tuple arg_field_addrs = EncoderAppend(&encoder, args); + // Flush `stream` to device memory. + DevBaseOffset stream_dev_offset = + GetAllocator(SectionKind::kArgs)->Allocate(encoder.buf_size()); + low_level_device()->Write(stream_dev_offset, + reinterpret_cast(encoder.data()), + encoder.buf_size()); + + UTVMTask task = { + .func = func_dev_addr, + .arg_values = std::get<0>(arg_field_addrs).cast_to(), + .arg_type_codes = std::get<1>(arg_field_addrs).cast_to(), + .num_args = args.num_args, + }; + // Write the task. + DevSymbolWrite(runtime_symbol_map(), "task", task); + low_level_device()->Execute(utvm_main_symbol_, utvm_done_symbol_); + // Check if there was an error during execution. If so, log it. + CheckDeviceError(); + + GetAllocator(SectionKind::kArgs)->Free(stream_dev_offset); +} + +std::tuple MicroSession::EncoderAppend( + TargetDataLayoutEncoder* encoder, const TVMArgs& args) { + const int* type_codes = args.type_codes; + int num_args = args.num_args; + + auto tvm_vals_slot = encoder->Alloc(num_args); + auto type_codes_slot = encoder->Alloc(num_args); + + for (int i = 0; i < num_args; i++) { + switch (type_codes[i]) { + case kNDArrayContainer: + case kArrayHandle: { + TVMArray* base_arr_handle = args[i]; + // All uTVM arrays store a `MicroDevSpace` struct in their `data` field, + // which wraps the actual data and stores a reference to the session, in + // order to prevent premature session destruction. + void* old_data = base_arr_handle->data; + // Mutate the array to unwrap the `data` field. + base_arr_handle->data = reinterpret_cast(old_data)->data; + // Now, encode the unwrapped version. + void* arr_ptr = EncoderAppend(encoder, *base_arr_handle).cast_to(); + // And restore the original wrapped version. 
+ base_arr_handle->data = old_data; + + TVMValue val; + val.v_handle = arr_ptr; + tvm_vals_slot.WriteValue(val); + break; + } + // TODO(weberlo): Implement `double` and `int64` case. + case kDLFloat: + case kDLInt: + case kDLUInt: + default: + LOG(FATAL) << "unsupported type code for writing args: " << type_codes[i]; + break; + } + } + type_codes_slot.WriteArray(type_codes, num_args); + + return std::make_tuple(tvm_vals_slot.start_addr(), type_codes_slot.start_addr()); +} + +DevPtr MicroSession::EncoderAppend(TargetDataLayoutEncoder* encoder, const TVMArray& arr) { + auto tvm_arr_slot = encoder->Alloc(); + auto shape_slot = encoder->Alloc(arr.ndim); + + // `shape` and `strides` are stored on the host, so we need to write them to + // the device first. The `data` field is already allocated on the device and + // is a device pointer, so we don't need to write it. + shape_slot.WriteArray(arr.shape, arr.ndim); + DevPtr shape_addr = shape_slot.start_addr(); + DevPtr strides_addr = DevPtr(nullptr); + if (arr.strides != nullptr) { + auto stride_slot = encoder->Alloc(arr.ndim); + stride_slot.WriteArray(arr.strides, arr.ndim); + strides_addr = stride_slot.start_addr(); + } + + // Copy `arr`, update the copy's pointers to be device pointers, then + // write the copy to `tvm_arr_slot`. + TVMArray dev_arr = arr; + // Update the device type to look like a host, because codegen generates + // checks that it is a host array. + CHECK(dev_arr.ctx.device_type == static_cast(kDLMicroDev)) + << "attempt to write TVMArray with non-micro device type"; + dev_arr.ctx.device_type = DLDeviceType::kDLCPU; + // Add the base address of the device to the array's data's device offset to + // get a device address. 
+ DevBaseOffset arr_offset(reinterpret_cast(arr.data)); + dev_arr.data = low_level_device()->ToDevPtr(arr_offset).cast_to(); + dev_arr.shape = shape_addr.cast_to(); + dev_arr.strides = strides_addr.cast_to(); + tvm_arr_slot.WriteValue(dev_arr); + return tvm_arr_slot.start_addr(); +} + +void MicroSession::CheckDeviceError() { + int32_t return_code = DevSymbolRead(runtime_symbol_map(), "utvm_return_code"); + + if (return_code) { + std::uintptr_t last_error = + DevSymbolRead(runtime_symbol_map(), "utvm_last_error"); + std::string last_error_str; + if (last_error) { + DevBaseOffset last_err_offset = low_level_device()->ToDevOffset(DevPtr(last_error)); + last_error_str = ReadString(last_err_offset); + } + LOG(FATAL) << "error during micro function execution:\n" + << " return code: " << std::dec << return_code << "\n" + << " dev str addr: 0x" << std::hex << last_error << "\n" + << " dev str data: " << last_error_str << std::endl; + } +} + +BinaryInfo MicroSession::LoadBinary(const std::string& binary_path, bool patch_dylib_pointers) { + DevMemRegion text_section; + DevMemRegion rodata_section; + DevMemRegion data_section; + DevMemRegion bss_section; + + text_section.size = GetSectionSize(binary_path, SectionKind::kText, toolchain_prefix_); + rodata_section.size = GetSectionSize(binary_path, SectionKind::kRodata, toolchain_prefix_); + data_section.size = GetSectionSize(binary_path, SectionKind::kData, toolchain_prefix_); + bss_section.size = GetSectionSize(binary_path, SectionKind::kBss, toolchain_prefix_); + + text_section.start = AllocateInSection(SectionKind::kText, text_section.size); + rodata_section.start = AllocateInSection(SectionKind::kRodata, rodata_section.size); + data_section.start = AllocateInSection(SectionKind::kData, data_section.size); + bss_section.start = AllocateInSection(SectionKind::kBss, bss_section.size); + CHECK(text_section.start != nullptr && rodata_section.start != nullptr && + data_section.start != nullptr && bss_section.start != nullptr) + 
<< "not enough space to load module on device"; + + std::string relocated_bin = RelocateBinarySections( + binary_path, + low_level_device_->ToDevPtr(text_section.start), + low_level_device_->ToDevPtr(rodata_section.start), + low_level_device_->ToDevPtr(data_section.start), + low_level_device_->ToDevPtr(bss_section.start), + toolchain_prefix_); + std::string text_contents = ReadSection(relocated_bin, SectionKind::kText, toolchain_prefix_); + std::string rodata_contents = ReadSection(relocated_bin, SectionKind::kRodata, toolchain_prefix_); + std::string data_contents = ReadSection(relocated_bin, SectionKind::kData, toolchain_prefix_); + std::string bss_contents = ReadSection(relocated_bin, SectionKind::kBss, toolchain_prefix_); + low_level_device_->Write(text_section.start, &text_contents[0], text_section.size); + low_level_device_->Write(rodata_section.start, &rodata_contents[0], rodata_section.size); + low_level_device_->Write(data_section.start, &data_contents[0], data_section.size); + low_level_device_->Write(bss_section.start, &bss_contents[0], bss_section.size); + SymbolMap symbol_map {relocated_bin, toolchain_prefix_}; + + if (patch_dylib_pointers) { + // Patch device lib pointers. 
+ PatchImplHole(symbol_map, "TVMBackendAllocWorkspace"); + PatchImplHole(symbol_map, "TVMBackendFreeWorkspace"); + PatchImplHole(symbol_map, "TVMAPISetLastError"); + } + + return BinaryInfo { + .text_section = text_section, + .rodata_section = rodata_section, + .data_section = data_section, + .bss_section = bss_section, + .symbol_map = symbol_map, + }; +} + +void MicroSession::PatchImplHole(const SymbolMap& symbol_map, const std::string& func_name) { + void* runtime_impl_addr = runtime_symbol_map()[func_name].cast_to(); + std::stringstream func_name_underscore; + func_name_underscore << func_name << "_"; + DevSymbolWrite(symbol_map, func_name_underscore.str(), runtime_impl_addr); +} + +void MicroSession::SetRuntimeBinaryPath(std::string path) { + runtime_binary_path_ = path; +} + +std::string MicroSession::ReadString(DevBaseOffset str_offset) { + std::stringstream result; + const size_t buf_size = 256; + std::vector buf(buf_size, 0); + size_t i = buf_size; + while (i == buf_size) { + low_level_device()->Read(str_offset, buf.data(), buf_size); + i = 0; + while (i < buf_size) { + if (buf[i] == 0) break; + result << buf[i]; + i++; + } + str_offset = str_offset + i; + } + return result.str(); +} + +DevBaseOffset MicroSession::AllocateInSection(SectionKind type, size_t size) { + return GetAllocator(type)->Allocate(size); +} + +void MicroSession::FreeInSection(SectionKind type, DevBaseOffset ptr) { + return GetAllocator(type)->Free(ptr); +} + +template +T MicroSession::DevSymbolRead(const SymbolMap& symbol_map, const std::string& symbol) { + DevBaseOffset sym_offset = low_level_device()->ToDevOffset(symbol_map[symbol]); + T result; + low_level_device()->Read(sym_offset, &result, sizeof(T)); + return result; +} + +template +void MicroSession::DevSymbolWrite(const SymbolMap& symbol_map, + const std::string& symbol, + const T& value) { + DevBaseOffset sym_offset = low_level_device()->ToDevOffset(symbol_map[symbol]); + low_level_device()->Write(sym_offset, &value, 
sizeof(T)); +} + +PackedFunc MicroSession::GetFunction( + const std::string& name, + const std::shared_ptr& sptr_to_self) { + if (name == "enter") { + return PackedFunc([sptr_to_self](TVMArgs args, TVMRetValue* rv) { + MicroSession::EnterWithScope(std::dynamic_pointer_cast(sptr_to_self)); + }); + } else if (name == "exit") { + return PackedFunc([sptr_to_self](TVMArgs args, TVMRetValue* rv) { + MicroSession::ExitWithScope(); + }); + } else { + return PackedFunc(); + } +} + +// create micro session and low-level device from Python frontend +TVM_REGISTER_GLOBAL("micro._CreateSession") +.set_body([](TVMArgs args, TVMRetValue* rv) { + const std::string& device_type = args[0]; + const std::string& binary_path = args[1]; + const std::string& toolchain_prefix = args[2]; + std::shared_ptr session = std::make_shared(); + session->CreateSession(device_type, binary_path, toolchain_prefix); + *rv = Module(session); + }); + +} // namespace runtime +} // namespace tvm diff --git a/src/runtime/micro/micro_session.h b/src/runtime/micro/micro_session.h new file mode 100644 index 000000000000..e1635498bd45 --- /dev/null +++ b/src/runtime/micro/micro_session.h @@ -0,0 +1,252 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +/*! + * Copyright (c) 2019 by Contributors + * \file micro_session.h + */ +#ifndef TVM_RUNTIME_MICRO_MICRO_SESSION_H_ +#define TVM_RUNTIME_MICRO_MICRO_SESSION_H_ + +#include "micro_common.h" +#include "micro_section_allocator.h" + +#include +#include + +#include +#include +#include +#include +#include + +#include "low_level_device.h" +#include "device/utvm_runtime.h" +#include "target_data_layout_encoder.h" + +namespace tvm { +namespace runtime { + +/*! + * \brief session for facilitating micro device interaction + */ +class MicroSession : public ModuleNode { + public: + /*! + * \brief Get member function to front-end + * \param name The name of the function. + * \param sptr_to_self The pointer to the module node. + * \return The corresponding member function. + */ + virtual PackedFunc GetFunction(const std::string& name, + const std::shared_ptr& sptr_to_self); + + /*! + * \return The type key of the executor. + */ + const char* type_key() const final { + return "MicroSession"; + } + + /*! + * \brief constructor + */ + MicroSession(); + + /*! + * \brief destructor + */ + ~MicroSession(); + + static std::shared_ptr& Current(); + + /*! + * \brief creates session by setting up a low-level device and initting allocators for it + * \param args TVMArgs passed into the micro.init packedfunc + */ + void CreateSession(const std::string& device_type, + const std::string& binary_path, + const std::string& toolchain_prefix); + + /*! + * \brief ends the session by destructing the low-level device and its allocators + */ + void EndSession(); + + /*! + * \brief allocate memory in section + * \param type type of section to allocate in + * \param size size of allocated memory in bytes + * \return pointer to allocated memory region in section, nullptr if out of space + */ + DevBaseOffset AllocateInSection(SectionKind type, size_t size); + + /*! 
+ * \brief free prior allocation from section + * \param type type of section to allocate in + * \param ptr pointer to allocated memory + */ + void FreeInSection(SectionKind type, DevBaseOffset ptr); + + /*! + * \brief read string from device to host + * \param str_offset device offset of first character of string + * \return host copy of device string that was read + */ + std::string ReadString(DevBaseOffset str_offset); + + /*! + * \brief sets up runtime metadata for `func` and copies arguments for on-device execution + * \param func address of the function to be executed + * \param args args to the packed function + */ + void PushToExecQueue(DevBaseOffset func, const TVMArgs& args); + + /*! + * \brief loads binary onto device + * \param binary_path path to binary object file + * \param patch_dylib_pointers whether runtime API function pointer patching is needed + * \return info about loaded binary + */ + BinaryInfo LoadBinary(const std::string& binary_path, bool patch_dylib_pointers = true); + + /*! + * \brief read value of symbol from device memory + * \param symbol_map symbol map to read location of symbol from + * \param symbol name of symbol being read from + * \return value at symbol in memory + */ + template + T DevSymbolRead(const SymbolMap& symbol_map, const std::string& symbol); + + /*! + * \brief write value into device memory corresponding to symbol + * \param symbol_map symbol map to read location of symbol from + * \param symbol name of symbol being written to + * \param value value being written into symbol + */ + template + void DevSymbolWrite(const SymbolMap& symbol_map, const std::string& symbol, const T& value); + + /*! + * \brief returns low-level device pointer + * \note assumes low-level device has been initialized + */ + const std::shared_ptr& low_level_device() const { + CHECK(low_level_device_ != nullptr) << "attempt to get uninitialized low-level device"; + return low_level_device_; + } + + private: + /*! 
\brief low-level device pointer */ + std::shared_ptr low_level_device_; + /*! \brief prefix for binary names in target compiler toolchain */ + std::string toolchain_prefix_; + /*! \brief array of memory allocators for each on-device section */ + std::shared_ptr + section_allocators_[static_cast(SectionKind::kNumKinds)]; + /*! \brief total number of bytes of usable device memory for this session */ + size_t memory_size_; + /*! \brief uTVM runtime binary info */ + BinaryInfo runtime_bin_info_; + /*! \brief path to uTVM runtime source code */ + std::string runtime_binary_path_; + /*! \brief offset of the runtime entry function */ + DevBaseOffset utvm_main_symbol_; + /*! \brief offset of the runtime exit breakpoint */ + DevBaseOffset utvm_done_symbol_; + + /*! + * \brief patches a function pointer in this module to an implementation + * \param func_name name of the function pointer being patched + */ + void PatchImplHole(const SymbolMap& symbol_map, const std::string& func_name); + + /*! + * \brief sets the runtime binary path + * \param path to runtime binary + */ + void SetRuntimeBinaryPath(std::string path); + + /*! + * \brief appends arguments to the host-side buffer of `encoder` + * \param encoder encoder being used to append `args` + * \param args args to be appended + * \return device address of the allocated args + */ + std::tuple EncoderAppend(TargetDataLayoutEncoder* encoder, const TVMArgs& args); + + /*! + * \brief appends a `TVMArray` to the host-side buffer of `encoder` + * \param encoder encoder being used to append `arr` + * \param arr TVMArray to be appended + * \return device address of the allocated `TVMArray` + */ + DevPtr EncoderAppend(TargetDataLayoutEncoder* encoder, const TVMArray& arr); + + /*! + * \brief checks and logs if there was an error during the device's most recent execution + */ + void CheckDeviceError(); + + /*! 
+ * \brief returns section allocator corresponding to the given section kind + * \param kind kind of target section + * \return shared pointer to section allocator + */ + std::shared_ptr GetAllocator(SectionKind kind) { + return section_allocators_[static_cast(kind)]; + } + + /*! + * \brief returns the symbol map for the uTVM runtime + * \return reference to symbol map + */ + const SymbolMap& runtime_symbol_map() { + return runtime_bin_info_.symbol_map; + } + + /*! + * \brief Push a new session context onto the thread-local stack. + * The session on top of the stack is used as the current global session. + */ + static void EnterWithScope(std::shared_ptr session); + /*! + * \brief Pop a session off the thread-local context stack, + * restoring the previous session as the current context. + */ + static void ExitWithScope(); +}; + +/*! + * \brief a device memory region associated with the session that allocated it + * + * We use this to store a reference to the session in each allocated object and + * only deallocate the session once there are no more references to it. + */ +struct MicroDevSpace { + /*! \brief data being wrapped */ + void* data; + /*! \brief shared ptr to session where this data is valid */ + std::shared_ptr session; +}; + +} // namespace runtime +} // namespace tvm +#endif // TVM_RUNTIME_MICRO_MICRO_SESSION_H_ diff --git a/src/runtime/micro/target_data_layout_encoder.h b/src/runtime/micro/target_data_layout_encoder.h new file mode 100644 index 000000000000..56ae788cc696 --- /dev/null +++ b/src/runtime/micro/target_data_layout_encoder.h @@ -0,0 +1,190 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +/*! + * Copyright (c) 2019 by Contributors + * \file target_data_layout_encoder.h + * \brief uTVM data layout encoder + */ +#ifndef TVM_RUNTIME_MICRO_TARGET_DATA_LAYOUT_ENCODER_H_ +#define TVM_RUNTIME_MICRO_TARGET_DATA_LAYOUT_ENCODER_H_ + +#include +#include "device/utvm_runtime.h" + +namespace tvm { +namespace runtime { + +// TODO(weberlo): Handle endianness. + +/*! + * \brief data encoder for uTVM that builds a host-side buffer + */ +class TargetDataLayoutEncoder { + public: + /*! + * \brief helper class for writing into `TargetDataLayoutEncoder` + */ + template + class Slot { + public: + /*! + * \brief constructor + * \param parent pointer to parent encoder + * \param start_offset start byte offset of the slot in the backing buffer + * \param size size (in bytes) of the memory region allocated for this slot + * \param start_addr start address of the slot in the device's memory + */ + Slot(TargetDataLayoutEncoder* parent, size_t start_offset, size_t size, DevPtr start_addr); + + ~Slot(); + + /*! + * \brief writes `sizeof(T) * num_elems` bytes of data from `arr` + * \param arr array to be read from + * \param num_elems number of elements in array + */ + void WriteArray(const T* arr, size_t num_elems); + + /*! + * \brief writes `val` + * \param val value to be written + */ + void WriteValue(const T& val); + + /*! + * \brief returns start address of the slot in device memory + * \return device start address + */ + DevPtr start_addr(); + + /*! 
+ * \brief returns number of bytes allocated for this slot + * \return size of this slot + */ + size_t size(); + + private: + /*! \brief pointer to parent encoder */ + TargetDataLayoutEncoder* parent_; + /*! \brief start offset of the slot in the parent's backing parent_buffer */ + size_t start_offset_; + /*! \brief current offset relative to the start offset of this slot */ + size_t curr_offset_; + /*! \brief size (in bytes) of the memory region allocated for this slot */ + size_t size_; + /*! \brief start address of the slot in the device's memory */ + DevPtr start_addr_; + }; + + /*! + * \brief constructor + * \param start_addr start address of the encoder in device memory + */ + explicit TargetDataLayoutEncoder(DevPtr start_addr) + : buf_(std::vector()), curr_offset_(0) { + start_addr_ = DevPtr(UpperAlignValue(start_addr.value(), 8)); + } + + /*! + * \brief allocates a slot for `sizeof(T) * num_elems` bytes of data + * \param num_elems number of elements of type `T` being allocated (defaults to 1) + * \return slot of size `sizeof(T) * num_elems` bytes + */ + template + Slot Alloc(size_t num_elems = 1) { + curr_offset_ = UpperAlignValue(curr_offset_, 8); + size_t size = sizeof(T) * num_elems; + if (curr_offset_ + size > buf_.size()) { + buf_.resize(curr_offset_ + size); + } + size_t slot_start_offset = curr_offset_; + curr_offset_ += size; + return Slot(this, slot_start_offset, size, start_addr_ + slot_start_offset); + } + + /*! + * \brief returns the array backing the encoder's buffer + * \return array backing the encoder's buffer + */ + uint8_t* data() { + return buf_.data(); + } + + /*! + * \brief returns current size of the encoder's buffer + * \return buffer size + */ + size_t buf_size() { + return buf_.size(); + } + + private: + /*! \brief in-memory backing buffer */ + std::vector buf_; + /*! \brief current offset */ + size_t curr_offset_; + /*! 
\brief start address of the encoder in device memory */ + DevPtr start_addr_; +}; + +template +TargetDataLayoutEncoder::Slot::Slot(TargetDataLayoutEncoder* parent, + size_t start_offset, + size_t size, + DevPtr start_addr) + : parent_(parent), + start_offset_(start_offset), + curr_offset_(0), + size_(size), + start_addr_(start_addr) {} + +template +TargetDataLayoutEncoder::Slot::~Slot() { + CHECK(curr_offset_ == size_) << "unwritten space in slot"; +} + +template +void TargetDataLayoutEncoder::Slot::WriteArray(const T* arr, size_t num_elems) { + if (num_elems == 0) return; + size_t size = sizeof(T) * num_elems; + CHECK(curr_offset_ + size <= size_) << "not enough space in slot"; + uint8_t* curr_ptr = &(parent_->data())[start_offset_ + curr_offset_]; + std::memcpy(curr_ptr, arr, size); + curr_offset_ += size; +} + +template +void TargetDataLayoutEncoder::Slot::WriteValue(const T& val) { + WriteArray(&val, 1); +} + +template +DevPtr TargetDataLayoutEncoder::Slot::start_addr() { + return start_addr_; +} + +template +size_t TargetDataLayoutEncoder::Slot::size() { + return size_; +} + +} // namespace runtime +} // namespace tvm +#endif // TVM_RUNTIME_MICRO_TARGET_DATA_LAYOUT_ENCODER_H_ diff --git a/src/runtime/module.cc b/src/runtime/module.cc index 34f3dc9f8f83..eedb1238d760 100644 --- a/src/runtime/module.cc +++ b/src/runtime/module.cc @@ -6,9 +6,9 @@ * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. 
You may obtain a copy of the License at - * + * * http://www.apache.org/licenses/LICENSE-2.0 - * + * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY @@ -143,6 +143,8 @@ bool RuntimeEnabled(const std::string& target) { f_name = "device_api.rpc"; } else if (target == "vpi" || target == "verilog") { f_name = "device_api.vpi"; + } else if (target == "micro_dev") { + f_name = "device_api.micro_dev"; } else if (target.length() >= 5 && target.substr(0, 5) == "nvptx") { f_name = "device_api.gpu"; } else if (target.length() >= 4 && target.substr(0, 4) == "rocm") { diff --git a/tests/python/contrib/test_binutil.py b/tests/python/contrib/test_binutil.py new file mode 100644 index 000000000000..617a758d2752 --- /dev/null +++ b/tests/python/contrib/test_binutil.py @@ -0,0 +1,151 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +"""Test various utilities for interaction with compiled binaries. 
import tvm
import subprocess
from tvm.contrib import util
from tvm.contrib import cc
from tvm.contrib.binutil import *

TOOLCHAIN_PREFIX = ""


def make_binary():
    """Compile a tiny C program and return the resulting binary.

    Returns
    -------
    prog_bin : bytearray
        contents of the compiled object
    """
    prog = ("int a = 7; "
            "int main() { "
            "int b = 5; "
            "return 0; "
            "}")
    tmp_dir = util.tempdir()
    tmp_source = tmp_dir.relpath("source.c")
    tmp_obj = tmp_dir.relpath("obj.obj")
    with open(tmp_source, "w") as f:
        f.write(prog)
    cc.create_shared(tmp_obj, tmp_source, [],
                     compile_cmd="{}gcc".format(TOOLCHAIN_PREFIX))
    # Use a context manager so the file handle is closed deterministically.
    with open(tmp_obj, "rb") as f:
        return bytearray(f.read())


def test_tvm_callback_get_section_size(binary=None):
    """Print the text/data/bss section sizes of `binary`.

    Parameters
    ----------
    binary : bytearray, optional
        binary to inspect; a fresh one is built with `make_binary` when omitted
    """
    if binary is None:
        binary = make_binary()
    tmp_dir = util.tempdir()
    tmp_bin = tmp_dir.relpath("obj.bin")
    with open(tmp_bin, "wb") as f:
        f.write(binary)

    def verify():
        print("Text section size: %d" %
              tvm_callback_get_section_size(tmp_bin, "text", TOOLCHAIN_PREFIX))
        print("Data section size: %d" %
              tvm_callback_get_section_size(tmp_bin, "data", TOOLCHAIN_PREFIX))
        print("Bss section size: %d" %
              tvm_callback_get_section_size(tmp_bin, "bss", TOOLCHAIN_PREFIX))
        print()
    verify()


def test_tvm_callback_relocate_binary():
    """Relocate a binary and check each symbol lands in its requested region."""
    binary = make_binary()
    tmp_dir = util.tempdir()
    tmp_bin = tmp_dir.relpath("obj.bin")
    with open(tmp_bin, "wb") as f:
        f.write(binary)

    def verify():
        text_loc_str = "0x0"
        rodata_loc_str = "0x10000"
        data_loc_str = "0x20000"
        bss_loc_str = "0x30000"
        rel_bin = tvm_callback_relocate_binary(
            tmp_bin, text_loc_str, rodata_loc_str, data_loc_str, bss_loc_str, TOOLCHAIN_PREFIX)
        print("Relocated binary section sizes")
        test_tvm_callback_get_section_size(binary=rel_bin)
        relf = tmp_dir.relpath("rel.bin")
        with open(relf, "wb") as f:
            f.write(rel_bin)
        # Use the same toolchain for `nm` as was used to build the binary.
        nm_proc = subprocess.Popen(["{}nm".format(TOOLCHAIN_PREFIX), "-C", "--defined-only", relf],
                                   stdout=subprocess.PIPE,
                                   stderr=subprocess.STDOUT)
        (out, _) = nm_proc.communicate()
        assert nm_proc.returncode == 0, "nm failed: %s" % out.decode("utf-8")
        # Ensure the relocated symbols are within the ranges we specified.
        text_loc = int(text_loc_str, 16)
        data_loc = int(data_loc_str, 16)
        bss_loc = int(bss_loc_str, 16)
        for entry in out.decode("utf-8").split("\n"):
            # Expected shape is "<hex addr> <section letter> <name>"; stderr is
            # merged into stdout, so skip anything that does not match it.
            fields = entry.split(None, 2)
            if len(fields) != 3:
                continue
            sym_loc, section, _ = fields
            try:
                sym_loc = int(sym_loc, 16)
            except ValueError:
                continue
            if section == 'T':  # text
                assert text_loc <= sym_loc < data_loc
            elif section == 'D':  # data
                assert data_loc <= sym_loc < bss_loc
            elif section == 'B':  # bss
                assert sym_loc >= bss_loc
    verify()


def test_tvm_callback_read_binary_section():
    """Read each section and report whether its bytes occur in the binary."""
    binary = make_binary()

    def verify():
        text_bin = tvm_callback_read_binary_section(binary, "text", TOOLCHAIN_PREFIX)
        data_bin = tvm_callback_read_binary_section(binary, "data", TOOLCHAIN_PREFIX)
        bss_bin = tvm_callback_read_binary_section(binary, "bss", TOOLCHAIN_PREFIX)
        print("Read text section part of binary? %r" % (text_bin in binary))
        print("Read data section part of binary? %r" % (data_bin in binary))
        print("Read bss section part of binary? %r" % (bss_bin in binary))
        print()
    verify()


def test_tvm_callback_get_symbol_map():
    """Check the symbol map of a relocated binary contains `a` and `main`."""
    binary = make_binary()
    tmp_dir = util.tempdir()
    tmp_bin = tmp_dir.relpath("obj.bin")
    with open(tmp_bin, "wb") as f:
        f.write(binary)

    def verify():
        text_loc_str = "0x0"
        rodata_loc_str = "0x10000"
        data_loc_str = "0x20000"
        bss_loc_str = "0x30000"
        rel_bin = tvm_callback_relocate_binary(
            tmp_bin, text_loc_str, rodata_loc_str, data_loc_str, bss_loc_str, TOOLCHAIN_PREFIX)
        symbol_map = tvm_callback_get_symbol_map(rel_bin, TOOLCHAIN_PREFIX)
        # Names sit on the even-indexed lines; every other line is the value
        # the symbol maps to.
        symbols = set(symbol_map.split('\n')[::2])
        assert "a" in symbols
        assert "main" in symbols
    verify()


if __name__ == "__main__":
    test_tvm_callback_get_section_size()
    test_tvm_callback_relocate_binary()
    test_tvm_callback_read_binary_section()
    test_tvm_callback_get_symbol_map()
import tvm
import numpy as np
from tvm import relay
from tvm.contrib import util


def test_add():
    """Build an element-wise add with the C backend and check its output."""
    nn = 1024
    n = tvm.convert(nn)
    A = tvm.placeholder((n,), name='A')
    B = tvm.placeholder((n,), name='B')
    C = tvm.compute(A.shape, lambda *i: A(*i) + B(*i), name='C')
    s = tvm.create_schedule(C.op)

    def check_c():
        mhost = tvm.build(s, [A, B, C], "c", name="fadd")
        temp = util.tempdir()
        path_dso = temp.relpath("temp.so")
        mhost.export_library(path_dso)
        print(mhost.get_source())
        m = tvm.module.load(path_dso)
        fadd = m['fadd']
        ctx = tvm.cpu(0)
        # launch the kernel.
        n = nn
        a = tvm.nd.array(np.random.uniform(size=n).astype(A.dtype), ctx)
        b = tvm.nd.array(np.random.uniform(size=n).astype(B.dtype), ctx)
        c = tvm.nd.array(np.zeros(n, dtype=C.dtype), ctx)
        fadd(a, b, c)
        tvm.testing.assert_allclose(
            c.asnumpy(), a.asnumpy() + b.asnumpy())
    check_c()


def test_relay_id():
    """Register a Relay identity function in a module and print the module."""
    x = relay.var('x', shape=[])
    func = relay.Function([x], x)
    ttype = relay.TensorType([], dtype='float32')
    # The constructed function type is not used afterwards.
    relay.FuncType([ttype], ttype)
    mod = relay.module.Module()
    func_gvar = relay.GlobalVar("f")
    mod[func_gvar] = func
    print(mod)


def test_add_pipeline():
    """Build a split/parallel/vectorized add pipeline with the C backend."""
    nn = 1024
    n = tvm.convert(nn)
    A = tvm.placeholder((n,), name='A')
    B = tvm.placeholder((n,), name='B')
    AA = tvm.compute((n,), lambda *i: A(*i), name='A')
    BB = tvm.compute((n,), lambda *i: B(*i), name='B')
    T = tvm.compute(A.shape, lambda *i: AA(*i) + BB(*i), name='T')
    C = tvm.compute(A.shape, lambda *i: T(*i), name='C')
    s = tvm.create_schedule(C.op)
    xo, xi = s[C].split(C.op.axis[0], factor=4)
    xo1, xo2 = s[C].split(xo, factor=13)
    s[C].parallel(xo2)
    s[C].pragma(xo1, "parallel_launch_point")
    s[C].pragma(xo2, "parallel_stride_pattern")
    s[C].pragma(xo2, "parallel_barrier_when_finish")
    s[C].vectorize(xi)

    def check_c():
        if not tvm.module.enabled("llvm"):
            return
        # Specifically allow offset to test codepath when offset is available
        Ab = tvm.decl_buffer(
            A.shape, A.dtype,
            elem_offset=tvm.var('Aoffset'),
            offset_factor=8,
            name='A')
        # NOTE(review): `binds` is built but never passed to `tvm.lower`;
        # confirm whether the offset code path is meant to be exercised.
        binds = {A : Ab}
        # BUILD and invoke the kernel.
        f1 = tvm.lower(s, [A, B, C], name="fadd_pipeline")
        fsplits = list(tvm.ir_pass.SplitHostDevice(f1))
        fsplits[0] = tvm.ir_pass.LowerTVMBuiltin(fsplits[0])
        mhost = tvm.codegen.build_module(fsplits[0], "c")
        temp = util.tempdir()
        path_dso = temp.relpath("temp.so")
        mhost.export_library(path_dso)
        m = tvm.module.load(path_dso)
        fadd = m["fadd_pipeline"]
        ctx = tvm.cpu(0)
        # launch the kernel.
        n = nn
        a = tvm.nd.array(np.random.uniform(size=n).astype(A.dtype), ctx)
        b = tvm.nd.array(np.random.uniform(size=n).astype(B.dtype), ctx)
        c = tvm.nd.array(np.zeros(n, dtype=C.dtype), ctx)
        fadd(a, b, c)
        tvm.testing.assert_allclose(
            c.asnumpy(), a.asnumpy() + b.asnumpy())

    with tvm.build_config(offset_factor=4):
        check_c()


def test_reinterpret():
    """Check the `reinterpret` intrinsic (int32 bits viewed as float32)."""
    nn = 1024
    n = tvm.convert(nn)
    A = tvm.placeholder((n,), name='A', dtype="int32")
    B = tvm.compute(A.shape, lambda *i: tvm.call_pure_intrin("float32", "reinterpret", A(*i)), name='B')
    s = tvm.create_schedule(B.op)

    def check_c():
        mhost = tvm.build(s, [A, B], "c", name="reinterpret")
        temp = util.tempdir()
        path_dso = temp.relpath("temp.so")
        mhost.export_library(path_dso)
        m = tvm.module.load(path_dso)
        fadd = m['reinterpret']
        ctx = tvm.cpu(0)
        n = nn
        a = tvm.nd.array(np.random.randint(-2 ** 30, 2 ** 30, size=n).astype(A.dtype), ctx)
        b = tvm.nd.array(np.zeros(n, dtype=B.dtype), ctx)
        fadd(a, b)
        tvm.testing.assert_allclose(
            b.asnumpy(), a.asnumpy().view('float32'))
    check_c()


if __name__ == "__main__":
    test_add()
    test_relay_id()
    test_add_pipeline()
    test_reinterpret()
import os

from nose.tools import nottest
import numpy as np
import tvm
from tvm.contrib import graph_runtime, util
from tvm import relay
import tvm.micro as micro
from tvm.relay.testing import resnet

# Use the host emulated micro device.
DEVICE_TYPE = "host"
TOOLCHAIN_PREFIX = ""


def create_micro_mod(c_mod, toolchain_prefix):
    """Produces a micro module from a given module.

    Parameters
    ----------
    c_mod : tvm.module.Module
        module with "c" as its target backend

    toolchain_prefix : str
        toolchain prefix to be used (see `tvm.micro.Session` docs)

    Return
    ------
    micro_mod : tvm.module.Module
        micro module for the target device
    """
    temp_dir = util.tempdir()
    lib_obj_path = temp_dir.relpath("dev_lib.obj")
    # Honour the caller's prefix instead of a hard-coded empty string.
    c_mod.export_library(
        lib_obj_path,
        fcompile=tvm.micro.cross_compiler(toolchain_prefix=toolchain_prefix))
    micro_mod = tvm.module.load(lib_obj_path, "micro_dev")
    return micro_mod


def relay_micro_build(func, toolchain_prefix, params=None):
    """Create a graph runtime module with a micro device context from a Relay function.

    Parameters
    ----------
    func : relay.Function
        function to compile

    toolchain_prefix : str
        toolchain prefix to be used (see `tvm.micro.Session` docs)

    params : dict
        input parameters that do not change during inference

    Return
    ------
    mod : tvm.module.Module
        graph runtime module for the target device
    """
    with tvm.build_config(disable_vectorize=True):
        graph, c_mod, params = relay.build(func, target="c", params=params)
    micro_mod = create_micro_mod(c_mod, toolchain_prefix)
    ctx = tvm.micro_dev(0)
    mod = graph_runtime.create(graph, micro_mod, ctx)
    mod.set_input(**params)
    return mod


def _add_const_func(shape, dtype):
    """Build the Relay function that adds the constant 1.0 to its input."""
    x = relay.var("x", relay.TensorType(shape=shape, dtype=dtype))
    ret = relay.add(x, relay.const(1.0))
    return relay.Function([x], ret)


# TODO(weberlo): Add example program to test scalar double/int TVMValue serialization.

def test_alloc():
    """Test tensor allocation on the device."""
    if not tvm.module.enabled("micro_dev"):
        return
    shape = (1024,)
    dtype = "float32"
    with micro.Session(DEVICE_TYPE, TOOLCHAIN_PREFIX):
        ctx = tvm.micro_dev(0)
        np_tensor = np.random.uniform(size=shape).astype(dtype)
        micro_tensor = tvm.nd.array(np_tensor, ctx)
        tvm.testing.assert_allclose(np_tensor, micro_tensor.asnumpy())


def test_add():
    """Test a module which performs addition."""
    if not tvm.module.enabled("micro_dev"):
        return
    shape = (1024,)
    dtype = "float32"

    # Construct TVM expression.
    tvm_shape = tvm.convert(shape)
    A = tvm.placeholder(tvm_shape, name="A", dtype=dtype)
    B = tvm.placeholder(tvm_shape, name="B", dtype=dtype)
    C = tvm.compute(A.shape, lambda *i: A(*i) + B(*i), name="C")
    s = tvm.create_schedule(C.op)

    func_name = "fadd"
    c_mod = tvm.build(s, [A, B, C], target="c", name=func_name)

    with micro.Session(DEVICE_TYPE, TOOLCHAIN_PREFIX):
        micro_mod = create_micro_mod(c_mod, TOOLCHAIN_PREFIX)
        micro_func = micro_mod[func_name]
        ctx = tvm.micro_dev(0)
        a = tvm.nd.array(np.random.uniform(size=shape).astype(dtype), ctx)
        b = tvm.nd.array(np.random.uniform(size=shape).astype(dtype), ctx)
        c = tvm.nd.array(np.zeros(shape, dtype=dtype), ctx)
        micro_func(a, b, c)

        tvm.testing.assert_allclose(
            c.asnumpy(), a.asnumpy() + b.asnumpy())


def test_workspace_add():
    """Test a module which uses a workspace to compute an intermediate value."""
    if not tvm.module.enabled("micro_dev"):
        return
    shape = (1024,)
    dtype = "float32"

    # Construct TVM expression. `B` is an intermediate, not a placeholder;
    # the original placeholder `B` was overwritten before any use.
    tvm_shape = tvm.convert(shape)
    A = tvm.placeholder(tvm_shape, name="A", dtype=dtype)
    B = tvm.compute(A.shape, lambda *i: A(*i) + 1, name="B")
    C = tvm.compute(A.shape, lambda *i: B(*i) + 1, name="C")
    s = tvm.create_schedule(C.op)

    func_name = "fadd_two_workspace"
    c_mod = tvm.build(s, [A, C], target="c", name=func_name)

    with micro.Session(DEVICE_TYPE, TOOLCHAIN_PREFIX):
        micro_mod = create_micro_mod(c_mod, TOOLCHAIN_PREFIX)
        micro_func = micro_mod[func_name]
        ctx = tvm.micro_dev(0)
        a = tvm.nd.array(np.random.uniform(size=shape).astype(dtype), ctx)
        c = tvm.nd.array(np.zeros(shape, dtype=dtype), ctx)
        micro_func(a, c)

        tvm.testing.assert_allclose(
            c.asnumpy(), a.asnumpy() + 2.0)


def test_graph_runtime():
    """Test a program which uses the graph runtime."""
    if not tvm.module.enabled("micro_dev"):
        return
    shape = (1024,)
    dtype = "float32"

    # Construct Relay program.
    x = relay.var("x", relay.TensorType(shape=shape, dtype=dtype))
    xx = relay.multiply(x, x)
    z = relay.add(xx, relay.const(1.0))
    func = relay.Function([x], z)

    with micro.Session(DEVICE_TYPE, TOOLCHAIN_PREFIX):
        mod = relay_micro_build(func, TOOLCHAIN_PREFIX)

        x_in = np.random.uniform(size=shape[0]).astype(dtype)
        mod.run(x=x_in)
        result = mod.get_output(0).asnumpy()

        tvm.testing.assert_allclose(
            result, x_in * x_in + 1.0)


def test_multiple_modules():
    """Test loading multiple modules on the device simultaneously."""
    if not tvm.module.enabled("micro_dev"):
        return
    shape = (1024,)
    dtype = "float32"

    # Construct Relay add program.
    add_const_func = _add_const_func(shape, dtype)
    # Construct Relay subtract program.
    x = relay.var("x", relay.TensorType(shape=shape, dtype=dtype))
    ret = relay.subtract(x, relay.const(1.0))
    sub_const_func = relay.Function([x], ret)

    with micro.Session(DEVICE_TYPE, TOOLCHAIN_PREFIX):
        add_const_mod = relay_micro_build(add_const_func, TOOLCHAIN_PREFIX)
        sub_const_mod = relay_micro_build(sub_const_func, TOOLCHAIN_PREFIX)

        x_in = np.random.uniform(size=shape[0]).astype(dtype)
        add_const_mod.run(x=x_in)
        add_result = add_const_mod.get_output(0).asnumpy()
        sub_const_mod.run(x=x_in)
        sub_result = sub_const_mod.get_output(0).asnumpy()

        tvm.testing.assert_allclose(
            add_result, x_in + 1.0)
        tvm.testing.assert_allclose(
            sub_result, x_in - 1.0)


def test_interleave_sessions():
    """Test closing and reopening sessions."""
    if not tvm.module.enabled("micro_dev"):
        return
    shape = (1024,)
    dtype = "float32"

    # Construct Relay add program.
    add_const_func = _add_const_func(shape, dtype)

    sess_a = micro.Session(DEVICE_TYPE, TOOLCHAIN_PREFIX)
    sess_b = micro.Session(DEVICE_TYPE, TOOLCHAIN_PREFIX)
    with sess_a:
        np_tensor_a = np.random.uniform(size=shape).astype(dtype)
        micro_tensor_a = tvm.nd.array(np_tensor_a, tvm.micro_dev(0))
    with sess_b:
        np_tensor_b = np.random.uniform(size=shape).astype(dtype)
        micro_tensor_b = tvm.nd.array(np_tensor_b, tvm.micro_dev(0))
    with sess_a:
        add_const_mod = relay_micro_build(add_const_func, TOOLCHAIN_PREFIX)
        add_const_mod.run(x=micro_tensor_a)
        add_result = add_const_mod.get_output(0).asnumpy()
        tvm.testing.assert_allclose(
            add_result, np_tensor_a + 1.0)
    with sess_b:
        add_const_mod = relay_micro_build(add_const_func, TOOLCHAIN_PREFIX)
        add_const_mod.run(x=micro_tensor_b)
        add_result = add_const_mod.get_output(0).asnumpy()
        tvm.testing.assert_allclose(
            add_result, np_tensor_b + 1.0)


def test_nested_sessions():
    """Test entering and exiting nested session contexts."""
    if not tvm.module.enabled("micro_dev"):
        return
    shape = (1024,)
    dtype = "float32"

    # Construct Relay add program.
    add_const_func = _add_const_func(shape, dtype)

    sess_a = micro.Session(DEVICE_TYPE, TOOLCHAIN_PREFIX)
    sess_b = micro.Session(DEVICE_TYPE, TOOLCHAIN_PREFIX)
    with sess_a:
        np_tensor_a = np.random.uniform(size=shape).astype(dtype)
        micro_tensor_a = tvm.nd.array(np_tensor_a, tvm.micro_dev(0))
        with sess_b:
            np_tensor_b = np.random.uniform(size=shape).astype(dtype)
            micro_tensor_b = tvm.nd.array(np_tensor_b, tvm.micro_dev(0))
        # Back in `sess_a` once the inner context has exited.
        add_const_mod = relay_micro_build(add_const_func, TOOLCHAIN_PREFIX)
        add_const_mod.run(x=micro_tensor_a)
        add_result = add_const_mod.get_output(0).asnumpy()
        tvm.testing.assert_allclose(
            add_result, np_tensor_a + 1.0)


def test_inactive_session_use():
    """Test the use of objects allocated in a session that is no longer active."""
    if not tvm.module.enabled("micro_dev"):
        return
    shape = (1024,)
    dtype = "float32"

    # Construct Relay add program.
    add_const_func = _add_const_func(shape, dtype)

    sess_a = micro.Session(DEVICE_TYPE, TOOLCHAIN_PREFIX)
    sess_b = micro.Session(DEVICE_TYPE, TOOLCHAIN_PREFIX)
    with sess_a:
        np_tensor_a = np.random.uniform(size=shape).astype(dtype)
        micro_tensor_a = tvm.nd.array(np_tensor_a, tvm.micro_dev(0))
        add_const_mod = relay_micro_build(add_const_func, TOOLCHAIN_PREFIX)

    with sess_b:
        # These objects belong to `sess_a`.
        add_const_mod.run(x=micro_tensor_a)
        add_result = add_const_mod.get_output(0).asnumpy()
        tvm.testing.assert_allclose(
            add_result, np_tensor_a + 1.0)


if __name__ == "__main__":
    test_alloc()
    test_add()
    test_workspace_add()
    test_graph_runtime()
    test_multiple_modules()
    test_interleave_sessions()
    test_nested_sessions()
    test_inactive_session_use()