[Doc] IRModule #17298

Merged
merged 1 commit, Aug 23, 2024
281 changes: 281 additions & 0 deletions docs/get_started/tutorials/ir_module.py
@@ -0,0 +1,281 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

"""
.. _ir_module:

IRModule
========
This tutorial presents the core abstraction of Apache TVM Unity, the IRModule.
An IRModule encompasses the **entirety** of an ML model, incorporating the
computational graph, tensor programs, and potential calls to external libraries.

.. contents:: Table of Contents
:local:
:depth: 1
"""

import numpy as np
import tvm
from tvm import relax

######################################################################
# Create IRModule
# ---------------
# IRModules can be initialized in various ways. We demonstrate a few of them
# below.

import torch
from torch import fx, nn
from tvm.relax.frontend.torch import from_fx

######################################################################
# Import from existing models
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~
# The most common way to initialize an IRModule is to import from an existing
# model. Apache TVM Unity accommodates imports from a range of frameworks,
# such as PyTorch and ONNX. This tutorial demonstrates only the import process
# from PyTorch.


# Create a dummy model
class TorchModel(nn.Module):
def __init__(self):
super(TorchModel, self).__init__()
self.fc1 = nn.Linear(784, 256)
self.relu1 = nn.ReLU()
self.fc2 = nn.Linear(256, 10)

def forward(self, x):
x = self.fc1(x)
x = self.relu1(x)
x = self.fc2(x)
return x


# Give the input shape and data type
input_info = [((1, 784), "float32")]

# Convert the model to IRModule
with torch.no_grad():
torch_fx_model = fx.symbolic_trace(TorchModel())
mod_from_torch = from_fx(torch_fx_model, input_info, keep_params_as_input=True)

# Detach the parameters from the IRModule; they are returned separately as a
# dict mapping function names to lists of weights
mod_from_torch, params_from_torch = relax.frontend.detach_params(mod_from_torch)
# Print the IRModule
mod_from_torch.show()

######################################################################
# Write with Relax NN Module
# ~~~~~~~~~~~~~~~~~~~~~~~~~~
# Apache TVM Unity also provides a set of PyTorch-like APIs to help users
# write an IRModule directly.

from tvm.relax.frontend import nn


class RelaxModel(nn.Module):
def __init__(self):
super(RelaxModel, self).__init__()
self.fc1 = nn.Linear(784, 256)
self.relu1 = nn.ReLU()
self.fc2 = nn.Linear(256, 10)

def forward(self, x):
x = self.fc1(x)
x = self.relu1(x)
x = self.fc2(x)
return x


mod_from_relax, params_from_relax = RelaxModel().export_tvm(
{"forward": {"x": nn.spec.Tensor((1, 784), "float32")}}
)
mod_from_relax.show()

######################################################################
# Create via TVMScript
# ~~~~~~~~~~~~~~~~~~~~
# TVMScript is a Python-based DSL for IRModules. We can directly print an
# IRModule in TVMScript syntax, or parse TVMScript to obtain an IRModule.

from tvm.script import ir as I
from tvm.script import relax as R


@I.ir_module
class TVMScriptModule:
@R.function
def main(
x: R.Tensor((1, 784), dtype="float32"),
fc1_weight: R.Tensor((256, 784), dtype="float32"),
fc1_bias: R.Tensor((256,), dtype="float32"),
fc2_weight: R.Tensor((10, 256), dtype="float32"),
fc2_bias: R.Tensor((10,), dtype="float32"),
) -> R.Tensor((1, 10), dtype="float32"):
R.func_attr({"num_input": 1})
with R.dataflow():
permute_dims = R.permute_dims(fc1_weight, axes=None)
matmul = R.matmul(x, permute_dims, out_dtype="void")
add = R.add(matmul, fc1_bias)
relu = R.nn.relu(add)
permute_dims1 = R.permute_dims(fc2_weight, axes=None)
matmul1 = R.matmul(relu, permute_dims1, out_dtype="void")
add1 = R.add(matmul1, fc2_bias)
gv = add1
R.output(gv)
return gv


mod_from_script = TVMScriptModule
mod_from_script.show()
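
######################################################################
# ``show()`` renders the IRModule as TVMScript with highlighting. As a small sketch,
# ``script()`` returns the same TVMScript text as a plain Python string, which is
# handy for saving or diffing modules.

script_text = mod_from_script.script()
assert isinstance(script_text, str)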

######################################################################
# Attributes of an IRModule
# -------------------------
# An IRModule is a collection of functions, indexed by GlobalVars.

mod = mod_from_torch
print(mod.get_global_vars())

######################################################################
# We can access the functions in the IRModule by indexing with the GlobalVars
# or their names.

# Index by the global var name
print(mod["main"])
# Index by the GlobalVar, and check that it refers to the same function
(gv,) = mod.get_global_vars()
assert mod[gv] == mod["main"]
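
######################################################################
# As a small sketch, an IRModule also behaves like a mapping from GlobalVars to
# functions: its ``functions`` attribute can be iterated to list every function
# in the module.

for global_var, func in mod.functions.items():
    print(global_var.name_hint, type(func))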

######################################################################
# Transformations on IRModules
# ----------------------------
# Transformations are an important component of Apache TVM Unity. A transformation
# takes in an IRModule and outputs another IRModule. Applying a sequence of
# transformations to an IRModule to obtain a new, optimized IRModule is the common
# way to optimize a model.
#
# In this getting-started tutorial, we only demonstrate how to apply transformations
# to an IRModule. For details of each transformation, please refer to the
# :ref:`Transformation API Reference <api-relax-transformation>`.

######################################################################
# We first apply the **LegalizeOps** transformation to the IRModule. This transformation
# converts the Relax module into a mixed stage, with both Relax and TensorIR functions
# within the same module. Meanwhile, the Relax operators are converted into ``call_tir``.

mod = mod_from_torch
mod = relax.transform.LegalizeOps()(mod)
mod.show()

######################################################################
# After the transformation, there are many more functions inside the module. Let's print
# the global vars again.

print(mod.get_global_vars())

######################################################################
# Next, Apache TVM Unity provides a set of default transformation pipelines
# to simplify the transformation process. Here we apply the default **zero** pipeline
# to the module. It contains the following fundamental transformations:
#
# - **LegalizeOps**: This transform converts the Relax operators into ``call_tir`` functions
#   with the corresponding TensorIR functions. After this transform, the IRModule will
#   contain both Relax functions and TensorIR functions.
# - **AnnotateTIROpPattern**: This transform annotates the pattern of the TensorIR functions,
# preparing them for subsequent operator fusion.
# - **FoldConstant**: This pass performs constant folding, optimizing operations
# involving constants.
# - **FuseOps and FuseTIR**: These two passes work together to fuse operators based on the
# patterns annotated in the previous step (AnnotateTIROpPattern). These passes transform
# both Relax functions and TensorIR functions.
#
# .. note::
#
#   Here, we have applied **LegalizeOps** twice in the flow. The second application is
#   redundant but harmless.
#
#   Every pass can appear more than once in the flow, since we ensure that each pass can
#   handle all legal IRModule inputs. This design helps users construct their own
#   pipelines, as sketched after the code below.

mod = relax.get_pipeline("zero")(mod)
mod.show()
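
######################################################################
# As noted above, passes compose freely, so users can assemble their own pipelines.
# Below is a minimal sketch of a hand-built pipeline using ``tvm.transform.Sequential``;
# the passes mirror the **zero** pipeline described above, and the sketch is meant as an
# illustration rather than a drop-in replacement for ``relax.get_pipeline``.

custom_pipeline = tvm.transform.Sequential(
    [
        relax.transform.LegalizeOps(),
        relax.transform.AnnotateTIROpPattern(),
        relax.transform.FoldConstant(),
        relax.transform.FuseOps(),
        relax.transform.FuseTIR(),
    ]
)
# Apply the hand-built pipeline to a fresh copy of the imported module
_ = custom_pipeline(mod_from_torch)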

######################################################################
# Deploy the IRModule Universally
# -------------------------------
# After the optimization, we can compile the model into a TVM runtime module.
# Notably, Apache TVM Unity provides universal deployment, which means we can deploy
# the same IRModule on different backends, including CPU, GPU, and other emerging
# backends.
#
# Deploy on CPU
# ~~~~~~~~~~~~~
# We can deploy the IRModule on CPU by specifying the target as ``llvm``.

exec = relax.build(mod, target="llvm")
dev = tvm.cpu()
vm = relax.VirtualMachine(exec, dev)

raw_data = np.random.rand(1, 784).astype("float32")
data = tvm.nd.array(raw_data, dev)
cpu_out = vm["main"](data, *params_from_torch["main"]).numpy()
print(cpu_out)

######################################################################
# Deploy on GPU
# ~~~~~~~~~~~~~
# Besides the CPU backend, we can also deploy the IRModule on GPU. GPU kernels require
# extra information, such as thread bindings and shared memory allocations, so a
# further transformation is needed to generate the GPU programs.
#
# We use ``DLight`` to generate the GPU programs. In this tutorial, we won't go into
# the details of ``DLight``.
#

from tvm import dlight as dl

with tvm.target.Target("cuda"):
gpu_mod = dl.ApplyDefaultSchedule(
dl.gpu.Matmul(),
dl.gpu.Fallback(),
)(mod)

######################################################################
# Now we can compile the IRModule for GPU, in a similar way as we did for CPU.

exec = relax.build(gpu_mod, target="cuda")
dev = tvm.device("cuda", 0)
vm = relax.VirtualMachine(exec, dev)
# Need to allocate data and params on GPU device
data = tvm.nd.array(raw_data, dev)
gpu_params = [tvm.nd.array(p, dev) for p in params_from_torch["main"]]
gpu_out = vm["main"](data, *gpu_params).numpy()
print(gpu_out)

# Check the correctness of the results
assert np.allclose(cpu_out, gpu_out, atol=1e-3)

######################################################################
# Deploy on Other Backends
# ~~~~~~~~~~~~~~~~~~~~~~~~
# Apache TVM Unity also supports other backends, such as different kinds of GPUs
# (Metal, ROCm, Vulkan and OpenCL), different kinds of CPUs (x86, ARM), and other
# emerging backends (e.g., WebAssembly). The deployment process is similar to that
# for the GPU backend, as sketched below.
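
######################################################################
# As an illustration, below is a minimal sketch of targeting Vulkan. It assumes a
# Vulkan-capable device and a TVM build with Vulkan support enabled; the flow mirrors
# the CUDA example above, only with a different target and device.

with tvm.target.Target("vulkan"):
    vulkan_mod = dl.ApplyDefaultSchedule(
        dl.gpu.Matmul(),
        dl.gpu.Fallback(),
    )(mod)

exec = relax.build(vulkan_mod, target="vulkan")
dev = tvm.vulkan(0)
vm = relax.VirtualMachine(exec, dev)
data = tvm.nd.array(raw_data, dev)
vulkan_params = [tvm.nd.array(p, dev) for p in params_from_torch["main"]]
vulkan_out = vm["main"](data, *vulkan_params).numpy()
print(vulkan_out)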
1 change: 1 addition & 0 deletions docs/index.rst
@@ -34,6 +34,7 @@ driving its costs down.

install/index
get_started/tutorials/quick_start
get_started/tutorials/ir_module
contribute/index

.. toctree::