From 8bf383e6920764cfe02736a238eae3ba4a616c32 Mon Sep 17 00:00:00 2001 From: huajsj Date: Fri, 27 May 2022 19:45:12 -0700 Subject: [PATCH 01/39] [Runtime][PipelineExecutor] Tutorial of using pipeline executor. Tutorial of using pipeline executor including the byoc use case. --- .../using_with_pipeline_executor.py | 187 ++++++++++++++++++ 1 file changed, 187 insertions(+) create mode 100644 gallery/how_to/work_with_relay/using_with_pipeline_executor.py diff --git a/gallery/how_to/work_with_relay/using_with_pipeline_executor.py b/gallery/how_to/work_with_relay/using_with_pipeline_executor.py new file mode 100644 index 000000000000..17f14fef72a3 --- /dev/null +++ b/gallery/how_to/work_with_relay/using_with_pipeline_executor.py @@ -0,0 +1,187 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +""" +Using Pipeline Executor in Relay +================================= +**Author**: `Hua Jiang `_ + +This is a short tutorial on how to use Pipeline Executor with Relay. + +Relay uses TVM internally to generate target specific code. For example, with cuda backend TVM generates cuda kernels for all layers in the user provided network. +But sometimes it is also helpful to incorporate external libraries developed by various vendors into Relay. 
+Luckily, TVM has a mechanism to transparently call into these libraries. +For Relay users, all we need to do is just to set a target string appropriately. + +Before we can use external libraries from Relay, your TVM needs to be built with libraries you want to use. +For example, to use cuDNN, USE_CUDNN option in `cmake/config.cmake` needs to be enabled, and cuDNN include and library directories need to be specified if necessary. + +To begin with, we import Relay and TVM. +""" +import tvm +from tvm import te +import numpy as np +from tvm.contrib import graph_executor as runtime +from tvm import relay +from tvm.relay import testing +import tvm.testing +import time + +###################################################################### +# Create a simple network, this network can be a pre-trained model too. +# ----------------------- +# Let's create a very simple network for demonstration. +# It consists of convolution, batch normalization, and ReLU activation. +def get_network(): + out_channels = 16 + batch_size = 1 + data = relay.var("data", relay.TensorType((batch_size, 3, 224, 224), "float32")) + weight = relay.var("weight") + second_weight = relay.var("second_weight") + bn_gamma = relay.var("bn_gamma") + bn_beta = relay.var("bn_beta") + bn_mmean = relay.var("bn_mean") + bn_mvar = relay.var("bn_var") + simple_net = relay.nn.conv2d( + data=data, weight=weight, kernel_size=(3, 3), channels=out_channels, padding=(1, 1) + ) + simple_net = relay.nn.batch_norm(simple_net, bn_gamma, bn_beta, bn_mmean, bn_mvar)[0] + simple_net = relay.nn.relu(simple_net) + simple_net = relay.nn.conv2d( + data=simple_net, + weight=second_weight, + kernel_size=(3, 3), + channels=out_channels, + padding=(1, 1), + ) + simple_net = relay.Function(relay.analysis.free_vars(simple_net), simple_net) + data_shape = (batch_size, 3, 224, 224) + net, params = testing.create_workload(simple_net) + return net, params, data_shape + + +net, params, data_shape = get_network() 
+###################################################################### +# Apply a customer graph splitting function. +# ------------------------------- +# We use an testing linear graph splitting function as a example. User also can create their +# own splitting function logic. +import os + +os.sys.path.append(os.path.abspath("../../../tests/python/relay")) +from test_pipeline_executor import graph_split + +# Splitting the network into two subgraphs. +split_config = [{"op_name": "nn.relu", "op_index": 0}] +subgraphs = graph_split(net["main"], split_config, params) +############################################################## +# The generated subgraphs should look something like below. +##subgraphs[0]) +# +# def @main(%data: Tensor[(1, 3, 224, 224), float32]) { +# %0 = nn.conv2d(%data, meta[relay.Constant][0] /* ty=Tensor[(16, 3, 3, 3), float32] */, padding=[1, 1, 1, 1], channels=16, kernel_size=[3, 3]) /* ty=Tensor[(1, 16, 224, 224), float32] */; +# %1 = nn.batch_norm(%0, meta[relay.Constant][1] /* ty=Tensor[(16), float32] */, meta[relay.Constant][2] /* ty=Tensor[(16), float32]*/, meta[relay.Constant][3] /* ty=Tensor[(16), float32] */, meta[relay.Constant][4] /* ty=Tensor[(16), float32] */) /* ty=(Tensor[(1,16, 224, 224), float32], Tensor[(16), float32], Tensor[(16), float32]) */; +# %2 = %1.0; +# nn.relu(%2) /* ty=Tensor[(1, 16, 224, 224), float32] */ +# } +# +# +##subgraphs[1] +# +# def @main(%data_n_0: Tensor[(1, 16, 224, 224), float32]) { +# nn.conv2d(%data_n_0, meta[relay.Constant][0] /* ty=Tensor[(16, 16, 3, 3), float32] */, padding=[1, 1, 1, 1], channels=16, kernel_size=[3, 3]) /* ty=Tensor[(1, 16, 224, 224), float32] */ +# } +############################################################## + +############################################################## +# Enable the pipeline executor, and doing the configuration. 
+# ------------------------------------------------------------- +# In build/config.cmake set USE_PIPELINE_EXECUTOR as ON to enable pipeline executor +# import pipeline_executor, and pipeline_executor_build +############################################################## +from tvm.contrib import graph_executor, pipeline_executor, pipeline_executor_build + +# Create subgraph pipeline configuration. +mod0, mod1 = subgraphs[0], subgraphs[1] +#################################################################### +# Associate the subgraph module with a target. +# Set the codegen of the second subgraph module as dnnl, and the target as the CPU +# Enable dnnl by set USE_DNNL_CODEGEN as on in config.cmake and install MKL-DNN. +# using BYOC to apply dnnl codegen +mod0 = relay.transform.AnnotateTarget(["dnnl"])(mod0) +mod0 = relay.transform.MergeCompilerRegions()(mod0) +mod0 = relay.transform.PartitionGraph()(mod0) +# Start setting the pipeline configure. +pipe_config = pipeline_executor_build.PipelineConfig() +# Set the compile target of the second subgraph module as CPU. +pipe_config[mod0].target = "llvm" +pipe_config[mod0].dev = tvm.cpu(0) +# Set the cpu afinity for control flow, for example use cpu 0 for control flow. +pipe_config[mod1].cpu_affinity = "0" +# Set the compile target of the second subgraph module as CPU. +pipe_config[mod1].target = "llvm" +pipe_config[mod1].dev = tvm.cpu(0) +# Set the cpu afinity for control flow, for example use cpu 1 for control flow. +pipe_config[mod1].cpu_affinity = "1" +pipe_config["input"]["data"].connect(pipe_config[mod0]["input"]["data"]) +pipe_config[mod0]["output"][0].connect(pipe_config[mod1]["input"]["data_n_0"]) +pipe_config[mod1]["output"]["0"].connect(pipe_config["output"][0]) +#################################################################### +# The pipeline configuration like below(print(pipe_config)). 
+# +# Inputs +# |data: mod0:data +# +# output +# |output(0) : mod1.output(0) +# +# connections +# |mod0.output(0)-> mod1.data_n_0 +# Build the pipeline executor +with tvm.transform.PassContext(opt_level=3): + pipeline_mod_factory = pipeline_executor_build.build(pipe_config) +# Export the parameter configuration to a file. +directory_path = tvm.contrib.utils.tempdir().temp_dir +# If the directory does not exist, create it. +if not os.path.exists(directory_path): + os.makedirs(directory_path) +config_file_name = pipeline_mod_factory.export_library(directory_path) +# Use the load function to create and initialize PipelineModule. +pipeline_module = pipeline_executor.PipelineModule.load_library(config_file_name) +# Allocated a data. +data = np.random.uniform(-1, 1, size=data_shape).astype("float32") +# Run the pipeline executor +pipeline_module.set_input("data", tvm.nd.array(data)) +pipeline_module.run() +outputs = [] +while not (outputs := pipeline_module.get_output()): + time.sleep(0.001) +# Run with graph_executor and verify the output of pipeline executor. +target = "llvm" +dev = tvm.device(target, 0) +lib0 = relay.build_module.build(mod0, target, params=params) +lib1 = relay.build_module.build(mod1, target, params=params) +module0 = runtime.GraphModule(lib0["default"](dev)) +module1 = runtime.GraphModule(lib1["default"](dev)) +module0.set_input("data", data) +module0.run() +out_shape = (1, 16, 224, 224) +out = module0.get_output(0, tvm.nd.empty(out_shape)) +module1.set_input("data_n_0", out) +module1.run() +out = module1.get_output(0, tvm.nd.empty(out_shape)) +# Verify the result. 
+tvm.testing.assert_allclose(outputs[0].numpy(), out.numpy()) From 6332de0ce1f17e4dc846770f5f2125aad345e3fa Mon Sep 17 00:00:00 2001 From: Hua Jiang Date: Thu, 2 Jun 2022 21:55:52 -0700 Subject: [PATCH 02/39] fix ci issue --- .../how_to/work_with_relay/using_with_pipeline_executor.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/gallery/how_to/work_with_relay/using_with_pipeline_executor.py b/gallery/how_to/work_with_relay/using_with_pipeline_executor.py index 17f14fef72a3..111071cb0f57 100644 --- a/gallery/how_to/work_with_relay/using_with_pipeline_executor.py +++ b/gallery/how_to/work_with_relay/using_with_pipeline_executor.py @@ -81,7 +81,7 @@ def get_network(): # own splitting function logic. import os -os.sys.path.append(os.path.abspath("../../../tests/python/relay")) +os.sys.path.append(os.path.abspath(os.environ["TVM_HOME"] + "/tests/python/relay")) from test_pipeline_executor import graph_split # Splitting the network into two subgraphs. @@ -167,7 +167,8 @@ def get_network(): pipeline_module.set_input("data", tvm.nd.array(data)) pipeline_module.run() outputs = [] -while not (outputs := pipeline_module.get_output()): +while not outputs: + outputs = pipeline_module.get_output() time.sleep(0.001) # Run with graph_executor and verify the output of pipeline executor. target = "llvm" From cb49f993f1f538ea7b54b17b8c078eb282683096 Mon Sep 17 00:00:00 2001 From: Hua Jiang Date: Thu, 2 Jun 2022 21:59:38 -0700 Subject: [PATCH 03/39] document change. 
--- .../work_with_relay/using_with_pipeline_executor.py | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/gallery/how_to/work_with_relay/using_with_pipeline_executor.py b/gallery/how_to/work_with_relay/using_with_pipeline_executor.py index 111071cb0f57..d3e52d7e70d3 100644 --- a/gallery/how_to/work_with_relay/using_with_pipeline_executor.py +++ b/gallery/how_to/work_with_relay/using_with_pipeline_executor.py @@ -20,16 +20,6 @@ **Author**: `Hua Jiang `_ This is a short tutorial on how to use Pipeline Executor with Relay. - -Relay uses TVM internally to generate target specific code. For example, with cuda backend TVM generates cuda kernels for all layers in the user provided network. -But sometimes it is also helpful to incorporate external libraries developed by various vendors into Relay. -Luckily, TVM has a mechanism to transparently call into these libraries. -For Relay users, all we need to do is just to set a target string appropriately. - -Before we can use external libraries from Relay, your TVM needs to be built with libraries you want to use. -For example, to use cuDNN, USE_CUDNN option in `cmake/config.cmake` needs to be enabled, and cuDNN include and library directories need to be specified if necessary. - -To begin with, we import Relay and TVM. 
""" import tvm from tvm import te From 226fc58db82bc666a899aa399f476cbfea74308a Mon Sep 17 00:00:00 2001 From: Hua Jiang Date: Fri, 3 Jun 2022 00:05:23 -0700 Subject: [PATCH 04/39] triger build --- gallery/how_to/work_with_relay/using_with_pipeline_executor.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gallery/how_to/work_with_relay/using_with_pipeline_executor.py b/gallery/how_to/work_with_relay/using_with_pipeline_executor.py index d3e52d7e70d3..2df82546f449 100644 --- a/gallery/how_to/work_with_relay/using_with_pipeline_executor.py +++ b/gallery/how_to/work_with_relay/using_with_pipeline_executor.py @@ -19,7 +19,7 @@ ================================= **Author**: `Hua Jiang `_ -This is a short tutorial on how to use Pipeline Executor with Relay. +This is a short tutorial on how to use the Pipeline Executor with Relay. """ import tvm from tvm import te From 031b3ad13dd2a1bf679ab83cfb873b5dee836cbc Mon Sep 17 00:00:00 2001 From: huajsj Date: Sat, 4 Jun 2022 22:20:29 -0700 Subject: [PATCH 05/39] fix doc issue --- .../using_with_pipeline_executor.py | 57 ++++++++++--------- 1 file changed, 31 insertions(+), 26 deletions(-) diff --git a/gallery/how_to/work_with_relay/using_with_pipeline_executor.py b/gallery/how_to/work_with_relay/using_with_pipeline_executor.py index 2df82546f449..17fa994c982f 100644 --- a/gallery/how_to/work_with_relay/using_with_pipeline_executor.py +++ b/gallery/how_to/work_with_relay/using_with_pipeline_executor.py @@ -30,9 +30,9 @@ import tvm.testing import time -###################################################################### +####################################################################### # Create a simple network, this network can be a pre-trained model too. -# ----------------------- +# --------------------------------------------------------------------- # Let's create a very simple network for demonstration. # It consists of convolution, batch normalization, and ReLU activation. 
def get_network(): @@ -64,9 +64,9 @@ def get_network(): net, params, data_shape = get_network() -###################################################################### +############################################# # Apply a customer graph splitting function. -# ------------------------------- +# ------------------------------------------ # We use an testing linear graph splitting function as a example. User also can create their # own splitting function logic. import os @@ -77,28 +77,30 @@ def get_network(): # Splitting the network into two subgraphs. split_config = [{"op_name": "nn.relu", "op_index": 0}] subgraphs = graph_split(net["main"], split_config, params) -############################################################## +########################################################### # The generated subgraphs should look something like below. -##subgraphs[0]) -# -# def @main(%data: Tensor[(1, 3, 224, 224), float32]) { -# %0 = nn.conv2d(%data, meta[relay.Constant][0] /* ty=Tensor[(16, 3, 3, 3), float32] */, padding=[1, 1, 1, 1], channels=16, kernel_size=[3, 3]) /* ty=Tensor[(1, 16, 224, 224), float32] */; -# %1 = nn.batch_norm(%0, meta[relay.Constant][1] /* ty=Tensor[(16), float32] */, meta[relay.Constant][2] /* ty=Tensor[(16), float32]*/, meta[relay.Constant][3] /* ty=Tensor[(16), float32] */, meta[relay.Constant][4] /* ty=Tensor[(16), float32] */) /* ty=(Tensor[(1,16, 224, 224), float32], Tensor[(16), float32], Tensor[(16), float32]) */; -# %2 = %1.0; -# nn.relu(%2) /* ty=Tensor[(1, 16, 224, 224), float32] */ -# } -# -# -##subgraphs[1] -# -# def @main(%data_n_0: Tensor[(1, 16, 224, 224), float32]) { -# nn.conv2d(%data_n_0, meta[relay.Constant][0] /* ty=Tensor[(16, 16, 3, 3), float32] */, padding=[1, 1, 1, 1], channels=16, kernel_size=[3, 3]) /* ty=Tensor[(1, 16, 224, 224), float32] */ -# } -############################################################## +#---------------------------------------------------------- +``` +#subgraphs[0]) 
-############################################################## + def @main(%data: Tensor[(1, 3, 224, 224), float32]) { + %0 = nn.conv2d(%data, meta[relay.Constant][0] /* ty=Tensor[(16, 3, 3, 3), float32] */, padding=[1, 1, 1, 1], channels=16, kernel_size=[3, 3]) /* ty=Tensor[(1, 16, 224, 224), float32] */; + %1 = nn.batch_norm(%0, meta[relay.Constant][1] /* ty=Tensor[(16), float32] */, meta[relay.Constant][2] /* ty=Tensor[(16), float32]*/, meta[relay.Constant][3] /* ty=Tensor[(16), float32] */, meta[relay.Constant][4] /* ty=Tensor[(16), float32] */) /* ty=(Tensor[(1,16, 224, 224), float32], Tensor[(16), float32], Tensor[(16), float32]) */; + %2 = %1.0; + nn.relu(%2) /* ty=Tensor[(1, 16, 224, 224), float32] */ + } + + +#subgraphs[1] + + def @main(%data_n_0: Tensor[(1, 16, 224, 224), float32]) { + nn.conv2d(%data_n_0, meta[relay.Constant][0] /* ty=Tensor[(16, 16, 3, 3), float32] */, padding=[1, 1, 1, 1], channels=16, kernel_size=[3, 3]) /* ty=Tensor[(1, 16, 224, 224), float32] */ + } +``` + +############################################################ # Enable the pipeline executor, and doing the configuration. -# ------------------------------------------------------------- +# ---------------------------------------------------------- # In build/config.cmake set USE_PIPELINE_EXECUTOR as ON to enable pipeline executor # import pipeline_executor, and pipeline_executor_build ############################################################## @@ -129,9 +131,11 @@ def get_network(): pipe_config["input"]["data"].connect(pipe_config[mod0]["input"]["data"]) pipe_config[mod0]["output"][0].connect(pipe_config[mod1]["input"]["data_n_0"]) pipe_config[mod1]["output"]["0"].connect(pipe_config["output"][0]) -#################################################################### -# The pipeline configuration like below(print(pipe_config)). -# +########################################## +# The pipeline configuration like below(). 
+#----------------------------------------- +``` +print(pipe_config) # Inputs # |data: mod0:data # @@ -140,6 +144,7 @@ def get_network(): # # connections # |mod0.output(0)-> mod1.data_n_0 +``` # Build the pipeline executor with tvm.transform.PassContext(opt_level=3): pipeline_mod_factory = pipeline_executor_build.build(pipe_config) From d046177a3a5945f6d82a6ee50823f48975595989 Mon Sep 17 00:00:00 2001 From: huajsj Date: Sat, 4 Jun 2022 22:38:23 -0700 Subject: [PATCH 06/39] fix ci issue --- .../using_with_pipeline_executor.py | 28 +++++++++---------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/gallery/how_to/work_with_relay/using_with_pipeline_executor.py b/gallery/how_to/work_with_relay/using_with_pipeline_executor.py index 17fa994c982f..b2ea6f7a85fc 100644 --- a/gallery/how_to/work_with_relay/using_with_pipeline_executor.py +++ b/gallery/how_to/work_with_relay/using_with_pipeline_executor.py @@ -79,8 +79,8 @@ def get_network(): subgraphs = graph_split(net["main"], split_config, params) ########################################################### # The generated subgraphs should look something like below. -#---------------------------------------------------------- -``` +# ---------------------------------------------------------- +""" #subgraphs[0]) def @main(%data: Tensor[(1, 3, 224, 224), float32]) { @@ -96,7 +96,7 @@ def @main(%data: Tensor[(1, 3, 224, 224), float32]) { def @main(%data_n_0: Tensor[(1, 16, 224, 224), float32]) { nn.conv2d(%data_n_0, meta[relay.Constant][0] /* ty=Tensor[(16, 16, 3, 3), float32] */, padding=[1, 1, 1, 1], channels=16, kernel_size=[3, 3]) /* ty=Tensor[(1, 16, 224, 224), float32] */ } -``` +""" ############################################################ # Enable the pipeline executor, and doing the configuration. 
@@ -133,18 +133,18 @@ def @main(%data_n_0: Tensor[(1, 16, 224, 224), float32]) { pipe_config[mod1]["output"]["0"].connect(pipe_config["output"][0]) ########################################## # The pipeline configuration like below(). -#----------------------------------------- -``` +# ----------------------------------------- +""" print(pipe_config) -# Inputs -# |data: mod0:data -# -# output -# |output(0) : mod1.output(0) -# -# connections -# |mod0.output(0)-> mod1.data_n_0 -``` + Inputs + |data: mod0:data + + output + |output(0) : mod1.output(0) + + connections + |mod0.output(0)-> mod1.data_n_0 +""" # Build the pipeline executor with tvm.transform.PassContext(opt_level=3): pipeline_mod_factory = pipeline_executor_build.build(pipe_config) From 8d01a7f87e907a2dcd35ef46fcb46b1865291846 Mon Sep 17 00:00:00 2001 From: huajsj Date: Sat, 4 Jun 2022 23:27:04 -0700 Subject: [PATCH 07/39] doc issue --- .../how_to/work_with_relay/using_with_pipeline_executor.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/gallery/how_to/work_with_relay/using_with_pipeline_executor.py b/gallery/how_to/work_with_relay/using_with_pipeline_executor.py index b2ea6f7a85fc..80dadb6ad0f0 100644 --- a/gallery/how_to/work_with_relay/using_with_pipeline_executor.py +++ b/gallery/how_to/work_with_relay/using_with_pipeline_executor.py @@ -101,9 +101,8 @@ def @main(%data_n_0: Tensor[(1, 16, 224, 224), float32]) { ############################################################ # Enable the pipeline executor, and doing the configuration. # ---------------------------------------------------------- -# In build/config.cmake set USE_PIPELINE_EXECUTOR as ON to enable pipeline executor -# import pipeline_executor, and pipeline_executor_build -############################################################## +# In build/config.cmake set USE_PIPELINE_EXECUTOR as ON to enable pipeline executor. 
+ from tvm.contrib import graph_executor, pipeline_executor, pipeline_executor_build # Create subgraph pipeline configuration. From 86cfbe4340810c0da5fdd51b5d7cdc9ca724c271 Mon Sep 17 00:00:00 2001 From: huajsj Date: Wed, 8 Jun 2022 00:40:39 -0700 Subject: [PATCH 08/39] fix ci issue --- .../using_with_pipeline_executor.py | 80 ++++++++++++------- 1 file changed, 49 insertions(+), 31 deletions(-) diff --git a/gallery/how_to/work_with_relay/using_with_pipeline_executor.py b/gallery/how_to/work_with_relay/using_with_pipeline_executor.py index 80dadb6ad0f0..e26a45f68689 100644 --- a/gallery/how_to/work_with_relay/using_with_pipeline_executor.py +++ b/gallery/how_to/work_with_relay/using_with_pipeline_executor.py @@ -46,9 +46,11 @@ def get_network(): bn_mmean = relay.var("bn_mean") bn_mvar = relay.var("bn_var") simple_net = relay.nn.conv2d( - data=data, weight=weight, kernel_size=(3, 3), channels=out_channels, padding=(1, 1) + data=data, weight=weight, kernel_size=(3, 3), + channels=out_channels, padding=(1, 1) ) - simple_net = relay.nn.batch_norm(simple_net, bn_gamma, bn_beta, bn_mmean, bn_mvar)[0] + simple_net = relay.nn.batch_norm(simple_net, bn_gamma, bn_beta, + bn_mmean, bn_mvar)[0] simple_net = relay.nn.relu(simple_net) simple_net = relay.nn.conv2d( data=simple_net, @@ -64,22 +66,22 @@ def get_network(): net, params, data_shape = get_network() -############################################# -# Apply a customer graph splitting function. -# ------------------------------------------ +########################################### +# Splitting the network into two subgraphs. +# ----------------------------------------- # We use an testing linear graph splitting function as a example. User also can create their # own splitting function logic. 
import os - -os.sys.path.append(os.path.abspath(os.environ["TVM_HOME"] + "/tests/python/relay")) +test_path = os.path.join(os.path.dirname(__file__), '../../../tests/python/relay') +os.sys.path.append(test_path) from test_pipeline_executor import graph_split - +########################################### # Splitting the network into two subgraphs. split_config = [{"op_name": "nn.relu", "op_index": 0}] subgraphs = graph_split(net["main"], split_config, params) ########################################################### # The generated subgraphs should look something like below. -# ---------------------------------------------------------- + """ #subgraphs[0]) @@ -98,41 +100,42 @@ def @main(%data_n_0: Tensor[(1, 16, 224, 224), float32]) { } """ -############################################################ -# Enable the pipeline executor, and doing the configuration. -# ---------------------------------------------------------- +######################################### +# Create subgraph pipeline configuration. +# --------------------------------------- # In build/config.cmake set USE_PIPELINE_EXECUTOR as ON to enable pipeline executor. - from tvm.contrib import graph_executor, pipeline_executor, pipeline_executor_build - +######################################### # Create subgraph pipeline configuration. -mod0, mod1 = subgraphs[0], subgraphs[1] -#################################################################### # Associate the subgraph module with a target. -# Set the codegen of the second subgraph module as dnnl, and the target as the CPU -# Enable dnnl by set USE_DNNL_CODEGEN as on in config.cmake and install MKL-DNN. -# using BYOC to apply dnnl codegen +# Using BYOC to set the codegen of the second subgraph module. +# To use dnnl the 'USE_DNNL_CODEGEN' should set as ON in config.cmake and installing MKL-DNN. 
+mod0, mod1 = subgraphs[0], subgraphs[1] mod0 = relay.transform.AnnotateTarget(["dnnl"])(mod0) mod0 = relay.transform.MergeCompilerRegions()(mod0) mod0 = relay.transform.PartitionGraph()(mod0) -# Start setting the pipeline configure. +################################################# +# Get the pipeline executor configuration object. pipe_config = pipeline_executor_build.PipelineConfig() -# Set the compile target of the second subgraph module as CPU. +########################################################################### +# Set the compile target of the second subgraph module for example as LLVM. pipe_config[mod0].target = "llvm" pipe_config[mod0].dev = tvm.cpu(0) -# Set the cpu afinity for control flow, for example use cpu 0 for control flow. +############################################################################### +# Set the cpu afinity for control flow, for example using cpu 0 for control flow. pipe_config[mod1].cpu_affinity = "0" -# Set the compile target of the second subgraph module as CPU. +############################################################## +# Set the compile target of the second subgraph module as LLVM. pipe_config[mod1].target = "llvm" pipe_config[mod1].dev = tvm.cpu(0) -# Set the cpu afinity for control flow, for example use cpu 1 for control flow. +################################################################################# +# Set the cpu afinity for control flow, for example using cpu 1 for control flow. pipe_config[mod1].cpu_affinity = "1" pipe_config["input"]["data"].connect(pipe_config[mod0]["input"]["data"]) pipe_config[mod0]["output"][0].connect(pipe_config[mod1]["input"]["data_n_0"]) pipe_config[mod1]["output"]["0"].connect(pipe_config["output"][0]) -########################################## -# The pipeline configuration like below(). -# ----------------------------------------- +###################################### +# The pipeline configuration as below. 
""" print(pipe_config) Inputs @@ -144,27 +147,41 @@ def @main(%data_n_0: Tensor[(1, 16, 224, 224), float32]) { connections |mod0.output(0)-> mod1.data_n_0 """ -# Build the pipeline executor +############################## +# Build the pipeline executor. +# ---------------------------- with tvm.transform.PassContext(opt_level=3): pipeline_mod_factory = pipeline_executor_build.build(pipe_config) # Export the parameter configuration to a file. directory_path = tvm.contrib.utils.tempdir().temp_dir +############################################# # If the directory does not exist, create it. if not os.path.exists(directory_path): os.makedirs(directory_path) config_file_name = pipeline_mod_factory.export_library(directory_path) +################################################################ # Use the load function to create and initialize PipelineModule. +# -------------------------------------------------------------- pipeline_module = pipeline_executor.PipelineModule.load_library(config_file_name) -# Allocated a data. + + +############################ +# Run the pipeline executor. +# -------------------------- +# Allocated a input data. data = np.random.uniform(-1, 1, size=data_shape).astype("float32") -# Run the pipeline executor pipeline_module.set_input("data", tvm.nd.array(data)) +########################################################################## +# Run the two subgraph in pipeline mode and get the output asynchronously. pipeline_module.run() outputs = [] while not outputs: outputs = pipeline_module.get_output() time.sleep(0.001) -# Run with graph_executor and verify the output of pipeline executor. +###################################### +# Use graph_executor for verification. +# ------------------------------------ +# Run these two subgraphs in sequence with graph_executor to get the output. 
target = "llvm" dev = tvm.device(target, 0) lib0 = relay.build_module.build(mod0, target, params=params) @@ -178,5 +195,6 @@ def @main(%data_n_0: Tensor[(1, 16, 224, 224), float32]) { module1.set_input("data_n_0", out) module1.run() out = module1.get_output(0, tvm.nd.empty(out_shape)) +#################### # Verify the result. tvm.testing.assert_allclose(outputs[0].numpy(), out.numpy()) From 22788ba2565299665e9c5311fc535e456a3739c1 Mon Sep 17 00:00:00 2001 From: huajsj Date: Wed, 8 Jun 2022 00:46:22 -0700 Subject: [PATCH 09/39] fix ci issue. --- .../work_with_relay/using_with_pipeline_executor.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/gallery/how_to/work_with_relay/using_with_pipeline_executor.py b/gallery/how_to/work_with_relay/using_with_pipeline_executor.py index e26a45f68689..43349a366946 100644 --- a/gallery/how_to/work_with_relay/using_with_pipeline_executor.py +++ b/gallery/how_to/work_with_relay/using_with_pipeline_executor.py @@ -46,11 +46,9 @@ def get_network(): bn_mmean = relay.var("bn_mean") bn_mvar = relay.var("bn_var") simple_net = relay.nn.conv2d( - data=data, weight=weight, kernel_size=(3, 3), - channels=out_channels, padding=(1, 1) + data=data, weight=weight, kernel_size=(3, 3), channels=out_channels, padding=(1, 1) ) - simple_net = relay.nn.batch_norm(simple_net, bn_gamma, bn_beta, - bn_mmean, bn_mvar)[0] + simple_net = relay.nn.batch_norm(simple_net, bn_gamma, bn_beta, bn_mmean, bn_mvar)[0] simple_net = relay.nn.relu(simple_net) simple_net = relay.nn.conv2d( data=simple_net, @@ -72,9 +70,11 @@ def get_network(): # We use an testing linear graph splitting function as a example. User also can create their # own splitting function logic. 
import os -test_path = os.path.join(os.path.dirname(__file__), '../../../tests/python/relay') + +test_path = os.path.join(os.path.dirname(__file__), "../../../tests/python/relay") os.sys.path.append(test_path) from test_pipeline_executor import graph_split + ########################################### # Splitting the network into two subgraphs. split_config = [{"op_name": "nn.relu", "op_index": 0}] @@ -105,6 +105,7 @@ def @main(%data_n_0: Tensor[(1, 16, 224, 224), float32]) { # --------------------------------------- # In build/config.cmake set USE_PIPELINE_EXECUTOR as ON to enable pipeline executor. from tvm.contrib import graph_executor, pipeline_executor, pipeline_executor_build + ######################################### # Create subgraph pipeline configuration. # Associate the subgraph module with a target. From 9a550fbfe2c4c6216d83a1961d41a22daa0e0c7f Mon Sep 17 00:00:00 2001 From: huajsj Date: Wed, 8 Jun 2022 11:32:18 -0700 Subject: [PATCH 10/39] fix __file__ not found problem. this is a known issue of sphinx-gallery https://github.com/sphinx-gallery/sphinx-gallery/issues/211 --- .../how_to/work_with_relay/using_with_pipeline_executor.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) mode change 100644 => 100755 gallery/how_to/work_with_relay/using_with_pipeline_executor.py diff --git a/gallery/how_to/work_with_relay/using_with_pipeline_executor.py b/gallery/how_to/work_with_relay/using_with_pipeline_executor.py old mode 100644 new mode 100755 index 43349a366946..93d2d6aa7761 --- a/gallery/how_to/work_with_relay/using_with_pipeline_executor.py +++ b/gallery/how_to/work_with_relay/using_with_pipeline_executor.py @@ -69,10 +69,11 @@ def get_network(): # ----------------------------------------- # We use an testing linear graph splitting function as a example. User also can create their # own splitting function logic. 
+import inspect import os -test_path = os.path.join(os.path.dirname(__file__), "../../../tests/python/relay") -os.sys.path.append(test_path) +test_path = os.path.dirname(inspect.getfile(lambda: None)) +os.sys.path.append(os.path.join(test_path, "../../../tests/python/relay")) from test_pipeline_executor import graph_split ########################################### From 1b532586493714e9647db00974be63863c69ad2f Mon Sep 17 00:00:00 2001 From: huajsj Date: Wed, 8 Jun 2022 17:46:36 -0700 Subject: [PATCH 11/39] fix byoc with dnnl issue --- .../using_with_pipeline_executor.py | 211 ++++++++++-------- 1 file changed, 112 insertions(+), 99 deletions(-) diff --git a/gallery/how_to/work_with_relay/using_with_pipeline_executor.py b/gallery/how_to/work_with_relay/using_with_pipeline_executor.py index 93d2d6aa7761..bb1467c8e0e7 100755 --- a/gallery/how_to/work_with_relay/using_with_pipeline_executor.py +++ b/gallery/how_to/work_with_relay/using_with_pipeline_executor.py @@ -93,6 +93,7 @@ def @main(%data: Tensor[(1, 3, 224, 224), float32]) { nn.relu(%2) /* ty=Tensor[(1, 16, 224, 224), float32] */ } +peline-tutorial #subgraphs[1] @@ -101,102 +102,114 @@ def @main(%data_n_0: Tensor[(1, 16, 224, 224), float32]) { } """ -######################################### -# Create subgraph pipeline configuration. -# --------------------------------------- -# In build/config.cmake set USE_PIPELINE_EXECUTOR as ON to enable pipeline executor. -from tvm.contrib import graph_executor, pipeline_executor, pipeline_executor_build - -######################################### -# Create subgraph pipeline configuration. -# Associate the subgraph module with a target. -# Using BYOC to set the codegen of the second subgraph module. -# To use dnnl the 'USE_DNNL_CODEGEN' should set as ON in config.cmake and installing MKL-DNN. 
-mod0, mod1 = subgraphs[0], subgraphs[1] -mod0 = relay.transform.AnnotateTarget(["dnnl"])(mod0) -mod0 = relay.transform.MergeCompilerRegions()(mod0) -mod0 = relay.transform.PartitionGraph()(mod0) -################################################# -# Get the pipeline executor configuration object. -pipe_config = pipeline_executor_build.PipelineConfig() -########################################################################### -# Set the compile target of the second subgraph module for example as LLVM. -pipe_config[mod0].target = "llvm" -pipe_config[mod0].dev = tvm.cpu(0) -############################################################################### -# Set the cpu afinity for control flow, for example using cpu 0 for control flow. -pipe_config[mod1].cpu_affinity = "0" -############################################################## -# Set the compile target of the second subgraph module as LLVM. -pipe_config[mod1].target = "llvm" -pipe_config[mod1].dev = tvm.cpu(0) -################################################################################# -# Set the cpu afinity for control flow, for example using cpu 1 for control flow. -pipe_config[mod1].cpu_affinity = "1" -pipe_config["input"]["data"].connect(pipe_config[mod0]["input"]["data"]) -pipe_config[mod0]["output"][0].connect(pipe_config[mod1]["input"]["data_n_0"]) -pipe_config[mod1]["output"]["0"].connect(pipe_config["output"][0]) -###################################### -# The pipeline configuration as below. -""" -print(pipe_config) - Inputs - |data: mod0:data - - output - |output(0) : mod1.output(0) - - connections - |mod0.output(0)-> mod1.data_n_0 -""" -############################## -# Build the pipeline executor. -# ---------------------------- -with tvm.transform.PassContext(opt_level=3): - pipeline_mod_factory = pipeline_executor_build.build(pipe_config) -# Export the parameter configuration to a file. 
-directory_path = tvm.contrib.utils.tempdir().temp_dir -############################################# -# If the directory does not exist, create it. -if not os.path.exists(directory_path): - os.makedirs(directory_path) -config_file_name = pipeline_mod_factory.export_library(directory_path) -################################################################ -# Use the load function to create and initialize PipelineModule. -# -------------------------------------------------------------- -pipeline_module = pipeline_executor.PipelineModule.load_library(config_file_name) - - -############################ -# Run the pipeline executor. -# -------------------------- -# Allocated a input data. -data = np.random.uniform(-1, 1, size=data_shape).astype("float32") -pipeline_module.set_input("data", tvm.nd.array(data)) -########################################################################## -# Run the two subgraph in pipeline mode and get the output asynchronously. -pipeline_module.run() -outputs = [] -while not outputs: - outputs = pipeline_module.get_output() - time.sleep(0.001) -###################################### -# Use graph_executor for verification. -# ------------------------------------ -# Run these two subgraphs in sequence with graph_executor to get the output. -target = "llvm" -dev = tvm.device(target, 0) -lib0 = relay.build_module.build(mod0, target, params=params) -lib1 = relay.build_module.build(mod1, target, params=params) -module0 = runtime.GraphModule(lib0["default"](dev)) -module1 = runtime.GraphModule(lib1["default"](dev)) -module0.set_input("data", data) -module0.run() -out_shape = (1, 16, 224, 224) -out = module0.get_output(0, tvm.nd.empty(out_shape)) -module1.set_input("data_n_0", out) -module1.run() -out = module1.get_output(0, tvm.nd.empty(out_shape)) -#################### -# Verify the result. 
-tvm.testing.assert_allclose(outputs[0].numpy(), out.numpy()) +########################################################### +# Run the two subgraphs in pipeline with pipeline executor. +# --------------------------------------------------------- +# Define a function to do all the codegen and pipeline executor works. +def run_pipeline_executor(): + ######################################### + # Create subgraph pipeline configuration. + # --------------------------------------- + # In build/config.cmake set USE_PIPELINE_EXECUTOR as ON to enable pipeline executor. + from tvm.contrib import graph_executor, pipeline_executor, pipeline_executor_build + + ######################################### + # Create subgraph pipeline configuration. + # Associate the subgraph module with a target. + # Using BYOC to set the codegen of the second subgraph module. + # To use dnnl the 'USE_DNNL_CODEGEN' should set as ON in config.cmake and installing MKL-DNN. + mod0, mod1 = subgraphs[0], subgraphs[1] + mod0 = relay.transform.AnnotateTarget(["dnnl"])(mod0) + mod0 = relay.transform.MergeCompilerRegions()(mod0) + mod0 = relay.transform.PartitionGraph()(mod0) + ################################################# + # Get the pipeline executor configuration object. + pipe_config = pipeline_executor_build.PipelineConfig() + ########################################################################### + # Set the compile target of the second subgraph module for example as LLVM. + pipe_config[mod0].target = "llvm" + pipe_config[mod0].dev = tvm.cpu(0) + ############################################################################### + # Set the cpu afinity for control flow, for example using cpu 0 for control flow. + pipe_config[mod1].cpu_affinity = "0" + ############################################################## + # Set the compile target of the second subgraph module as LLVM. 
+ pipe_config[mod1].target = "llvm" + pipe_config[mod1].dev = tvm.cpu(0) + ################################################################################# + # Set the cpu afinity for control flow, for example using cpu 1 for control flow. + pipe_config[mod1].cpu_affinity = "1" + pipe_config["input"]["data"].connect(pipe_config[mod0]["input"]["data"]) + pipe_config[mod0]["output"][0].connect(pipe_config[mod1]["input"]["data_n_0"]) + pipe_config[mod1]["output"]["0"].connect(pipe_config["output"][0]) + ###################################### + # The pipeline configuration as below. + """ + print(pipe_config) + Inputs + |data: mod0:data + + output + |output(0) : mod1.output(0) + + connections + |mod0.output(0)-> mod1.data_n_0 + """ + ############################## + # Build the pipeline executor. + # ---------------------------- + with tvm.transform.PassContext(opt_level=3): + pipeline_mod_factory = pipeline_executor_build.build(pipe_config) + ############################################### + # Export the parameter configuration to a file. + directory_path = tvm.contrib.utils.tempdir().temp_dir + ############################################# + # If the directory does not exist, create it. + if not os.path.exists(directory_path): + os.makedirs(directory_path) + config_file_name = pipeline_mod_factory.export_library(directory_path) + ################################################################ + # Use the load function to create and initialize PipelineModule. + # -------------------------------------------------------------- + pipeline_module = pipeline_executor.PipelineModule.load_library(config_file_name) + + ############################ + # Run the pipeline executor. + # -------------------------- + # Allocated a input data. 
+ data = np.random.uniform(-1, 1, size=data_shape).astype("float32") + pipeline_module.set_input("data", tvm.nd.array(data)) + ########################################################################## + # Run the two subgraph in pipeline mode and get the output asynchronously. + pipeline_module.run() + outputs = [] + while not outputs: + outputs = pipeline_module.get_output() + time.sleep(0.001) + ###################################### + # Use graph_executor for verification. + # ------------------------------------ + # Run these two subgraphs in sequence with graph_executor to get the output. + target = "llvm" + dev = tvm.device(target, 0) + lib0 = relay.build_module.build(mod0, target, params=params) + lib1 = relay.build_module.build(mod1, target, params=params) + module0 = runtime.GraphModule(lib0["default"](dev)) + module1 = runtime.GraphModule(lib1["default"](dev)) + module0.set_input("data", data) + module0.run() + out_shape = (1, 16, 224, 224) + out = module0.get_output(0, tvm.nd.empty(out_shape)) + module1.set_input("data_n_0", out) + module1.run() + out = module1.get_output(0, tvm.nd.empty(out_shape)) + #################### + # Verify the result. + tvm.testing.assert_allclose(outputs[0].numpy(), out.numpy()) + + +################################################################################## +# To run pipeline executor with dnnl, USE_PIPELINE_EXECUTOR need to get set as ON. +# and the 'USE_DNNL_CODEGEN' should set as ON in config.cmake and installing MKL-DNN. 
+ +# run_pipeline_executor() From 7757b1bcb2c39fad56d5db7ad5fe14f419153e90 Mon Sep 17 00:00:00 2001 From: huajsj Date: Thu, 9 Jun 2022 13:44:18 -0700 Subject: [PATCH 12/39] enable dnnl and pipeline executor --- gallery/how_to/work_with_relay/using_with_pipeline_executor.py | 2 +- tests/scripts/task_config_build_gpu.sh | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/gallery/how_to/work_with_relay/using_with_pipeline_executor.py b/gallery/how_to/work_with_relay/using_with_pipeline_executor.py index bb1467c8e0e7..380cf79c42bf 100755 --- a/gallery/how_to/work_with_relay/using_with_pipeline_executor.py +++ b/gallery/how_to/work_with_relay/using_with_pipeline_executor.py @@ -212,4 +212,4 @@ def run_pipeline_executor(): # To run pipeline executor with dnnl, USE_PIPELINE_EXECUTOR need to get set as ON. # and the 'USE_DNNL_CODEGEN' should set as ON in config.cmake and installing MKL-DNN. -# run_pipeline_executor() +run_pipeline_executor() diff --git a/tests/scripts/task_config_build_gpu.sh b/tests/scripts/task_config_build_gpu.sh index 9a71983886dd..8d0ecc5e3c21 100755 --- a/tests/scripts/task_config_build_gpu.sh +++ b/tests/scripts/task_config_build_gpu.sh @@ -47,3 +47,5 @@ echo set\(USE_LIBBACKTRACE AUTO\) >> config.cmake echo set\(USE_CCACHE OFF\) >> config.cmake echo set\(SUMMARIZE ON\) >> config.cmake echo set\(HIDE_PRIVATE_SYMBOLS ON\) >> config.cmake +echo set\(USE_PIPELINE_EXECUTOR ON\) >> config.cmake +echo set\(USE_DNNL_CODEGEN ON\) >> config.cmake From 15db48a98226ddb8099dda4df22b5c90132daa10 Mon Sep 17 00:00:00 2001 From: huajsj Date: Thu, 9 Jun 2022 14:48:05 -0700 Subject: [PATCH 13/39] trigger build --- gallery/how_to/work_with_relay/using_with_pipeline_executor.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gallery/how_to/work_with_relay/using_with_pipeline_executor.py b/gallery/how_to/work_with_relay/using_with_pipeline_executor.py index 380cf79c42bf..a4e5c1a425a9 100755 --- 
a/gallery/how_to/work_with_relay/using_with_pipeline_executor.py +++ b/gallery/how_to/work_with_relay/using_with_pipeline_executor.py @@ -19,7 +19,7 @@ ================================= **Author**: `Hua Jiang `_ -This is a short tutorial on how to use the Pipeline Executor with Relay. +This is a short tutorial on how to use "Pipeline Executor" with Relay. """ import tvm from tvm import te From 3b02c9ab84b5ec36181db4dfad4170474921df27 Mon Sep 17 00:00:00 2001 From: huajsj Date: Thu, 9 Jun 2022 17:17:28 -0700 Subject: [PATCH 14/39] trigger build --- gallery/how_to/work_with_relay/using_with_pipeline_executor.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gallery/how_to/work_with_relay/using_with_pipeline_executor.py b/gallery/how_to/work_with_relay/using_with_pipeline_executor.py index a4e5c1a425a9..eaec39de1031 100755 --- a/gallery/how_to/work_with_relay/using_with_pipeline_executor.py +++ b/gallery/how_to/work_with_relay/using_with_pipeline_executor.py @@ -19,7 +19,7 @@ ================================= **Author**: `Hua Jiang `_ -This is a short tutorial on how to use "Pipeline Executor" with Relay. +This is a short tutorial on how to use the "Pipeline Executor" with Relay. 
""" import tvm from tvm import te From 0811b24dde7319171c8f96cfe6f33cd425dadecb Mon Sep 17 00:00:00 2001 From: huajsj Date: Tue, 21 Jun 2022 13:59:45 -0700 Subject: [PATCH 15/39] fix build issue --- .../using_with_pipeline_executor.py | 198 +++++++++--------- tests/scripts/task_config_build_gpu.sh | 2 +- 2 files changed, 95 insertions(+), 105 deletions(-) diff --git a/gallery/how_to/work_with_relay/using_with_pipeline_executor.py b/gallery/how_to/work_with_relay/using_with_pipeline_executor.py index eaec39de1031..d829f2030a06 100755 --- a/gallery/how_to/work_with_relay/using_with_pipeline_executor.py +++ b/gallery/how_to/work_with_relay/using_with_pipeline_executor.py @@ -106,110 +106,100 @@ def @main(%data_n_0: Tensor[(1, 16, 224, 224), float32]) { # Run the two subgraphs in pipeline with pipeline executor. # --------------------------------------------------------- # Define a function to do all the codegen and pipeline executor works. -def run_pipeline_executor(): - ######################################### - # Create subgraph pipeline configuration. - # --------------------------------------- - # In build/config.cmake set USE_PIPELINE_EXECUTOR as ON to enable pipeline executor. - from tvm.contrib import graph_executor, pipeline_executor, pipeline_executor_build - - ######################################### - # Create subgraph pipeline configuration. - # Associate the subgraph module with a target. - # Using BYOC to set the codegen of the second subgraph module. - # To use dnnl the 'USE_DNNL_CODEGEN' should set as ON in config.cmake and installing MKL-DNN. - mod0, mod1 = subgraphs[0], subgraphs[1] - mod0 = relay.transform.AnnotateTarget(["dnnl"])(mod0) - mod0 = relay.transform.MergeCompilerRegions()(mod0) - mod0 = relay.transform.PartitionGraph()(mod0) - ################################################# - # Get the pipeline executor configuration object. 
- pipe_config = pipeline_executor_build.PipelineConfig() - ########################################################################### - # Set the compile target of the second subgraph module for example as LLVM. - pipe_config[mod0].target = "llvm" - pipe_config[mod0].dev = tvm.cpu(0) - ############################################################################### - # Set the cpu afinity for control flow, for example using cpu 0 for control flow. - pipe_config[mod1].cpu_affinity = "0" - ############################################################## - # Set the compile target of the second subgraph module as LLVM. - pipe_config[mod1].target = "llvm" - pipe_config[mod1].dev = tvm.cpu(0) - ################################################################################# - # Set the cpu afinity for control flow, for example using cpu 1 for control flow. - pipe_config[mod1].cpu_affinity = "1" - pipe_config["input"]["data"].connect(pipe_config[mod0]["input"]["data"]) - pipe_config[mod0]["output"][0].connect(pipe_config[mod1]["input"]["data_n_0"]) - pipe_config[mod1]["output"]["0"].connect(pipe_config["output"][0]) - ###################################### - # The pipeline configuration as below. - """ - print(pipe_config) - Inputs - |data: mod0:data - - output - |output(0) : mod1.output(0) - - connections - |mod0.output(0)-> mod1.data_n_0 - """ - ############################## - # Build the pipeline executor. - # ---------------------------- - with tvm.transform.PassContext(opt_level=3): - pipeline_mod_factory = pipeline_executor_build.build(pipe_config) - ############################################### - # Export the parameter configuration to a file. - directory_path = tvm.contrib.utils.tempdir().temp_dir - ############################################# - # If the directory does not exist, create it. 
- if not os.path.exists(directory_path): - os.makedirs(directory_path) - config_file_name = pipeline_mod_factory.export_library(directory_path) - ################################################################ - # Use the load function to create and initialize PipelineModule. - # -------------------------------------------------------------- - pipeline_module = pipeline_executor.PipelineModule.load_library(config_file_name) - - ############################ - # Run the pipeline executor. - # -------------------------- - # Allocated a input data. - data = np.random.uniform(-1, 1, size=data_shape).astype("float32") - pipeline_module.set_input("data", tvm.nd.array(data)) - ########################################################################## - # Run the two subgraph in pipeline mode and get the output asynchronously. - pipeline_module.run() - outputs = [] - while not outputs: - outputs = pipeline_module.get_output() - time.sleep(0.001) - ###################################### - # Use graph_executor for verification. - # ------------------------------------ - # Run these two subgraphs in sequence with graph_executor to get the output. - target = "llvm" - dev = tvm.device(target, 0) - lib0 = relay.build_module.build(mod0, target, params=params) - lib1 = relay.build_module.build(mod1, target, params=params) - module0 = runtime.GraphModule(lib0["default"](dev)) - module1 = runtime.GraphModule(lib1["default"](dev)) - module0.set_input("data", data) - module0.run() - out_shape = (1, 16, 224, 224) - out = module0.get_output(0, tvm.nd.empty(out_shape)) - module1.set_input("data_n_0", out) - module1.run() - out = module1.get_output(0, tvm.nd.empty(out_shape)) - #################### - # Verify the result. - tvm.testing.assert_allclose(outputs[0].numpy(), out.numpy()) - - -################################################################################## # To run pipeline executor with dnnl, USE_PIPELINE_EXECUTOR need to get set as ON. 
# and the 'USE_DNNL_CODEGEN' should set as ON in config.cmake and installing MKL-DNN. +from tvm.contrib import graph_executor, pipeline_executor, pipeline_executor_build + +######################################### +# Create subgraph pipeline configuration. +# Associate the subgraph module with a target. +# Using BYOC to set the codegen of the second subgraph module. +# To use dnnl the 'USE_DNNL_CODEGEN' should set as ON in config.cmake and installing MKL-DNN. +mod0, mod1 = subgraphs[0], subgraphs[1] +mod0 = relay.transform.AnnotateTarget(["dnnl"])(mod0) +mod0 = relay.transform.MergeCompilerRegions()(mod0) +mod0 = relay.transform.PartitionGraph()(mod0) +################################################# +# Get the pipeline executor configuration object. +pipe_config = pipeline_executor_build.PipelineConfig() +########################################################################### +# Set the compile target of the second subgraph module for example as LLVM. +pipe_config[mod0].target = "llvm" +pipe_config[mod0].dev = tvm.cpu(0) +############################################################################### +# Set the cpu afinity for control flow, for example using cpu 0 for control flow. +pipe_config[mod1].cpu_affinity = "0" +############################################################## +# Set the compile target of the second subgraph module as LLVM. +pipe_config[mod1].target = "llvm" +pipe_config[mod1].dev = tvm.cpu(0) +################################################################################# +# Set the cpu afinity for control flow, for example using cpu 1 for control flow. +pipe_config[mod1].cpu_affinity = "1" +pipe_config["input"]["data"].connect(pipe_config[mod0]["input"]["data"]) +pipe_config[mod0]["output"][0].connect(pipe_config[mod1]["input"]["data_n_0"]) +pipe_config[mod1]["output"]["0"].connect(pipe_config["output"][0]) +###################################### +# The pipeline configuration as below. 
+""" +print(pipe_config) + Inputs + |data: mod0:data + + output + |output(0) : mod1.output(0) -run_pipeline_executor() + connections + |mod0.output(0)-> mod1.data_n_0 +""" +############################## +# Build the pipeline executor. +# ---------------------------- +with tvm.transform.PassContext(opt_level=3): + pipeline_mod_factory = pipeline_executor_build.build(pipe_config) +############################################### +# Export the parameter configuration to a file. +directory_path = tvm.contrib.utils.tempdir().temp_dir +############################################# +# If the directory does not exist, create it. +if not os.path.exists(directory_path): + os.makedirs(directory_path) +config_file_name = pipeline_mod_factory.export_library(directory_path) +################################################################ +# Use the load function to create and initialize PipelineModule. +# -------------------------------------------------------------- +pipeline_module = pipeline_executor.PipelineModule.load_library(config_file_name) + +############################ +# Run the pipeline executor. +# -------------------------- +# Allocated a input data. +data = np.random.uniform(-1, 1, size=data_shape).astype("float32") +pipeline_module.set_input("data", tvm.nd.array(data)) +########################################################################## +# Run the two subgraph in pipeline mode and get the output asynchronously. +pipeline_module.run() +outputs = [] +while not outputs: + outputs = pipeline_module.get_output() + time.sleep(0.001) +###################################### +# Use graph_executor for verification. +# ------------------------------------ +# Run these two subgraphs in sequence with graph_executor to get the output. 
+target = "llvm" +dev = tvm.device(target, 0) +lib0 = relay.build_module.build(mod0, target, params=params) +lib1 = relay.build_module.build(mod1, target, params=params) +module0 = runtime.GraphModule(lib0["default"](dev)) +module1 = runtime.GraphModule(lib1["default"](dev)) +module0.set_input("data", data) +module0.run() +out_shape = (1, 16, 224, 224) +out = module0.get_output(0, tvm.nd.empty(out_shape)) +module1.set_input("data_n_0", out) +module1.run() +out = module1.get_output(0, tvm.nd.empty(out_shape)) +#################### +# Verify the result. +tvm.testing.assert_allclose(outputs[0].numpy(), out.numpy()) diff --git a/tests/scripts/task_config_build_gpu.sh b/tests/scripts/task_config_build_gpu.sh index 8d0ecc5e3c21..9357fa6c8078 100755 --- a/tests/scripts/task_config_build_gpu.sh +++ b/tests/scripts/task_config_build_gpu.sh @@ -48,4 +48,4 @@ echo set\(USE_CCACHE OFF\) >> config.cmake echo set\(SUMMARIZE ON\) >> config.cmake echo set\(HIDE_PRIVATE_SYMBOLS ON\) >> config.cmake echo set\(USE_PIPELINE_EXECUTOR ON\) >> config.cmake -echo set\(USE_DNNL_CODEGEN ON\) >> config.cmake +echo set\(USE_DNNL ON\) >> config.cmake From 53894ec1b5c933261b8468f51d98021cf64601bf Mon Sep 17 00:00:00 2001 From: huajsj Date: Wed, 22 Jun 2022 00:19:25 -0700 Subject: [PATCH 16/39] trigger build --- gallery/how_to/work_with_relay/using_with_pipeline_executor.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gallery/how_to/work_with_relay/using_with_pipeline_executor.py b/gallery/how_to/work_with_relay/using_with_pipeline_executor.py index d829f2030a06..364bc50a39d9 100755 --- a/gallery/how_to/work_with_relay/using_with_pipeline_executor.py +++ b/gallery/how_to/work_with_relay/using_with_pipeline_executor.py @@ -19,7 +19,7 @@ ================================= **Author**: `Hua Jiang `_ -This is a short tutorial on how to use the "Pipeline Executor" with Relay. +This is a short tutorial on how to use "Pipeline Executor" with Relay. 
""" import tvm from tvm import te From fb4f8215f6a519b49b216fb439d2f13ac42b07e0 Mon Sep 17 00:00:00 2001 From: huajsj Date: Mon, 4 Jul 2022 22:31:05 -0700 Subject: [PATCH 17/39] oneflow cause crash, do test with change --- gallery/how_to/compile_models/from_oneflow.py | 126 ------------------ 1 file changed, 126 deletions(-) diff --git a/gallery/how_to/compile_models/from_oneflow.py b/gallery/how_to/compile_models/from_oneflow.py index eb27c4b3e34b..a80bd562eb49 100644 --- a/gallery/how_to/compile_models/from_oneflow.py +++ b/gallery/how_to/compile_models/from_oneflow.py @@ -55,129 +55,3 @@ from tvm import relay from tvm.contrib.download import download_testdata -###################################################################### -# Load a pretrained OneFlow model and save model -# ---------------------------------------------- -model_name = "resnet18" -model = getattr(flowvision.models, model_name)(pretrained=True) -model = model.eval() - -model_dir = "resnet18_model" -if not os.path.exists(model_dir): - flow.save(model.state_dict(), model_dir) - -###################################################################### -# Load a test image -# ----------------- -# Classic cat example! -from PIL import Image - -img_url = "https://github.com/dmlc/mxnet.js/blob/main/data/cat.png?raw=true" -img_path = download_testdata(img_url, "cat.png", module="data") -img = Image.open(img_path).resize((224, 224)) - -# Preprocess the image and convert to tensor -from flowvision import transforms - -my_preprocess = transforms.Compose( - [ - transforms.Resize(256), - transforms.CenterCrop(224), - transforms.ToTensor(), - transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]), - ] -) -img = my_preprocess(img) -img = np.expand_dims(img.numpy(), 0) - -###################################################################### -# Import the graph to Relay -# ------------------------- -# Convert OneFlow graph to Relay graph. The input name can be arbitrary. 
-class Graph(flow.nn.Graph): - def __init__(self, module): - super().__init__() - self.m = module - - def build(self, x): - out = self.m(x) - return out - - -graph = Graph(model) -_ = graph._compile(flow.randn(1, 3, 224, 224)) - -mod, params = relay.frontend.from_oneflow(graph, model_dir) - -###################################################################### -# Relay Build -# ----------- -# Compile the graph to llvm target with given input specification. -target = tvm.target.Target("llvm", host="llvm") -dev = tvm.cpu(0) -with tvm.transform.PassContext(opt_level=3): - lib = relay.build(mod, target=target, params=params) - -###################################################################### -# Execute the portable graph on TVM -# --------------------------------- -# Now we can try deploying the compiled model on target. -target = "cuda" -with tvm.transform.PassContext(opt_level=10): - intrp = relay.build_module.create_executor("graph", mod, tvm.cuda(0), target) - -print(type(img)) -print(img.shape) -tvm_output = intrp.evaluate()(tvm.nd.array(img.astype("float32")), **params) - -##################################################################### -# Look up synset name -# ------------------- -# Look up prediction top 1 index in 1000 class synset. 
-synset_url = "".join( - [ - "https://raw.githubusercontent.com/Cadene/", - "pretrained-models.pytorch/master/data/", - "imagenet_synsets.txt", - ] -) -synset_name = "imagenet_synsets.txt" -synset_path = download_testdata(synset_url, synset_name, module="data") -with open(synset_path) as f: - synsets = f.readlines() - -synsets = [x.strip() for x in synsets] -splits = [line.split(" ") for line in synsets] -key_to_classname = {spl[0]: " ".join(spl[1:]) for spl in splits} - -class_url = "".join( - [ - "https://raw.githubusercontent.com/Cadene/", - "pretrained-models.pytorch/master/data/", - "imagenet_classes.txt", - ] -) -class_name = "imagenet_classes.txt" -class_path = download_testdata(class_url, class_name, module="data") -with open(class_path) as f: - class_id_to_key = f.readlines() - -class_id_to_key = [x.strip() for x in class_id_to_key] - -# Get top-1 result for TVM -top1_tvm = np.argmax(tvm_output.numpy()[0]) -tvm_class_key = class_id_to_key[top1_tvm] - -# Convert input to OneFlow variable and get OneFlow result for comparison -with flow.no_grad(): - torch_img = flow.from_numpy(img) - output = model(torch_img) - - # Get top-1 result for OneFlow - top_oneflow = np.argmax(output.numpy()) - oneflow_class_key = class_id_to_key[top_oneflow] - -print("Relay top-1 id: {}, class name: {}".format(top1_tvm, key_to_classname[tvm_class_key])) -print( - "OneFlow top-1 id: {}, class name: {}".format(top_oneflow, key_to_classname[oneflow_class_key]) -) From e2597985745e687ad9c302dcfcdca12275404957 Mon Sep 17 00:00:00 2001 From: huajsj Date: Mon, 4 Jul 2022 22:53:28 -0700 Subject: [PATCH 18/39] add sphinx skip --- .../how_to/work_with_relay/using_with_pipeline_executor.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/gallery/how_to/work_with_relay/using_with_pipeline_executor.py b/gallery/how_to/work_with_relay/using_with_pipeline_executor.py index 364bc50a39d9..47dc67dfbf9d 100755 --- a/gallery/how_to/work_with_relay/using_with_pipeline_executor.py +++ 
b/gallery/how_to/work_with_relay/using_with_pipeline_executor.py @@ -152,6 +152,12 @@ def @main(%data_n_0: Tensor[(1, 16, 224, 224), float32]) { connections |mod0.output(0)-> mod1.data_n_0 """ + +# sphinx_gallery_start_ignore +from tvm import testing + +testing.utils.install_request_hook(depth=3) +# sphinx_gallery_end_ignore ############################## # Build the pipeline executor. # ---------------------------- From b70a731463dd3675c574d758191eb638dbd7d00a Mon Sep 17 00:00:00 2001 From: huajsj Date: Mon, 4 Jul 2022 23:03:18 -0700 Subject: [PATCH 19/39] plint --- gallery/how_to/compile_models/from_oneflow.py | 1 - 1 file changed, 1 deletion(-) diff --git a/gallery/how_to/compile_models/from_oneflow.py b/gallery/how_to/compile_models/from_oneflow.py index a80bd562eb49..2a4d0815acc0 100644 --- a/gallery/how_to/compile_models/from_oneflow.py +++ b/gallery/how_to/compile_models/from_oneflow.py @@ -54,4 +54,3 @@ import tvm from tvm import relay from tvm.contrib.download import download_testdata - From 215a2bdc967289387347d477e3b85b836b3e98ac Mon Sep 17 00:00:00 2001 From: huajsj Date: Tue, 5 Jul 2022 13:30:31 -0700 Subject: [PATCH 20/39] remove from_oneflow change test. 
--- gallery/how_to/compile_models/from_oneflow.py | 127 ++++++++++++++++++ 1 file changed, 127 insertions(+) diff --git a/gallery/how_to/compile_models/from_oneflow.py b/gallery/how_to/compile_models/from_oneflow.py index 2a4d0815acc0..eb27c4b3e34b 100644 --- a/gallery/how_to/compile_models/from_oneflow.py +++ b/gallery/how_to/compile_models/from_oneflow.py @@ -54,3 +54,130 @@ import tvm from tvm import relay from tvm.contrib.download import download_testdata + +###################################################################### +# Load a pretrained OneFlow model and save model +# ---------------------------------------------- +model_name = "resnet18" +model = getattr(flowvision.models, model_name)(pretrained=True) +model = model.eval() + +model_dir = "resnet18_model" +if not os.path.exists(model_dir): + flow.save(model.state_dict(), model_dir) + +###################################################################### +# Load a test image +# ----------------- +# Classic cat example! +from PIL import Image + +img_url = "https://github.com/dmlc/mxnet.js/blob/main/data/cat.png?raw=true" +img_path = download_testdata(img_url, "cat.png", module="data") +img = Image.open(img_path).resize((224, 224)) + +# Preprocess the image and convert to tensor +from flowvision import transforms + +my_preprocess = transforms.Compose( + [ + transforms.Resize(256), + transforms.CenterCrop(224), + transforms.ToTensor(), + transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]), + ] +) +img = my_preprocess(img) +img = np.expand_dims(img.numpy(), 0) + +###################################################################### +# Import the graph to Relay +# ------------------------- +# Convert OneFlow graph to Relay graph. The input name can be arbitrary. 
+class Graph(flow.nn.Graph): + def __init__(self, module): + super().__init__() + self.m = module + + def build(self, x): + out = self.m(x) + return out + + +graph = Graph(model) +_ = graph._compile(flow.randn(1, 3, 224, 224)) + +mod, params = relay.frontend.from_oneflow(graph, model_dir) + +###################################################################### +# Relay Build +# ----------- +# Compile the graph to llvm target with given input specification. +target = tvm.target.Target("llvm", host="llvm") +dev = tvm.cpu(0) +with tvm.transform.PassContext(opt_level=3): + lib = relay.build(mod, target=target, params=params) + +###################################################################### +# Execute the portable graph on TVM +# --------------------------------- +# Now we can try deploying the compiled model on target. +target = "cuda" +with tvm.transform.PassContext(opt_level=10): + intrp = relay.build_module.create_executor("graph", mod, tvm.cuda(0), target) + +print(type(img)) +print(img.shape) +tvm_output = intrp.evaluate()(tvm.nd.array(img.astype("float32")), **params) + +##################################################################### +# Look up synset name +# ------------------- +# Look up prediction top 1 index in 1000 class synset. 
+synset_url = "".join( + [ + "https://raw.githubusercontent.com/Cadene/", + "pretrained-models.pytorch/master/data/", + "imagenet_synsets.txt", + ] +) +synset_name = "imagenet_synsets.txt" +synset_path = download_testdata(synset_url, synset_name, module="data") +with open(synset_path) as f: + synsets = f.readlines() + +synsets = [x.strip() for x in synsets] +splits = [line.split(" ") for line in synsets] +key_to_classname = {spl[0]: " ".join(spl[1:]) for spl in splits} + +class_url = "".join( + [ + "https://raw.githubusercontent.com/Cadene/", + "pretrained-models.pytorch/master/data/", + "imagenet_classes.txt", + ] +) +class_name = "imagenet_classes.txt" +class_path = download_testdata(class_url, class_name, module="data") +with open(class_path) as f: + class_id_to_key = f.readlines() + +class_id_to_key = [x.strip() for x in class_id_to_key] + +# Get top-1 result for TVM +top1_tvm = np.argmax(tvm_output.numpy()[0]) +tvm_class_key = class_id_to_key[top1_tvm] + +# Convert input to OneFlow variable and get OneFlow result for comparison +with flow.no_grad(): + torch_img = flow.from_numpy(img) + output = model(torch_img) + + # Get top-1 result for OneFlow + top_oneflow = np.argmax(output.numpy()) + oneflow_class_key = class_id_to_key[top_oneflow] + +print("Relay top-1 id: {}, class name: {}".format(top1_tvm, key_to_classname[tvm_class_key])) +print( + "OneFlow top-1 id: {}, class name: {}".format(top_oneflow, key_to_classname[oneflow_class_key]) +) From bc6e863d2711f90014b4d5e7f837881bbdfabf1e Mon Sep 17 00:00:00 2001 From: huajsj Date: Tue, 5 Jul 2022 13:37:22 -0700 Subject: [PATCH 21/39] remove pipeline executor change for test --- .../using_with_pipeline_executor.py | 203 +++++++++--------- tests/scripts/task_config_build_gpu.sh | 4 +- 2 files changed, 104 insertions(+), 103 deletions(-) diff --git a/gallery/how_to/work_with_relay/using_with_pipeline_executor.py b/gallery/how_to/work_with_relay/using_with_pipeline_executor.py index 47dc67dfbf9d..4b5cba3702a5 100755 
--- a/gallery/how_to/work_with_relay/using_with_pipeline_executor.py +++ b/gallery/how_to/work_with_relay/using_with_pipeline_executor.py @@ -108,104 +108,105 @@ def @main(%data_n_0: Tensor[(1, 16, 224, 224), float32]) { # Define a function to do all the codegen and pipeline executor works. # To run pipeline executor with dnnl, USE_PIPELINE_EXECUTOR need to get set as ON. # and the 'USE_DNNL_CODEGEN' should set as ON in config.cmake and installing MKL-DNN. -from tvm.contrib import graph_executor, pipeline_executor, pipeline_executor_build - -######################################### -# Create subgraph pipeline configuration. -# Associate the subgraph module with a target. -# Using BYOC to set the codegen of the second subgraph module. -# To use dnnl the 'USE_DNNL_CODEGEN' should set as ON in config.cmake and installing MKL-DNN. -mod0, mod1 = subgraphs[0], subgraphs[1] -mod0 = relay.transform.AnnotateTarget(["dnnl"])(mod0) -mod0 = relay.transform.MergeCompilerRegions()(mod0) -mod0 = relay.transform.PartitionGraph()(mod0) -################################################# -# Get the pipeline executor configuration object. -pipe_config = pipeline_executor_build.PipelineConfig() -########################################################################### -# Set the compile target of the second subgraph module for example as LLVM. -pipe_config[mod0].target = "llvm" -pipe_config[mod0].dev = tvm.cpu(0) -############################################################################### -# Set the cpu afinity for control flow, for example using cpu 0 for control flow. -pipe_config[mod1].cpu_affinity = "0" -############################################################## -# Set the compile target of the second subgraph module as LLVM. -pipe_config[mod1].target = "llvm" -pipe_config[mod1].dev = tvm.cpu(0) -################################################################################# -# Set the cpu afinity for control flow, for example using cpu 1 for control flow. 
-pipe_config[mod1].cpu_affinity = "1" -pipe_config["input"]["data"].connect(pipe_config[mod0]["input"]["data"]) -pipe_config[mod0]["output"][0].connect(pipe_config[mod1]["input"]["data_n_0"]) -pipe_config[mod1]["output"]["0"].connect(pipe_config["output"][0]) -###################################### -# The pipeline configuration as below. -""" -print(pipe_config) - Inputs - |data: mod0:data - - output - |output(0) : mod1.output(0) - - connections - |mod0.output(0)-> mod1.data_n_0 -""" - -# sphinx_gallery_start_ignore -from tvm import testing - -testing.utils.install_request_hook(depth=3) -# sphinx_gallery_end_ignore -############################## -# Build the pipeline executor. -# ---------------------------- -with tvm.transform.PassContext(opt_level=3): - pipeline_mod_factory = pipeline_executor_build.build(pipe_config) -############################################### -# Export the parameter configuration to a file. -directory_path = tvm.contrib.utils.tempdir().temp_dir -############################################# -# If the directory does not exist, create it. -if not os.path.exists(directory_path): - os.makedirs(directory_path) -config_file_name = pipeline_mod_factory.export_library(directory_path) -################################################################ -# Use the load function to create and initialize PipelineModule. -# -------------------------------------------------------------- -pipeline_module = pipeline_executor.PipelineModule.load_library(config_file_name) - -############################ -# Run the pipeline executor. -# -------------------------- -# Allocated a input data. -data = np.random.uniform(-1, 1, size=data_shape).astype("float32") -pipeline_module.set_input("data", tvm.nd.array(data)) -########################################################################## -# Run the two subgraph in pipeline mode and get the output asynchronously. 
-pipeline_module.run() -outputs = [] -while not outputs: - outputs = pipeline_module.get_output() - time.sleep(0.001) -###################################### -# Use graph_executor for verification. -# ------------------------------------ -# Run these two subgraphs in sequence with graph_executor to get the output. -target = "llvm" -dev = tvm.device(target, 0) -lib0 = relay.build_module.build(mod0, target, params=params) -lib1 = relay.build_module.build(mod1, target, params=params) -module0 = runtime.GraphModule(lib0["default"](dev)) -module1 = runtime.GraphModule(lib1["default"](dev)) -module0.set_input("data", data) -module0.run() -out_shape = (1, 16, 224, 224) -out = module0.get_output(0, tvm.nd.empty(out_shape)) -module1.set_input("data_n_0", out) -module1.run() -out = module1.get_output(0, tvm.nd.empty(out_shape)) -#################### -# Verify the result. -tvm.testing.assert_allclose(outputs[0].numpy(), out.numpy()) +def run_pipeline(): + from tvm.contrib import graph_executor, pipeline_executor, pipeline_executor_build + + ######################################### + # Create subgraph pipeline configuration. + # Associate the subgraph module with a target. + # Using BYOC to set the codegen of the second subgraph module. + # To use dnnl the 'USE_DNNL_CODEGEN' should set as ON in config.cmake and installing MKL-DNN. + mod0, mod1 = subgraphs[0], subgraphs[1] + mod0 = relay.transform.AnnotateTarget(["dnnl"])(mod0) + mod0 = relay.transform.MergeCompilerRegions()(mod0) + mod0 = relay.transform.PartitionGraph()(mod0) + ################################################# + # Get the pipeline executor configuration object. + pipe_config = pipeline_executor_build.PipelineConfig() + ########################################################################### + # Set the compile target of the second subgraph module for example as LLVM. 
+ pipe_config[mod0].target = "llvm" + pipe_config[mod0].dev = tvm.cpu(0) + ############################################################################### + # Set the cpu afinity for control flow, for example using cpu 0 for control flow. + pipe_config[mod1].cpu_affinity = "0" + ############################################################## + # Set the compile target of the second subgraph module as LLVM. + pipe_config[mod1].target = "llvm" + pipe_config[mod1].dev = tvm.cpu(0) + ################################################################################# + # Set the cpu afinity for control flow, for example using cpu 1 for control flow. + pipe_config[mod1].cpu_affinity = "1" + pipe_config["input"]["data"].connect(pipe_config[mod0]["input"]["data"]) + pipe_config[mod0]["output"][0].connect(pipe_config[mod1]["input"]["data_n_0"]) + pipe_config[mod1]["output"]["0"].connect(pipe_config["output"][0]) + ###################################### + # The pipeline configuration as below. + """ + print(pipe_config) + Inputs + |data: mod0:data + + output + |output(0) : mod1.output(0) + + connections + |mod0.output(0)-> mod1.data_n_0 + """ + + # sphinx_gallery_start_ignore + from tvm import testing + + testing.utils.install_request_hook(depth=3) + # sphinx_gallery_end_ignore + ############################## + # Build the pipeline executor. + # ---------------------------- + with tvm.transform.PassContext(opt_level=3): + pipeline_mod_factory = pipeline_executor_build.build(pipe_config) + ############################################### + # Export the parameter configuration to a file. + directory_path = tvm.contrib.utils.tempdir().temp_dir + ############################################# + # If the directory does not exist, create it. 
+ if not os.path.exists(directory_path): + os.makedirs(directory_path) + config_file_name = pipeline_mod_factory.export_library(directory_path) + ################################################################ + # Use the load function to create and initialize PipelineModule. + # -------------------------------------------------------------- + pipeline_module = pipeline_executor.PipelineModule.load_library(config_file_name) + + ############################ + # Run the pipeline executor. + # -------------------------- + # Allocated a input data. + data = np.random.uniform(-1, 1, size=data_shape).astype("float32") + pipeline_module.set_input("data", tvm.nd.array(data)) + ########################################################################## + # Run the two subgraph in pipeline mode and get the output asynchronously. + pipeline_module.run() + outputs = [] + while not outputs: + outputs = pipeline_module.get_output() + time.sleep(0.001) + ###################################### + # Use graph_executor for verification. + # ------------------------------------ + # Run these two subgraphs in sequence with graph_executor to get the output. + target = "llvm" + dev = tvm.device(target, 0) + lib0 = relay.build_module.build(mod0, target, params=params) + lib1 = relay.build_module.build(mod1, target, params=params) + module0 = runtime.GraphModule(lib0["default"](dev)) + module1 = runtime.GraphModule(lib1["default"](dev)) + module0.set_input("data", data) + module0.run() + out_shape = (1, 16, 224, 224) + out = module0.get_output(0, tvm.nd.empty(out_shape)) + module1.set_input("data_n_0", out) + module1.run() + out = module1.get_output(0, tvm.nd.empty(out_shape)) + #################### + # Verify the result. 
+ tvm.testing.assert_allclose(outputs[0].numpy(), out.numpy()) diff --git a/tests/scripts/task_config_build_gpu.sh b/tests/scripts/task_config_build_gpu.sh index 9357fa6c8078..f45dd584c862 100755 --- a/tests/scripts/task_config_build_gpu.sh +++ b/tests/scripts/task_config_build_gpu.sh @@ -47,5 +47,5 @@ echo set\(USE_LIBBACKTRACE AUTO\) >> config.cmake echo set\(USE_CCACHE OFF\) >> config.cmake echo set\(SUMMARIZE ON\) >> config.cmake echo set\(HIDE_PRIVATE_SYMBOLS ON\) >> config.cmake -echo set\(USE_PIPELINE_EXECUTOR ON\) >> config.cmake -echo set\(USE_DNNL ON\) >> config.cmake +#echo set\(USE_PIPELINE_EXECUTOR ON\) >> config.cmake +#echo set\(USE_DNNL ON\) >> config.cmake From 77099740debd43a47fa3504f60776530a7b5e42d Mon Sep 17 00:00:00 2001 From: huajsj Date: Tue, 5 Jul 2022 14:44:33 -0700 Subject: [PATCH 22/39] plint --- .../how_to/work_with_relay/using_with_pipeline_executor.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/gallery/how_to/work_with_relay/using_with_pipeline_executor.py b/gallery/how_to/work_with_relay/using_with_pipeline_executor.py index 4b5cba3702a5..7a6d68423b9e 100755 --- a/gallery/how_to/work_with_relay/using_with_pipeline_executor.py +++ b/gallery/how_to/work_with_relay/using_with_pipeline_executor.py @@ -102,6 +102,12 @@ def @main(%data_n_0: Tensor[(1, 16, 224, 224), float32]) { } """ +# sphinx_gallery_start_ignore +from tvm import testing + +testing.utils.install_request_hook(depth=3) +# sphinx_gallery_end_ignore + ########################################################### # Run the two subgraphs in pipeline with pipeline executor. 
# --------------------------------------------------------- From 745ec3bbb21cef7ff5e30f358a9546662ef93352 Mon Sep 17 00:00:00 2001 From: huajsj Date: Tue, 5 Jul 2022 21:01:41 -0700 Subject: [PATCH 23/39] enable DNNL and pipeline --- tests/scripts/task_config_build_gpu.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/scripts/task_config_build_gpu.sh b/tests/scripts/task_config_build_gpu.sh index f45dd584c862..9357fa6c8078 100755 --- a/tests/scripts/task_config_build_gpu.sh +++ b/tests/scripts/task_config_build_gpu.sh @@ -47,5 +47,5 @@ echo set\(USE_LIBBACKTRACE AUTO\) >> config.cmake echo set\(USE_CCACHE OFF\) >> config.cmake echo set\(SUMMARIZE ON\) >> config.cmake echo set\(HIDE_PRIVATE_SYMBOLS ON\) >> config.cmake -#echo set\(USE_PIPELINE_EXECUTOR ON\) >> config.cmake -#echo set\(USE_DNNL ON\) >> config.cmake +echo set\(USE_PIPELINE_EXECUTOR ON\) >> config.cmake +echo set\(USE_DNNL ON\) >> config.cmake From e14d431ecaed10e7a7dd2d3c6bb3aaafceccf6e8 Mon Sep 17 00:00:00 2001 From: huajsj Date: Tue, 5 Jul 2022 23:35:19 -0700 Subject: [PATCH 24/39] disable DNNL --- tests/scripts/task_config_build_gpu.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/scripts/task_config_build_gpu.sh b/tests/scripts/task_config_build_gpu.sh index 9357fa6c8078..86e1ad3f7a61 100755 --- a/tests/scripts/task_config_build_gpu.sh +++ b/tests/scripts/task_config_build_gpu.sh @@ -48,4 +48,4 @@ echo set\(USE_CCACHE OFF\) >> config.cmake echo set\(SUMMARIZE ON\) >> config.cmake echo set\(HIDE_PRIVATE_SYMBOLS ON\) >> config.cmake echo set\(USE_PIPELINE_EXECUTOR ON\) >> config.cmake -echo set\(USE_DNNL ON\) >> config.cmake +#echo set\(USE_DNNL ON\) >> config.cmake From 6640dd6815e3c208ddab9bf22fda976e0f1a54e2 Mon Sep 17 00:00:00 2001 From: huajsj Date: Wed, 6 Jul 2022 13:15:38 -0700 Subject: [PATCH 25/39] enable DNNL without pipeline --- tests/scripts/task_config_build_gpu.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git 
a/tests/scripts/task_config_build_gpu.sh b/tests/scripts/task_config_build_gpu.sh index 86e1ad3f7a61..4dd25735e0b8 100755 --- a/tests/scripts/task_config_build_gpu.sh +++ b/tests/scripts/task_config_build_gpu.sh @@ -47,5 +47,5 @@ echo set\(USE_LIBBACKTRACE AUTO\) >> config.cmake echo set\(USE_CCACHE OFF\) >> config.cmake echo set\(SUMMARIZE ON\) >> config.cmake echo set\(HIDE_PRIVATE_SYMBOLS ON\) >> config.cmake -echo set\(USE_PIPELINE_EXECUTOR ON\) >> config.cmake -#echo set\(USE_DNNL ON\) >> config.cmake +#echo set\(USE_PIPELINE_EXECUTOR ON\) >> config.cmake +echo set\(USE_DNNL ON\) >> config.cmake From f5b61fd73222b6e3e1f80ea5b15d9eccc023a997 Mon Sep 17 00:00:00 2001 From: huajsj Date: Mon, 11 Jul 2022 18:36:39 -0700 Subject: [PATCH 26/39] remove dnnl and add cutlass --- tests/scripts/task_config_build_gpu.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/scripts/task_config_build_gpu.sh b/tests/scripts/task_config_build_gpu.sh index 4dd25735e0b8..f79076e213cb 100755 --- a/tests/scripts/task_config_build_gpu.sh +++ b/tests/scripts/task_config_build_gpu.sh @@ -47,5 +47,5 @@ echo set\(USE_LIBBACKTRACE AUTO\) >> config.cmake echo set\(USE_CCACHE OFF\) >> config.cmake echo set\(SUMMARIZE ON\) >> config.cmake echo set\(HIDE_PRIVATE_SYMBOLS ON\) >> config.cmake -#echo set\(USE_PIPELINE_EXECUTOR ON\) >> config.cmake -echo set\(USE_DNNL ON\) >> config.cmake +echo set\(USE_PIPELINE_EXECUTOR ON\) >> config.cmake +echo set\(USE_CUTLASS ON\) >> config.cmake From 50a7eb907361783f3077edcfa70331c11dc18759 Mon Sep 17 00:00:00 2001 From: huajsj Date: Tue, 12 Jul 2022 00:21:00 -0700 Subject: [PATCH 27/39] use cutlass with byoc --- .../using_with_pipeline_executor.py | 55 +++++++++++-------- 1 file changed, 33 insertions(+), 22 deletions(-) diff --git a/gallery/how_to/work_with_relay/using_with_pipeline_executor.py b/gallery/how_to/work_with_relay/using_with_pipeline_executor.py index 7a6d68423b9e..f693b76126b6 100755 --- 
a/gallery/how_to/work_with_relay/using_with_pipeline_executor.py +++ b/gallery/how_to/work_with_relay/using_with_pipeline_executor.py @@ -25,11 +25,19 @@ from tvm import te import numpy as np from tvm.contrib import graph_executor as runtime +from tvm.relay.op.contrib.cutlass import partition_for_cutlass from tvm import relay from tvm.relay import testing import tvm.testing import time - +from tvm.contrib.cutlass import ( + has_cutlass, + num_cutlass_partitions, + finalize_modules, + finalize_modules_vm, +) + +img_size = 8 ####################################################################### # Create a simple network, this network can be a pre-trained model too. # --------------------------------------------------------------------- @@ -38,7 +46,10 @@ def get_network(): out_channels = 16 batch_size = 1 - data = relay.var("data", relay.TensorType((batch_size, 3, 224, 224), "float32")) + data = relay.var("data", relay.TensorType((batch_size, 3, img_size, img_size), "float32")) + dense_weight = relay.var( + "data", relay.TensorType((batch_size, 16 * img_size * img_size), "float32") + ) weight = relay.var("weight") second_weight = relay.var("second_weight") bn_gamma = relay.var("bn_gamma") @@ -50,15 +61,10 @@ def get_network(): ) simple_net = relay.nn.batch_norm(simple_net, bn_gamma, bn_beta, bn_mmean, bn_mvar)[0] simple_net = relay.nn.relu(simple_net) - simple_net = relay.nn.conv2d( - data=simple_net, - weight=second_weight, - kernel_size=(3, 3), - channels=out_channels, - padding=(1, 1), - ) + simple_net = relay.nn.batch_flatten(simple_net) + simple_net = relay.nn.dense(simple_net, dense_weight) simple_net = relay.Function(relay.analysis.free_vars(simple_net), simple_net) - data_shape = (batch_size, 3, 224, 224) + data_shape = (batch_size, 3, img_size, img_size) net, params = testing.create_workload(simple_net) return net, params, data_shape @@ -86,19 +92,19 @@ def get_network(): """ #subgraphs[0]) - def @main(%data: Tensor[(1, 3, 224, 224), float32]) { - %0 = 
nn.conv2d(%data, meta[relay.Constant][0] /* ty=Tensor[(16, 3, 3, 3), float32] */, padding=[1, 1, 1, 1], channels=16, kernel_size=[3, 3]) /* ty=Tensor[(1, 16, 224, 224), float32] */; - %1 = nn.batch_norm(%0, meta[relay.Constant][1] /* ty=Tensor[(16), float32] */, meta[relay.Constant][2] /* ty=Tensor[(16), float32]*/, meta[relay.Constant][3] /* ty=Tensor[(16), float32] */, meta[relay.Constant][4] /* ty=Tensor[(16), float32] */) /* ty=(Tensor[(1,16, 224, 224), float32], Tensor[(16), float32], Tensor[(16), float32]) */; + def @main(%data: Tensor[(1, 3, img_size, img_size), float32]) { + %0 = nn.conv2d(%data, meta[relay.Constant][0] /* ty=Tensor[(16, 3, 3, 3), float32] */, padding=[1, 1, 1, 1], channels=16, kernel_size=[3, 3]) /* ty=Tensor[(1, 16, img_size, img_size), float32] */; + %1 = nn.batch_norm(%0, meta[relay.Constant][1] /* ty=Tensor[(16), float32] */, meta[relay.Constant][2] /* ty=Tensor[(16), float32]*/, meta[relay.Constant][3] /* ty=Tensor[(16), float32] */, meta[relay.Constant][4] /* ty=Tensor[(16), float32] */) /* ty=(Tensor[(1,16, img_size, img_size), float32], Tensor[(16), float32], Tensor[(16), float32]) */; %2 = %1.0; - nn.relu(%2) /* ty=Tensor[(1, 16, 224, 224), float32] */ + nn.relu(%2) /* ty=Tensor[(1, 16, img_size, img_size), float32] */ } peline-tutorial #subgraphs[1] - def @main(%data_n_0: Tensor[(1, 16, 224, 224), float32]) { - nn.conv2d(%data_n_0, meta[relay.Constant][0] /* ty=Tensor[(16, 16, 3, 3), float32] */, padding=[1, 1, 1, 1], channels=16, kernel_size=[3, 3]) /* ty=Tensor[(1, 16, 224, 224), float32] */ + def @main(%data_n_0: Tensor[(1, 16, img_size, img_size), float32]) { + nn.conv2d(%data_n_0, meta[relay.Constant][0] /* ty=Tensor[(16, 16, 3, 3), float32] */, padding=[1, 1, 1, 1], channels=16, kernel_size=[3, 3]) /* ty=Tensor[(1, 16, img_size, img_size), float32] */ } """ @@ -123,9 +129,11 @@ def run_pipeline(): # Using BYOC to set the codegen of the second subgraph module. 
# To use dnnl the 'USE_DNNL_CODEGEN' should set as ON in config.cmake and installing MKL-DNN. mod0, mod1 = subgraphs[0], subgraphs[1] - mod0 = relay.transform.AnnotateTarget(["dnnl"])(mod0) - mod0 = relay.transform.MergeCompilerRegions()(mod0) - mod0 = relay.transform.PartitionGraph()(mod0) + # mod0 = relay.transform.AnnotateTarget(["dnnl"])(mod0) + # mod0 = relay.transform.AnnotateTarget(["cutlass"])(mod0) + # mod0 = relay.transform.MergeCompilerRegions()(mod0) + # mod0 = relay.transform.PartitionGraph()(mod0) + mod1 = partition_for_cutlass(mod1) ################################################# # Get the pipeline executor configuration object. pipe_config = pipeline_executor_build.PipelineConfig() @@ -138,8 +146,8 @@ def run_pipeline(): pipe_config[mod1].cpu_affinity = "0" ############################################################## # Set the compile target of the second subgraph module as LLVM. - pipe_config[mod1].target = "llvm" - pipe_config[mod1].dev = tvm.cpu(0) + pipe_config[mod1].target = "cuda" + pipe_config[mod1].dev = tvm.device("cuda", 0) ################################################################################# # Set the cpu afinity for control flow, for example using cpu 1 for control flow. pipe_config[mod1].cpu_affinity = "1" @@ -208,7 +216,7 @@ def run_pipeline(): module1 = runtime.GraphModule(lib1["default"](dev)) module0.set_input("data", data) module0.run() - out_shape = (1, 16, 224, 224) + out_shape = (1, 16, img_size, img_size) out = module0.get_output(0, tvm.nd.empty(out_shape)) module1.set_input("data_n_0", out) module1.run() @@ -216,3 +224,6 @@ def run_pipeline(): #################### # Verify the result. 
tvm.testing.assert_allclose(outputs[0].numpy(), out.numpy()) + + +run_pipeline() From 0b30034bac7334bdeb361b69f84fdcc002449019 Mon Sep 17 00:00:00 2001 From: huajsj Date: Sun, 17 Jul 2022 00:06:56 -0700 Subject: [PATCH 28/39] change into cutlass --- .../using_with_pipeline_executor.py | 96 ++++++++++++++----- python/tvm/contrib/pipeline_executor.py | 9 +- python/tvm/contrib/pipeline_executor_build.py | 14 ++- 3 files changed, 89 insertions(+), 30 deletions(-) diff --git a/gallery/how_to/work_with_relay/using_with_pipeline_executor.py b/gallery/how_to/work_with_relay/using_with_pipeline_executor.py index f693b76126b6..f58edfc43021 100755 --- a/gallery/how_to/work_with_relay/using_with_pipeline_executor.py +++ b/gallery/how_to/work_with_relay/using_with_pipeline_executor.py @@ -46,9 +46,9 @@ def get_network(): out_channels = 16 batch_size = 1 - data = relay.var("data", relay.TensorType((batch_size, 3, img_size, img_size), "float32")) + data = relay.var("data", relay.TensorType((batch_size, 3, img_size, img_size), "float16")) dense_weight = relay.var( - "data", relay.TensorType((batch_size, 16 * img_size * img_size), "float32") + "dweight", relay.TensorType((batch_size, 16 * img_size * img_size), "float16") ) weight = relay.var("weight") second_weight = relay.var("second_weight") @@ -92,20 +92,22 @@ def get_network(): """ #subgraphs[0]) - def @main(%data: Tensor[(1, 3, img_size, img_size), float32]) { - %0 = nn.conv2d(%data, meta[relay.Constant][0] /* ty=Tensor[(16, 3, 3, 3), float32] */, padding=[1, 1, 1, 1], channels=16, kernel_size=[3, 3]) /* ty=Tensor[(1, 16, img_size, img_size), float32] */; - %1 = nn.batch_norm(%0, meta[relay.Constant][1] /* ty=Tensor[(16), float32] */, meta[relay.Constant][2] /* ty=Tensor[(16), float32]*/, meta[relay.Constant][3] /* ty=Tensor[(16), float32] */, meta[relay.Constant][4] /* ty=Tensor[(16), float32] */) /* ty=(Tensor[(1,16, img_size, img_size), float32], Tensor[(16), float32], Tensor[(16), float32]) */; + def @main(%data: 
Tensor[(1, 3, img_size, img_size), float16]) { + %0 = nn.conv2d(%data, meta[relay.Constant][0] /* ty=Tensor[(16, 3, 3, 3), float16] */, padding=[1, 1, 1, 1], channels=16, kernel_size=[3, 3]) /* ty=Tensor[(1, 16, img_size, img_size), float16] */; + %1 = nn.batch_norm(%0, meta[relay.Constant][1] /* ty=Tensor[(16), float16] */, meta[relay.Constant][2] /* ty=Tensor[(16), float16]*/, meta[relay.Constant][3] /* ty=Tensor[(16), float16] */, meta[relay.Constant][4] /* ty=Tensor[(16), float16] */) /* ty=(Tensor[(1,16, img_size, img_size), float16], Tensor[(16), float16], Tensor[(16), float16]) */; %2 = %1.0; - nn.relu(%2) /* ty=Tensor[(1, 16, img_size, img_size), float32] */ + nn.relu(%2) /* ty=Tensor[(1, 16, img_size, img_size), float16] */ } peline-tutorial #subgraphs[1] - def @main(%data_n_0: Tensor[(1, 16, img_size, img_size), float32]) { - nn.conv2d(%data_n_0, meta[relay.Constant][0] /* ty=Tensor[(16, 16, 3, 3), float32] */, padding=[1, 1, 1, 1], channels=16, kernel_size=[3, 3]) /* ty=Tensor[(1, 16, img_size, img_size), float32] */ + def @main(%data_n_0: Tensor[(1, 16, 8, 8), float16] /* ty=Tensor[(1, 16, 8, 8), float16] */) { + %0 = nn.batch_flatten(%data_n_0) /* ty=Tensor[(1, 1024), float16] */; + nn.dense(%0, meta[relay.Constant][0] /* ty=Tensor[(1, 1024), float16] */, units=None) /* ty=Tensor[(1, 1), float16] */ } + """ # sphinx_gallery_start_ignore @@ -113,13 +115,40 @@ def @main(%data_n_0: Tensor[(1, 16, img_size, img_size), float32]) { testing.utils.install_request_hook(depth=3) # sphinx_gallery_end_ignore +######################################### +# Build the subgraph with cutlass target. 
+# --------------------------------------- +######################################### +cutlass = tvm.target.Target( + { + "kind": "cutlass", + "sm": 80, + "use_3xtf32": True, + "split_k_slices": [1], + "profile_all_alignments": False, + "find_first_valid": True, + "use_multiprocessing": True, + "use_fast_math": False, + "tmp_dir": "./tmp", + }, + host=tvm.target.Target("llvm"), +) + + +def cutlass_build(mod, target, params=None, target_host=None, mod_name="default"): + target = [target, cutlass] + lib = relay.build_module.build( + mod, target=target, params=params, target_host=target_host, mod_name=mod_name + ) + return lib + ########################################################### # Run the two subgraphs in pipeline with pipeline executor. # --------------------------------------------------------- # Define a function to do all the codegen and pipeline executor works. # To run pipeline executor with dnnl, USE_PIPELINE_EXECUTOR need to get set as ON. -# and the 'USE_DNNL_CODEGEN' should set as ON in config.cmake and installing MKL-DNN. +# and the 'USE_CUTLASS' should set as ON in config.cmake. def run_pipeline(): from tvm.contrib import graph_executor, pipeline_executor, pipeline_executor_build @@ -127,12 +156,9 @@ def run_pipeline(): # Create subgraph pipeline configuration. # Associate the subgraph module with a target. # Using BYOC to set the codegen of the second subgraph module. - # To use dnnl the 'USE_DNNL_CODEGEN' should set as ON in config.cmake and installing MKL-DNN. + # To use cutlass the 'USE_CUTLASS' should set as ON. mod0, mod1 = subgraphs[0], subgraphs[1] - # mod0 = relay.transform.AnnotateTarget(["dnnl"])(mod0) - # mod0 = relay.transform.AnnotateTarget(["cutlass"])(mod0) - # mod0 = relay.transform.MergeCompilerRegions()(mod0) - # mod0 = relay.transform.PartitionGraph()(mod0) + # Apply cutlass as the codegen. mod1 = partition_for_cutlass(mod1) ################################################# # Get the pipeline executor configuration object. 
@@ -144,10 +170,13 @@ def run_pipeline(): ############################################################################### # Set the cpu afinity for control flow, for example using cpu 0 for control flow. pipe_config[mod1].cpu_affinity = "0" + pipe_config[mod1].export_cc = None ############################################################## # Set the compile target of the second subgraph module as LLVM. - pipe_config[mod1].target = "cuda" + pipe_config[mod1].target = "cuda" # tvm.target.Target("cuda", host=tvm.target.Target("llvm")) pipe_config[mod1].dev = tvm.device("cuda", 0) + pipe_config[mod1].build_func = cutlass_build + pipe_config[mod1].export_cc = "nvcc" ################################################################################# # Set the cpu afinity for control flow, for example using cpu 1 for control flow. pipe_config[mod1].cpu_affinity = "1" @@ -171,7 +200,7 @@ def run_pipeline(): # sphinx_gallery_start_ignore from tvm import testing - testing.utils.install_request_hook(depth=3) + # testing.utils.install_request_hook(depth=3) # sphinx_gallery_end_ignore ############################## # Build the pipeline executor. @@ -195,7 +224,7 @@ def run_pipeline(): # Run the pipeline executor. # -------------------------- # Allocated a input data. - data = np.random.uniform(-1, 1, size=data_shape).astype("float32") + data = np.random.uniform(-1, 1, size=data_shape).astype("float16") pipeline_module.set_input("data", tvm.nd.array(data)) ########################################################################## # Run the two subgraph in pipeline mode and get the output asynchronously. @@ -209,18 +238,39 @@ def run_pipeline(): # ------------------------------------ # Run these two subgraphs in sequence with graph_executor to get the output. 
target = "llvm" - dev = tvm.device(target, 0) + dev0 = tvm.device(target, 0) lib0 = relay.build_module.build(mod0, target, params=params) - lib1 = relay.build_module.build(mod1, target, params=params) - module0 = runtime.GraphModule(lib0["default"](dev)) - module1 = runtime.GraphModule(lib1["default"](dev)) + module0 = runtime.GraphModule(lib0["default"](dev0)) + cutlass = tvm.target.Target( + { + "kind": "cutlass", + "sm": 75, + "use_3xtf32": True, + "split_k_slices": [1], + "profile_all_alignments": False, + "find_first_valid": True, + "use_multiprocessing": True, + "use_fast_math": False, + "tmp_dir": "./tmp", + }, + host=tvm.target.Target("llvm"), + ) + cuda = tvm.target.Target("cuda", host=tvm.target.Target("llvm")) + lib1 = relay.build_module.build(mod1, [cuda, cutlass], params=params) + lib1 = finalize_modules(lib1, "compile.so", "./tmp") + + dev1 = tvm.device("cuda", 0) + + module1 = runtime.GraphModule(lib1["default"](dev1)) + module0.set_input("data", data) module0.run() out_shape = (1, 16, img_size, img_size) - out = module0.get_output(0, tvm.nd.empty(out_shape)) + out = module0.get_output(0, tvm.nd.empty(out_shape, "float16")) module1.set_input("data_n_0", out) module1.run() - out = module1.get_output(0, tvm.nd.empty(out_shape)) + out_shape = (1, 1) + out = module1.get_output(0, tvm.nd.empty(out_shape, "float16")) #################### # Verify the result. tvm.testing.assert_allclose(outputs[0].numpy(), out.numpy()) diff --git a/python/tvm/contrib/pipeline_executor.py b/python/tvm/contrib/pipeline_executor.py index 5ef309bb2808..f1c4e98a51d7 100644 --- a/python/tvm/contrib/pipeline_executor.py +++ b/python/tvm/contrib/pipeline_executor.py @@ -302,11 +302,16 @@ def export_library(self, directory_path): self.pipeline_mods[lib_index]["dev"].device_type, self.pipeline_mods[lib_index]["dev"].device_id, ) - # Get the graph, lib, and parameters from GraphExecutorFactoryModule. 
lib = self.pipeline_mods[lib_index]["lib"] # Export the lib, graph, and parameters to disk. - lib.export_library(mconfig["lib_name"]) + if self.pipeline_mods[lib_index]["export_cc"]: + lib.export_library( + mconfig["lib_name"], cc=self.pipeline_mods[lib_index]["export_cc"] + ) + else: + lib.export_library(mconfig["lib_name"]) + with open(mconfig["json_name"], "w") as file_handle: file_handle.write(lib.graph_json) with open(mconfig["params_name"], "wb") as file_handle: diff --git a/python/tvm/contrib/pipeline_executor_build.py b/python/tvm/contrib/pipeline_executor_build.py index 520156b47406..324383ab7ce3 100644 --- a/python/tvm/contrib/pipeline_executor_build.py +++ b/python/tvm/contrib/pipeline_executor_build.py @@ -86,7 +86,12 @@ def build(pipe_configs): # Use "mod_idx" as the key to create a "module_connection" map which is not only # for the module index but also for the module connection used to build the pipeline. module_string_config[mod_idx] = pipe_config - libs[mod_idx] = {"lib": lib, "dev": dev, "fcompile": mod_config["fcompile"]} + libs[mod_idx] = { + "lib": lib, + "dev": dev, + "fcompile": mod_config["fcompile"], + "export_cc": mod_config["export_cc"], + } # Creating a text form configuration to record the "input_connection" and the # "module_connection" information. 
The "input_connection" is used to record the @@ -132,10 +137,7 @@ def export_library(factory, directory_path): mconfig["json_name"] = "{}/json{}".format(directory_path, lib_index) mconfig["params_name"] = "{}/params{}".format(directory_path, lib_index) lib_config = factory.pipeline_mods[lib_index] - mconfig["dev"] = "{},{}".format( - lib_config["dev"].device_type, - lib_config["dev"].device_id, - ) + mconfig["dev"] = "{},{}".format(lib_config["dev"].device_type, lib_config["dev"].device_id) fcompile = lib_config["fcompile"] if not fcompile: fcompile = False @@ -413,6 +415,7 @@ def __init__(self, mod=None): self.fcompile = None self.name = None self.dev = None + self.export_cc = None self.cpu_affinity = "" self.idx = None self.mod = mod @@ -601,6 +604,7 @@ def get_config(self): "target": module.target, "fcompile": module.fcompile, "dev": module.dev, + "export_cc": module.export_cc, } # Creating a map including pipeline inputs and subgraph inputs. From 873e0273a40e67cb6e9e15d9fc7342f64938eb8e Mon Sep 17 00:00:00 2001 From: huajsj Date: Sun, 17 Jul 2022 15:03:50 -0700 Subject: [PATCH 29/39] fix doc convention issue --- .../using_with_pipeline_executor.py | 240 +++++++++--------- 1 file changed, 118 insertions(+), 122 deletions(-) diff --git a/gallery/how_to/work_with_relay/using_with_pipeline_executor.py b/gallery/how_to/work_with_relay/using_with_pipeline_executor.py index f58edfc43021..b02293864407 100755 --- a/gallery/how_to/work_with_relay/using_with_pipeline_executor.py +++ b/gallery/how_to/work_with_relay/using_with_pipeline_executor.py @@ -117,12 +117,12 @@ def @main(%data_n_0: Tensor[(1, 16, 8, 8), float16] /* ty=Tensor[(1, 16, 8, 8), # sphinx_gallery_end_ignore ######################################### # Build the subgraph with cutlass target. 
-# --------------------------------------- -######################################### +#---------------------------------------- + cutlass = tvm.target.Target( { "kind": "cutlass", - "sm": 80, + "sm": int(tvm.target.Target("cuda").arch.split("_")[1]), "use_3xtf32": True, "split_k_slices": [1], "profile_all_alignments": False, @@ -149,131 +149,127 @@ def cutlass_build(mod, target, params=None, target_host=None, mod_name="default" # Define a function to do all the codegen and pipeline executor works. # To run pipeline executor with dnnl, USE_PIPELINE_EXECUTOR need to get set as ON. # and the 'USE_CUTLASS' should set as ON in config.cmake. -def run_pipeline(): - from tvm.contrib import graph_executor, pipeline_executor, pipeline_executor_build - - ######################################### - # Create subgraph pipeline configuration. - # Associate the subgraph module with a target. - # Using BYOC to set the codegen of the second subgraph module. - # To use cutlass the 'USE_CUTLASS' should set as ON. - mod0, mod1 = subgraphs[0], subgraphs[1] - # Apply cutlass as the codegen. - mod1 = partition_for_cutlass(mod1) - ################################################# - # Get the pipeline executor configuration object. - pipe_config = pipeline_executor_build.PipelineConfig() - ########################################################################### - # Set the compile target of the second subgraph module for example as LLVM. - pipe_config[mod0].target = "llvm" - pipe_config[mod0].dev = tvm.cpu(0) - ############################################################################### - # Set the cpu afinity for control flow, for example using cpu 0 for control flow. - pipe_config[mod1].cpu_affinity = "0" - pipe_config[mod1].export_cc = None - ############################################################## - # Set the compile target of the second subgraph module as LLVM. 
- pipe_config[mod1].target = "cuda" # tvm.target.Target("cuda", host=tvm.target.Target("llvm")) - pipe_config[mod1].dev = tvm.device("cuda", 0) - pipe_config[mod1].build_func = cutlass_build - pipe_config[mod1].export_cc = "nvcc" - ################################################################################# - # Set the cpu afinity for control flow, for example using cpu 1 for control flow. - pipe_config[mod1].cpu_affinity = "1" - pipe_config["input"]["data"].connect(pipe_config[mod0]["input"]["data"]) - pipe_config[mod0]["output"][0].connect(pipe_config[mod1]["input"]["data_n_0"]) - pipe_config[mod1]["output"]["0"].connect(pipe_config["output"][0]) - ###################################### - # The pipeline configuration as below. - """ - print(pipe_config) - Inputs - |data: mod0:data +from tvm.contrib import graph_executor, pipeline_executor, pipeline_executor_build - output - |output(0) : mod1.output(0) - - connections - |mod0.output(0)-> mod1.data_n_0 - """ +######################################### +# Create subgraph pipeline configuration. +# Associate the subgraph module with a target. +# Using BYOC to set the codegen of the second subgraph module. +# To use cutlass the 'USE_CUTLASS' should set as ON. +mod0, mod1 = subgraphs[0], subgraphs[1] +# Use cutlass as the codegen. +mod1 = partition_for_cutlass(mod1) +################################################# +# Get the pipeline executor configuration object. +pipe_config = pipeline_executor_build.PipelineConfig() +########################################################################### +# Set the compile target of the second subgraph module for example as LLVM. +pipe_config[mod0].target = "llvm" +pipe_config[mod0].dev = tvm.cpu(0) +############################################################################### +# Set the cpu afinity for control flow, for example using cpu 0 for control flow. 
+pipe_config[mod1].cpu_affinity = "0" +pipe_config[mod1].export_cc = None +############################################################## +# Set the compile target of the second subgraph module as LLVM. +pipe_config[mod1].target = "cuda" # tvm.target.Target("cuda", host=tvm.target.Target("llvm")) +pipe_config[mod1].dev = tvm.device("cuda", 0) +pipe_config[mod1].build_func = cutlass_build +pipe_config[mod1].export_cc = "nvcc" +################################################################################# +# Set the cpu afinity for control flow, for example using cpu 1 for control flow. +pipe_config[mod1].cpu_affinity = "1" +pipe_config["input"]["data"].connect(pipe_config[mod0]["input"]["data"]) +pipe_config[mod0]["output"][0].connect(pipe_config[mod1]["input"]["data_n_0"]) +pipe_config[mod1]["output"]["0"].connect(pipe_config["output"][0]) +###################################### +# The pipeline configuration as below. +""" +print(pipe_config) + Inputs + |data: mod0:data - # sphinx_gallery_start_ignore - from tvm import testing + output + |output(0) : mod1.output(0) - # testing.utils.install_request_hook(depth=3) - # sphinx_gallery_end_ignore - ############################## - # Build the pipeline executor. - # ---------------------------- - with tvm.transform.PassContext(opt_level=3): - pipeline_mod_factory = pipeline_executor_build.build(pipe_config) - ############################################### - # Export the parameter configuration to a file. - directory_path = tvm.contrib.utils.tempdir().temp_dir - ############################################# - # If the directory does not exist, create it. - if not os.path.exists(directory_path): - os.makedirs(directory_path) - config_file_name = pipeline_mod_factory.export_library(directory_path) - ################################################################ - # Use the load function to create and initialize PipelineModule. 
- # -------------------------------------------------------------- - pipeline_module = pipeline_executor.PipelineModule.load_library(config_file_name) + connections + |mod0.output(0)-> mod1.data_n_0 +""" - ############################ - # Run the pipeline executor. - # -------------------------- - # Allocated a input data. - data = np.random.uniform(-1, 1, size=data_shape).astype("float16") - pipeline_module.set_input("data", tvm.nd.array(data)) - ########################################################################## - # Run the two subgraph in pipeline mode and get the output asynchronously. - pipeline_module.run() - outputs = [] - while not outputs: - outputs = pipeline_module.get_output() - time.sleep(0.001) - ###################################### - # Use graph_executor for verification. - # ------------------------------------ - # Run these two subgraphs in sequence with graph_executor to get the output. - target = "llvm" - dev0 = tvm.device(target, 0) - lib0 = relay.build_module.build(mod0, target, params=params) - module0 = runtime.GraphModule(lib0["default"](dev0)) - cutlass = tvm.target.Target( - { - "kind": "cutlass", - "sm": 75, - "use_3xtf32": True, - "split_k_slices": [1], - "profile_all_alignments": False, - "find_first_valid": True, - "use_multiprocessing": True, - "use_fast_math": False, - "tmp_dir": "./tmp", - }, - host=tvm.target.Target("llvm"), - ) - cuda = tvm.target.Target("cuda", host=tvm.target.Target("llvm")) - lib1 = relay.build_module.build(mod1, [cuda, cutlass], params=params) - lib1 = finalize_modules(lib1, "compile.so", "./tmp") +# sphinx_gallery_start_ignore +from tvm import testing - dev1 = tvm.device("cuda", 0) +# testing.utils.install_request_hook(depth=3) +# sphinx_gallery_end_ignore +############################## +# Build the pipeline executor. 
+# ---------------------------- +with tvm.transform.PassContext(opt_level=3): + pipeline_mod_factory = pipeline_executor_build.build(pipe_config) +############################################### +# Export the parameter configuration to a file. +directory_path = tvm.contrib.utils.tempdir().temp_dir +############################################# +# If the directory does not exist, create it. +if not os.path.exists(directory_path): + os.makedirs(directory_path) +config_file_name = pipeline_mod_factory.export_library(directory_path) +################################################################ +# Use the load function to create and initialize PipelineModule. +# -------------------------------------------------------------- +pipeline_module = pipeline_executor.PipelineModule.load_library(config_file_name) - module1 = runtime.GraphModule(lib1["default"](dev1)) +############################ +# Run the pipeline executor. +# -------------------------- +# Allocated a input data. +data = np.random.uniform(-1, 1, size=data_shape).astype("float16") +pipeline_module.set_input("data", tvm.nd.array(data)) +########################################################################## +# Run the two subgraph in pipeline mode and get the output asynchronously. +pipeline_module.run() +outputs = [] +while not outputs: + outputs = pipeline_module.get_output() + time.sleep(0.001) +###################################### +# Use graph_executor for verification. +# ------------------------------------ +# Run these two subgraphs in sequence with graph_executor to get the output. 
+target = "llvm" +dev0 = tvm.device(target, 0) +lib0 = relay.build_module.build(mod0, target, params=params) +module0 = runtime.GraphModule(lib0["default"](dev0)) +cutlass = tvm.target.Target( + { + "kind": "cutlass", + "sm": 75, + "use_3xtf32": True, + "split_k_slices": [1], + "profile_all_alignments": False, + "find_first_valid": True, + "use_multiprocessing": True, + "use_fast_math": False, + "tmp_dir": "./tmp", + }, + host=tvm.target.Target("llvm"), +) +cuda = tvm.target.Target("cuda", host=tvm.target.Target("llvm")) +lib1 = relay.build_module.build(mod1, [cuda, cutlass], params=params) +lib1 = finalize_modules(lib1, "compile.so", "./tmp") - module0.set_input("data", data) - module0.run() - out_shape = (1, 16, img_size, img_size) - out = module0.get_output(0, tvm.nd.empty(out_shape, "float16")) - module1.set_input("data_n_0", out) - module1.run() - out_shape = (1, 1) - out = module1.get_output(0, tvm.nd.empty(out_shape, "float16")) - #################### - # Verify the result. - tvm.testing.assert_allclose(outputs[0].numpy(), out.numpy()) +dev1 = tvm.device("cuda", 0) +module1 = runtime.GraphModule(lib1["default"](dev1)) -run_pipeline() +module0.set_input("data", data) +module0.run() +out_shape = (1, 16, img_size, img_size) +out = module0.get_output(0, tvm.nd.empty(out_shape, "float16")) +module1.set_input("data_n_0", out) +module1.run() +out_shape = (1, 1) +out = module1.get_output(0, tvm.nd.empty(out_shape, "float16")) +#################### +# Verify the result. 
+tvm.testing.assert_allclose(outputs[0].numpy(), out.numpy()) From 73656af1cfb73b73f066b3861685e76162dc48fe Mon Sep 17 00:00:00 2001 From: huajsj Date: Sun, 17 Jul 2022 15:21:04 -0700 Subject: [PATCH 30/39] remove duplicate variable --- .../using_with_pipeline_executor.py | 14 -------------- 1 file changed, 14 deletions(-) diff --git a/gallery/how_to/work_with_relay/using_with_pipeline_executor.py b/gallery/how_to/work_with_relay/using_with_pipeline_executor.py index b02293864407..c03cd88ef182 100755 --- a/gallery/how_to/work_with_relay/using_with_pipeline_executor.py +++ b/gallery/how_to/work_with_relay/using_with_pipeline_executor.py @@ -240,20 +240,6 @@ def cutlass_build(mod, target, params=None, target_host=None, mod_name="default" dev0 = tvm.device(target, 0) lib0 = relay.build_module.build(mod0, target, params=params) module0 = runtime.GraphModule(lib0["default"](dev0)) -cutlass = tvm.target.Target( - { - "kind": "cutlass", - "sm": 75, - "use_3xtf32": True, - "split_k_slices": [1], - "profile_all_alignments": False, - "find_first_valid": True, - "use_multiprocessing": True, - "use_fast_math": False, - "tmp_dir": "./tmp", - }, - host=tvm.target.Target("llvm"), -) cuda = tvm.target.Target("cuda", host=tvm.target.Target("llvm")) lib1 = relay.build_module.build(mod1, [cuda, cutlass], params=params) lib1 = finalize_modules(lib1, "compile.so", "./tmp") From e4d83606fa8c3d2931b0c81d599ecb662446cb0e Mon Sep 17 00:00:00 2001 From: huajsj Date: Sun, 17 Jul 2022 16:34:26 -0700 Subject: [PATCH 31/39] fix plint issue. 
--- .../how_to/work_with_relay/using_with_pipeline_executor.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/gallery/how_to/work_with_relay/using_with_pipeline_executor.py b/gallery/how_to/work_with_relay/using_with_pipeline_executor.py index c03cd88ef182..849677cca562 100755 --- a/gallery/how_to/work_with_relay/using_with_pipeline_executor.py +++ b/gallery/how_to/work_with_relay/using_with_pipeline_executor.py @@ -115,9 +115,10 @@ def @main(%data_n_0: Tensor[(1, 16, 8, 8), float16] /* ty=Tensor[(1, 16, 8, 8), testing.utils.install_request_hook(depth=3) # sphinx_gallery_end_ignore + ######################################### # Build the subgraph with cutlass target. -#---------------------------------------- +# --------------------------------------- cutlass = tvm.target.Target( { @@ -172,7 +173,7 @@ def cutlass_build(mod, target, params=None, target_host=None, mod_name="default" pipe_config[mod1].export_cc = None ############################################################## # Set the compile target of the second subgraph module as LLVM. -pipe_config[mod1].target = "cuda" # tvm.target.Target("cuda", host=tvm.target.Target("llvm")) +pipe_config[mod1].target = "cuda" pipe_config[mod1].dev = tvm.device("cuda", 0) pipe_config[mod1].build_func = cutlass_build pipe_config[mod1].export_cc = "nvcc" From cfd2af2b73ae3d647bafade80a0aaedc3c3b13db Mon Sep 17 00:00:00 2001 From: huajsj Date: Wed, 20 Jul 2022 19:43:23 -0700 Subject: [PATCH 32/39] address review comments. 
--- ...executor.py => using_pipeline_executor.py} | 26 +++++++------------ 1 file changed, 10 insertions(+), 16 deletions(-) rename gallery/how_to/work_with_relay/{using_with_pipeline_executor.py => using_pipeline_executor.py} (92%) diff --git a/gallery/how_to/work_with_relay/using_with_pipeline_executor.py b/gallery/how_to/work_with_relay/using_pipeline_executor.py similarity index 92% rename from gallery/how_to/work_with_relay/using_with_pipeline_executor.py rename to gallery/how_to/work_with_relay/using_pipeline_executor.py index 849677cca562..9161a66a90a7 100755 --- a/gallery/how_to/work_with_relay/using_with_pipeline_executor.py +++ b/gallery/how_to/work_with_relay/using_pipeline_executor.py @@ -42,7 +42,7 @@ # Create a simple network, this network can be a pre-trained model too. # --------------------------------------------------------------------- # Let's create a very simple network for demonstration. -# It consists of convolution, batch normalization, and ReLU activation. +# It consists of convolution, batch normalization, dense, and ReLU activation. def get_network(): out_channels = 16 batch_size = 1 @@ -73,8 +73,8 @@ def get_network(): ########################################### # Splitting the network into two subgraphs. # ----------------------------------------- -# We use an testing linear graph splitting function as a example. User also can create their -# own splitting function logic. +# The graph splitting function comming from a uinit test is an example. User can create a +# customized function for graph splitting. 
import inspect import os @@ -99,8 +99,6 @@ def @main(%data: Tensor[(1, 3, img_size, img_size), float16]) { nn.relu(%2) /* ty=Tensor[(1, 16, img_size, img_size), float16] */ } -peline-tutorial - #subgraphs[1] def @main(%data_n_0: Tensor[(1, 16, 8, 8), float16] /* ty=Tensor[(1, 16, 8, 8), float16] */) { @@ -147,16 +145,13 @@ def cutlass_build(mod, target, params=None, target_host=None, mod_name="default" ########################################################### # Run the two subgraphs in pipeline with pipeline executor. # --------------------------------------------------------- -# Define a function to do all the codegen and pipeline executor works. -# To run pipeline executor with dnnl, USE_PIPELINE_EXECUTOR need to get set as ON. -# and the 'USE_CUTLASS' should set as ON in config.cmake. +# Set 'USE_PIPELINE_EXECUTOR' as ON, and set USE_CUTLASS' as ON. from tvm.contrib import graph_executor, pipeline_executor, pipeline_executor_build ######################################### # Create subgraph pipeline configuration. -# Associate the subgraph module with a target. -# Using BYOC to set the codegen of the second subgraph module. -# To use cutlass the 'USE_CUTLASS' should set as ON. +# Associate a subgraph module with a target. +# Use CUTLASS BYOC to build the second subgraph module. mod0, mod1 = subgraphs[0], subgraphs[1] # Use cutlass as the codegen. mod1 = partition_for_cutlass(mod1) @@ -164,7 +159,7 @@ def cutlass_build(mod, target, params=None, target_host=None, mod_name="default" # Get the pipeline executor configuration object. pipe_config = pipeline_executor_build.PipelineConfig() ########################################################################### -# Set the compile target of the second subgraph module for example as LLVM. +# Set the compile target of the subgraph module. 
pipe_config[mod0].target = "llvm" pipe_config[mod0].dev = tvm.cpu(0) ############################################################################### @@ -172,7 +167,7 @@ def cutlass_build(mod, target, params=None, target_host=None, mod_name="default" pipe_config[mod1].cpu_affinity = "0" pipe_config[mod1].export_cc = None ############################################################## -# Set the compile target of the second subgraph module as LLVM. +# Set the compile target of the second subgraph module as cuda. pipe_config[mod1].target = "cuda" pipe_config[mod1].dev = tvm.device("cuda", 0) pipe_config[mod1].build_func = cutlass_build @@ -212,8 +207,7 @@ def cutlass_build(mod, target, params=None, target_host=None, mod_name="default" directory_path = tvm.contrib.utils.tempdir().temp_dir ############################################# # If the directory does not exist, create it. -if not os.path.exists(directory_path): - os.makedirs(directory_path) +os.makedirs(directory_path, exist_ok=True) config_file_name = pipeline_mod_factory.export_library(directory_path) ################################################################ # Use the load function to create and initialize PipelineModule. @@ -223,7 +217,7 @@ def cutlass_build(mod, target, params=None, target_host=None, mod_name="default" ############################ # Run the pipeline executor. # -------------------------- -# Allocated a input data. +# Allocate input data. 
data = np.random.uniform(-1, 1, size=data_shape).astype("float16") pipeline_module.set_input("data", tvm.nd.array(data)) ########################################################################## From a1fc852e281749c6997a38efb1bf9a61117bb774 Mon Sep 17 00:00:00 2001 From: huajsj Date: Wed, 20 Jul 2022 23:07:36 -0700 Subject: [PATCH 33/39] address review comments --- .../work_with_relay/using_pipeline_executor.py | 12 ++++-------- python/tvm/contrib/pipeline_executor.py | 16 ++++++++++++++-- 2 files changed, 18 insertions(+), 10 deletions(-) diff --git a/gallery/how_to/work_with_relay/using_pipeline_executor.py b/gallery/how_to/work_with_relay/using_pipeline_executor.py index 9161a66a90a7..98c39f699386 100755 --- a/gallery/how_to/work_with_relay/using_pipeline_executor.py +++ b/gallery/how_to/work_with_relay/using_pipeline_executor.py @@ -145,7 +145,7 @@ def cutlass_build(mod, target, params=None, target_host=None, mod_name="default" ########################################################### # Run the two subgraphs in pipeline with pipeline executor. # --------------------------------------------------------- -# Set 'USE_PIPELINE_EXECUTOR' as ON, and set USE_CUTLASS' as ON. +# Set 'USE_PIPELINE_EXECUTOR' as ON, and set USE_CUTLASS' as ON in cmake. from tvm.contrib import graph_executor, pipeline_executor, pipeline_executor_build ######################################### @@ -205,8 +205,6 @@ def cutlass_build(mod, target, params=None, target_host=None, mod_name="default" ############################################### # Export the parameter configuration to a file. directory_path = tvm.contrib.utils.tempdir().temp_dir -############################################# -# If the directory does not exist, create it. 
os.makedirs(directory_path, exist_ok=True) config_file_name = pipeline_mod_factory.export_library(directory_path) ################################################################ @@ -221,12 +219,10 @@ def cutlass_build(mod, target, params=None, target_host=None, mod_name="default" data = np.random.uniform(-1, 1, size=data_shape).astype("float16") pipeline_module.set_input("data", tvm.nd.array(data)) ########################################################################## -# Run the two subgraph in pipeline mode and get the output asynchronously. +# Run the two subgraph in the pipeline mode to get the output asynchronously +# or synchronously. In the following example, it is synchronous. pipeline_module.run() -outputs = [] -while not outputs: - outputs = pipeline_module.get_output() - time.sleep(0.001) +outputs = pipeline_module.get_output() ###################################### # Use graph_executor for verification. # ------------------------------------ diff --git a/python/tvm/contrib/pipeline_executor.py b/python/tvm/contrib/pipeline_executor.py index f1c4e98a51d7..f4ff3b8b6eb2 100644 --- a/python/tvm/contrib/pipeline_executor.py +++ b/python/tvm/contrib/pipeline_executor.py @@ -131,14 +131,26 @@ def get_input(self, key): """ return self._get_input(key) - def get_output(self): + def get_output(self, synchronize=True, sleep_interval=0.001): """Get the output. Returns ------- data : Array[NDArray] A list of output data. + synchronize : BOOL + Whether to do a synchronize poll. + sleep_interval : Float32 + When doing the synchronize loop poll, how many seconds the loop should sleep for yield. 
""" - return self._get_output() + outputs = [] + if not synchronize: + outputs = self._get_output() + else: + while not outputs: + outputs = pipeline_module.get_output() + time.sleep(sleep_interval) + + return outputs @property def num_executing_pipeline(self): From 60c89533e39d74afb6996f3d10f7bde717e50bde Mon Sep 17 00:00:00 2001 From: huajsj Date: Wed, 20 Jul 2022 23:14:54 -0700 Subject: [PATCH 34/39] fix bug. --- gallery/how_to/work_with_relay/using_pipeline_executor.py | 1 - python/tvm/contrib/pipeline_executor.py | 3 ++- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/gallery/how_to/work_with_relay/using_pipeline_executor.py b/gallery/how_to/work_with_relay/using_pipeline_executor.py index 98c39f699386..c848cbc85d92 100755 --- a/gallery/how_to/work_with_relay/using_pipeline_executor.py +++ b/gallery/how_to/work_with_relay/using_pipeline_executor.py @@ -29,7 +29,6 @@ from tvm import relay from tvm.relay import testing import tvm.testing -import time from tvm.contrib.cutlass import ( has_cutlass, num_cutlass_partitions, diff --git a/python/tvm/contrib/pipeline_executor.py b/python/tvm/contrib/pipeline_executor.py index f4ff3b8b6eb2..2543ff66db89 100644 --- a/python/tvm/contrib/pipeline_executor.py +++ b/python/tvm/contrib/pipeline_executor.py @@ -20,6 +20,7 @@ from tvm import runtime from tvm._ffi import get_global_func from tvm.contrib import graph_executor +import time def pipeline_executor_enabled(): @@ -147,7 +148,7 @@ def get_output(self, synchronize=True, sleep_interval=0.001): outputs = self._get_output() else: while not outputs: - outputs = pipeline_module.get_output() + outputs = self._get_output() time.sleep(sleep_interval) return outputs From 420e95180d4dfb4aa37308300805e75f4fcf089a Mon Sep 17 00:00:00 2001 From: huajsj Date: Wed, 20 Jul 2022 23:29:13 -0700 Subject: [PATCH 35/39] polish the document --- gallery/how_to/work_with_relay/using_pipeline_executor.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git 
a/gallery/how_to/work_with_relay/using_pipeline_executor.py b/gallery/how_to/work_with_relay/using_pipeline_executor.py index c848cbc85d92..2c836eacdf42 100755 --- a/gallery/how_to/work_with_relay/using_pipeline_executor.py +++ b/gallery/how_to/work_with_relay/using_pipeline_executor.py @@ -72,8 +72,8 @@ def get_network(): ########################################### # Splitting the network into two subgraphs. # ----------------------------------------- -# The graph splitting function comming from a uinit test is an example. User can create a -# customized function for graph splitting. +# It is an example that the graph splitting function comes from a unit test. User can create a +# customized function to split the graph. import inspect import os From b998f12e167f3fa607d61365642543007ce3d964 Mon Sep 17 00:00:00 2001 From: huajsj Date: Wed, 20 Jul 2022 23:49:21 -0700 Subject: [PATCH 36/39] fix plint issue --- python/tvm/contrib/pipeline_executor.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/tvm/contrib/pipeline_executor.py b/python/tvm/contrib/pipeline_executor.py index 2543ff66db89..b61463073749 100644 --- a/python/tvm/contrib/pipeline_executor.py +++ b/python/tvm/contrib/pipeline_executor.py @@ -17,10 +17,10 @@ """Pipeline executor that executes a series of modules in a pipeline fashion.""" import json import os +import time from tvm import runtime from tvm._ffi import get_global_func from tvm.contrib import graph_executor -import time def pipeline_executor_enabled(): From 1a930aff1b3779425e5525481a6c53e101b33826 Mon Sep 17 00:00:00 2001 From: huajsj Date: Thu, 21 Jul 2022 07:47:09 -0700 Subject: [PATCH 37/39] address review comments. 
--- gallery/how_to/work_with_relay/using_pipeline_executor.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/gallery/how_to/work_with_relay/using_pipeline_executor.py b/gallery/how_to/work_with_relay/using_pipeline_executor.py index 2c836eacdf42..bde6e4cb706e 100755 --- a/gallery/how_to/work_with_relay/using_pipeline_executor.py +++ b/gallery/how_to/work_with_relay/using_pipeline_executor.py @@ -72,8 +72,8 @@ def get_network(): ########################################### # Splitting the network into two subgraphs. # ----------------------------------------- -# It is an example that the graph splitting function comes from a unit test. User can create a -# customized function to split the graph. +# This function called 'graph_split' from a unit test is just an example. Users can create customized logic +# to split the graph. import inspect import os @@ -162,9 +162,8 @@ def cutlass_build(mod, target, params=None, target_host=None, mod_name="default" pipe_config[mod0].target = "llvm" pipe_config[mod0].dev = tvm.cpu(0) ############################################################################### -# Set the cpu afinity for control flow, for example using cpu 0 for control flow. +# Set the cpu affinity for control flow, for example using cpu 0 for control flow. pipe_config[mod1].cpu_affinity = "0" -pipe_config[mod1].export_cc = None ############################################################## # Set the compile target of the second subgraph module as cuda.
pipe_config[mod1].target = "cuda" From 7449ff7ed82b301a9cc3cacbc92e844f228c541d Mon Sep 17 00:00:00 2001 From: huajsj Date: Fri, 22 Jul 2022 00:38:41 -0700 Subject: [PATCH 38/39] address review comments --- .../work_with_relay/using_pipeline_executor.py | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/gallery/how_to/work_with_relay/using_pipeline_executor.py b/gallery/how_to/work_with_relay/using_pipeline_executor.py index bde6e4cb706e..490ff276eff3 100755 --- a/gallery/how_to/work_with_relay/using_pipeline_executor.py +++ b/gallery/how_to/work_with_relay/using_pipeline_executor.py @@ -77,8 +77,8 @@ def get_network(): import inspect import os -test_path = os.path.dirname(inspect.getfile(lambda: None)) -os.sys.path.append(os.path.join(test_path, "../../../tests/python/relay")) +tutorial_dir = os.path.dirname(inspect.getfile(lambda: None)) +os.sys.path.append(os.path.join(tutorial_dir, "../../../tests/python/relay")) from test_pipeline_executor import graph_split ########################################### @@ -161,21 +161,19 @@ def cutlass_build(mod, target, params=None, target_host=None, mod_name="default" # Set the compile target of the subgraph module. pipe_config[mod0].target = "llvm" pipe_config[mod0].dev = tvm.cpu(0) -############################################################################### -# Set the cpu affinity for control flow, for example using cpu 0 for control flow. -pipe_config[mod1].cpu_affinity = "0" ############################################################## # Set the compile target of the second subgraph module as cuda. pipe_config[mod1].target = "cuda" pipe_config[mod1].dev = tvm.device("cuda", 0) pipe_config[mod1].build_func = cutlass_build pipe_config[mod1].export_cc = "nvcc" -################################################################################# -# Set the cpu afinity for control flow, for example using cpu 1 for control flow. 
-pipe_config[mod1].cpu_affinity = "1" +# Create the pipeline by connecting the subgraphs module. +# The global input will be forwarded to the input interface of the first moudle named mod0 pipe_config["input"]["data"].connect(pipe_config[mod0]["input"]["data"]) +# The first output of mod0 will be forwarded to the input interface of mod1 pipe_config[mod0]["output"][0].connect(pipe_config[mod1]["input"]["data_n_0"]) -pipe_config[mod1]["output"]["0"].connect(pipe_config["output"][0]) +# the first output of mod1 will be the first global output. +pipe_config[mod1]["output"][0].connect(pipe_config["output"][0]) ###################################### # The pipeline configuration as below. """ From 0dcc5bffb25f8eb2fa064637c4f9db893c373550 Mon Sep 17 00:00:00 2001 From: huajsj Date: Fri, 22 Jul 2022 08:59:00 -0700 Subject: [PATCH 39/39] address review comments --- gallery/how_to/work_with_relay/using_pipeline_executor.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/gallery/how_to/work_with_relay/using_pipeline_executor.py b/gallery/how_to/work_with_relay/using_pipeline_executor.py index 490ff276eff3..5496058265ba 100755 --- a/gallery/how_to/work_with_relay/using_pipeline_executor.py +++ b/gallery/how_to/work_with_relay/using_pipeline_executor.py @@ -167,12 +167,12 @@ def cutlass_build(mod, target, params=None, target_host=None, mod_name="default" pipe_config[mod1].dev = tvm.device("cuda", 0) pipe_config[mod1].build_func = cutlass_build pipe_config[mod1].export_cc = "nvcc" -# Create the pipeline by connecting the subgraphs module. -# The global input will be forwarded to the input interface of the first moudle named mod0 +# Create the pipeline by connecting the subgraph modules. 
+# The global input will be forwarded to the input interface of the first module named mod0 pipe_config["input"]["data"].connect(pipe_config[mod0]["input"]["data"]) # The first output of mod0 will be forwarded to the input interface of mod1 pipe_config[mod0]["output"][0].connect(pipe_config[mod1]["input"]["data_n_0"]) -# the first output of mod1 will be the first global output. +# The first output of mod1 will be the first global output. pipe_config[mod1]["output"][0].connect(pipe_config["output"][0]) ###################################### # The pipeline configuration as below.