From 4fb92a67d4aed2d367210ab52df90ddb483158b6 Mon Sep 17 00:00:00 2001
From: yifusun
Date: Tue, 11 Jun 2024 22:28:05 +0800
Subject: [PATCH 1/2] update for TensorRT inference compatibility on hopper architecture

---
Reviewer note (ignored by git am): this series was reconstructed from a copy
whose newlines had been collapsed. Line counts were checked against every hunk
header and diffstat below. Indentation widths are assumed (4/12 spaces for the
Python context, 2 spaces for shell bodies) - TODO confirm with
`git apply --check`; use `--ignore-whitespace` if context lines do not match.
This patch adds the `--plugin-tag` option, makes inference pick the fMHA
plugin file by tag, and adds a Hopper-specific engine build script.

 hydit/config.py            |  2 ++
 hydit/inference.py         |  7 +++-
 trt/build_engine_hopper.sh | 70 ++++++++++++++++++++++++++++++++++++++
 3 files changed, 78 insertions(+), 1 deletion(-)
 create mode 100644 trt/build_engine_hopper.sh

diff --git a/hydit/config.py b/hydit/config.py
index f09a59b..784f330 100644
--- a/hydit/config.py
+++ b/hydit/config.py
@@ -54,6 +54,8 @@ def get_args(default_args=None):
     parser.add_argument("--no-fp16", dest="use_fp16", action="store_false")
     parser.set_defaults(use_fp16=True)
     parser.add_argument("--onnx-workdir", type=str, default="onnx_model", help="Path to save ONNX model")
+    parser.add_argument("--plugin-tag", type=str, default="others", choices=["others", "hopper"], help="Plugin architecture tag for choosing plugin package")
+
 
     # Sampling
     parser.add_argument("--batch-size", type=int, default=1, help="Per-GPU batch size")
diff --git a/hydit/inference.py b/hydit/inference.py
index 7751ffb..72d3780 100644
--- a/hydit/inference.py
+++ b/hydit/inference.py
@@ -222,7 +222,12 @@ def __init__(self, args, models_root_path):
 
             trt_dir = self.root / "model_trt"
             engine_dir = trt_dir / "engine"
-            plugin_path = trt_dir / "fmha_plugins/9.2_plugin_cuda11/fMHAPlugin.so"
+
+            if self.args.plugin_tag == "others":
+                plugin_postfix = ""
+            elif self.args.plugin_tag == "hopper":
+                plugin_postfix = "_Hopper"
+            plugin_path = trt_dir / "fmha_plugins/9.2_plugin_cuda11/fMHAPlugin{}.so".format(plugin_postfix)
             model_name = "model_onnx"
 
             logger.info(f"Loading TensorRT model {engine_dir}/{model_name}...")
diff --git a/trt/build_engine_hopper.sh b/trt/build_engine_hopper.sh
new file mode 100644
index 0000000..5d5fa18
--- /dev/null
+++ b/trt/build_engine_hopper.sh
@@ -0,0 +1,70 @@
+# ==============================================================================
+# Description: Export ONNX model and build TensorRT engine.
+# ==============================================================================
+
+# Check if the model root path is exists or provided.
+if [ -z "$1" ]; then
+  if [ -d "ckpts" ]; then
+    echo "The model root directory is not provided. Use the default path 'ckpts'."
+    export MODEL_ROOT=ckpts
+  else
+    echo "Default model path 'ckpts' does not exist. Please provide the path of the model root directory."
+    exit 1
+  fi
+elif [ ! -d "$1" ]; then
+  echo "The model root directory ($1) does not exist."
+  exit 1
+else
+  export MODEL_ROOT=$(cd "$1"; pwd)
+fi
+
+export ONNX_WORKDIR=${MODEL_ROOT}/onnx_model
+echo "MODEL_ROOT=${MODEL_ROOT}"
+echo "ONNX_WORKDIR=${ONNX_WORKDIR}"
+
+# Remove old directories.
+if [ -d "${ONNX_WORKDIR}" ]; then
+  echo "Remove old ONNX directories..."
+  rm -r ${ONNX_WORKDIR}
+fi
+
+# Inspect the project directory.
+SCRIPT_PATH="$( cd "$( dirname "$0" )" && pwd )"
+PROJECT_DIR=$(dirname "$SCRIPT_PATH")
+export PYTHONPATH=${PROJECT_DIR}:${PYTHONPATH}
+echo "PYTHONPATH=${PYTHONPATH}"
+cd ${PROJECT_DIR}
+echo "Change directory to ${PROJECT_DIR}"
+
+# ----------------------------------------
+# 1. Export ONNX model.
+# ----------------------------------------
+
+# Sleep for reading the message.
+sleep 2s
+
+echo "Exporting ONNX model..."
+python trt/export_onnx.py --model-root ${MODEL_ROOT} --onnx-workdir ${ONNX_WORKDIR}
+echo "Exporting ONNX model finished"
+
+# ----------------------------------------
+# 2. Build TensorRT engine.
+# ----------------------------------------
+
+echo "Building TensorRT engine..."
+ENGINE_DIR="${MODEL_ROOT}/t2i/model_trt/engine"
+mkdir -p ${ENGINE_DIR}
+ENGINE_PATH=${ENGINE_DIR}/model_onnx.plan
+PLUGIN_PATH=${MODEL_ROOT}/t2i/model_trt/fmha_plugins/9.2_plugin_cuda11/fMHAPlugin_Hopper.so
+
+trtexec \
+    --onnx=${ONNX_WORKDIR}/export_modified_fmha/model.onnx \
+    --fp16 \
+    --saveEngine=${ENGINE_PATH} \
+    --minShapes=x:2x4x90x90,t:2,encoder_hidden_states:2x77x1024,text_embedding_mask:2x77,encoder_hidden_states_t5:2x256x2048,text_embedding_mask_t5:2x256,image_meta_size:2x6,style:2,cos_cis_img:2025x88,sin_cis_img:2025x88 \
+    --optShapes=x:2x4x128x128,t:2,encoder_hidden_states:2x77x1024,text_embedding_mask:2x77,encoder_hidden_states_t5:2x256x2048,text_embedding_mask_t5:2x256,image_meta_size:2x6,style:2,cos_cis_img:4096x88,sin_cis_img:4096x88 \
+    --maxShapes=x:2x4x160x160,t:2,encoder_hidden_states:2x77x1024,text_embedding_mask:2x77,encoder_hidden_states_t5:2x256x2048,text_embedding_mask_t5:2x256,image_meta_size:2x6,style:2,cos_cis_img:6400x88,sin_cis_img:6400x88 \
+    --shapes=x:2x4x128x128,t:2,encoder_hidden_states:2x77x1024,text_embedding_mask:2x77,encoder_hidden_states_t5:2x256x2048,text_embedding_mask_t5:2x256,image_meta_size:2x6,style:2,cos_cis_img:4096x88,sin_cis_img:4096x88 \
+    --verbose \
+    --builderOptimizationLevel=4 \
+    --staticPlugins=${PLUGIN_PATH}

From 00e91e99f9af03ff5bcc01e5f2a5a5e107c58953 Mon Sep 17 00:00:00 2001
From: yifusun
Date: Tue, 11 Jun 2024 23:00:12 +0800
Subject: [PATCH 2/2] update for TensorRT inference compatibility on the Hopper architecture

---
Reviewer note (ignored by git am): reconstructed from a newline-collapsed copy;
indentation widths are assumed - TODO confirm with `git apply --check`.
This patch folds the Hopper build into trt/build_engine.sh: an optional second
argument is used as the plugin file-name suffix (the path checked is
fMHAPlugin${PLUGIN_SUFFIX}.so), the script exits with status 1 when that file
does not exist, and trt/build_engine_hopper.sh is deleted.

 trt/build_engine.sh        | 16 ++++++++-
 trt/build_engine_hopper.sh | 70 --------------------------------------
 2 files changed, 15 insertions(+), 71 deletions(-)
 delete mode 100644 trt/build_engine_hopper.sh

diff --git a/trt/build_engine.sh b/trt/build_engine.sh
index 02549cc..e475de9 100644
--- a/trt/build_engine.sh
+++ b/trt/build_engine.sh
@@ -18,6 +18,20 @@ else
   export MODEL_ROOT=$(cd "$1"; pwd)
 fi
 
+if [ $# -ge 2 ]; then
+  PLUGIN_SUFFIX="$2"
+else
+  PLUGIN_SUFFIX=""
+fi
+PLUGIN_PATH=${MODEL_ROOT}/t2i/model_trt/fmha_plugins/9.2_plugin_cuda11/fMHAPlugin${PLUGIN_SUFFIX}.so
+
+if [ -e "$PLUGIN_PATH" ]; then
+  echo "Plugin exists."
+else
+  echo "Invalid PLUGIN_SUFFIX or MODEL_ROOT"
+  exit 1
+fi
+
 export ONNX_WORKDIR=${MODEL_ROOT}/onnx_model
 echo "MODEL_ROOT=${MODEL_ROOT}"
 echo "ONNX_WORKDIR=${ONNX_WORKDIR}"
@@ -55,7 +69,7 @@ echo "Building TensorRT engine..."
 ENGINE_DIR="${MODEL_ROOT}/t2i/model_trt/engine"
 mkdir -p ${ENGINE_DIR}
 ENGINE_PATH=${ENGINE_DIR}/model_onnx.plan
-PLUGIN_PATH=${MODEL_ROOT}/t2i/model_trt/fmha_plugins/9.2_plugin_cuda11/fMHAPlugin.so
+
 
 trtexec \
     --onnx=${ONNX_WORKDIR}/export_modified_fmha/model.onnx \
diff --git a/trt/build_engine_hopper.sh b/trt/build_engine_hopper.sh
deleted file mode 100644
index 5d5fa18..0000000
--- a/trt/build_engine_hopper.sh
+++ /dev/null
@@ -1,70 +0,0 @@
-# ==============================================================================
-# Description: Export ONNX model and build TensorRT engine.
-# ==============================================================================
-
-# Check if the model root path is exists or provided.
-if [ -z "$1" ]; then
-  if [ -d "ckpts" ]; then
-    echo "The model root directory is not provided. Use the default path 'ckpts'."
-    export MODEL_ROOT=ckpts
-  else
-    echo "Default model path 'ckpts' does not exist. Please provide the path of the model root directory."
-    exit 1
-  fi
-elif [ ! -d "$1" ]; then
-  echo "The model root directory ($1) does not exist."
-  exit 1
-else
-  export MODEL_ROOT=$(cd "$1"; pwd)
-fi
-
-export ONNX_WORKDIR=${MODEL_ROOT}/onnx_model
-echo "MODEL_ROOT=${MODEL_ROOT}"
-echo "ONNX_WORKDIR=${ONNX_WORKDIR}"
-
-# Remove old directories.
-if [ -d "${ONNX_WORKDIR}" ]; then
-  echo "Remove old ONNX directories..."
-  rm -r ${ONNX_WORKDIR}
-fi
-
-# Inspect the project directory.
-SCRIPT_PATH="$( cd "$( dirname "$0" )" && pwd )"
-PROJECT_DIR=$(dirname "$SCRIPT_PATH")
-export PYTHONPATH=${PROJECT_DIR}:${PYTHONPATH}
-echo "PYTHONPATH=${PYTHONPATH}"
-cd ${PROJECT_DIR}
-echo "Change directory to ${PROJECT_DIR}"
-
-# ----------------------------------------
-# 1. Export ONNX model.
-# ----------------------------------------
-
-# Sleep for reading the message.
-sleep 2s
-
-echo "Exporting ONNX model..."
-python trt/export_onnx.py --model-root ${MODEL_ROOT} --onnx-workdir ${ONNX_WORKDIR}
-echo "Exporting ONNX model finished"
-
-# ----------------------------------------
-# 2. Build TensorRT engine.
-# ----------------------------------------
-
-echo "Building TensorRT engine..."
-ENGINE_DIR="${MODEL_ROOT}/t2i/model_trt/engine"
-mkdir -p ${ENGINE_DIR}
-ENGINE_PATH=${ENGINE_DIR}/model_onnx.plan
-PLUGIN_PATH=${MODEL_ROOT}/t2i/model_trt/fmha_plugins/9.2_plugin_cuda11/fMHAPlugin_Hopper.so
-
-trtexec \
-    --onnx=${ONNX_WORKDIR}/export_modified_fmha/model.onnx \
-    --fp16 \
-    --saveEngine=${ENGINE_PATH} \
-    --minShapes=x:2x4x90x90,t:2,encoder_hidden_states:2x77x1024,text_embedding_mask:2x77,encoder_hidden_states_t5:2x256x2048,text_embedding_mask_t5:2x256,image_meta_size:2x6,style:2,cos_cis_img:2025x88,sin_cis_img:2025x88 \
-    --optShapes=x:2x4x128x128,t:2,encoder_hidden_states:2x77x1024,text_embedding_mask:2x77,encoder_hidden_states_t5:2x256x2048,text_embedding_mask_t5:2x256,image_meta_size:2x6,style:2,cos_cis_img:4096x88,sin_cis_img:4096x88 \
-    --maxShapes=x:2x4x160x160,t:2,encoder_hidden_states:2x77x1024,text_embedding_mask:2x77,encoder_hidden_states_t5:2x256x2048,text_embedding_mask_t5:2x256,image_meta_size:2x6,style:2,cos_cis_img:6400x88,sin_cis_img:6400x88 \
-    --shapes=x:2x4x128x128,t:2,encoder_hidden_states:2x77x1024,text_embedding_mask:2x77,encoder_hidden_states_t5:2x256x2048,text_embedding_mask_t5:2x256,image_meta_size:2x6,style:2,cos_cis_img:4096x88,sin_cis_img:4096x88 \
-    --verbose \
-    --builderOptimizationLevel=4 \
-    --staticPlugins=${PLUGIN_PATH}