[Megrez] supported #67

Merged (2 commits, Dec 27, 2024)
models/Megrez/README.md (+74 lines)
# Megrez-3B-Instruct

This project deploys the large language model [Megrez-3B-Instruct](https://huggingface.co/Infinigence/Megrez-3B-Instruct) on the BM1684X. The model is converted to a bmodel with the [TPU-MLIR](https://github.com/sophgo/tpu-mlir) compiler and then deployed with C++ code to a BM1684X PCIe or SoC environment.

## Development Environment Setup

#### 1. Download `Megrez-3B-Instruct` from Hugging Face

``` shell
git lfs install
git clone https://huggingface.co/Infinigence/Megrez-3B-Instruct
```

In addition, a few changes to the model source are required:
* Replace the `modeling_llama.py` file in your installed `transformers` package with the `modeling_llama.py` provided under `compile/files` (see the sketch below).
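
A minimal sketch of this replacement, assuming `transformers` was installed with pip; the exact source subpath (`compile/files/Megrez-3B-Instruct/`) is inferred from `compile/README.md` and may differ:

``` shell
# Locate the installed transformers package
TRANSFORMERS_DIR=$(python3 -c "import transformers, os; print(os.path.dirname(transformers.__file__))")
# Back up the original file, then drop in the patched version
cp "$TRANSFORMERS_DIR/models/llama/modeling_llama.py" "$TRANSFORMERS_DIR/models/llama/modeling_llama.py.bak"
cp compile/files/Megrez-3B-Instruct/modeling_llama.py "$TRANSFORMERS_DIR/models/llama/modeling_llama.py"
```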

#### 2. Export the ONNX model

If a missing component is reported during this step, simply `pip3 install` it.
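
For instance, the export dependencies listed in `compile/README.md` (later in this PR) can be installed up front:

``` shell
pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu
pip install sentencepiece transformers==4.44.1
```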

``` shell
# export the ONNX model
cd compile
python3 export_onnx.py --model_path your_model_path
```

## Compile the Model

This section describes how to compile the ONNX model into a bmodel. Alternatively, you can skip compilation and download a precompiled model directly (if the `dfss` tool is not available, it can typically be installed with `pip3 install dfss`):

``` shell
python3 -m dfss --url=open@sophgo.com:/ext_model_information/LLM/LLM-TPU/megrez_bm1684x_int4_seq512.bmodel
```

#### 1. Pull the Docker image and start a container

``` shell
docker pull sophgo/tpuc_dev:latest

# myname1234 is just an example, you can set your own name
docker run --privileged --name myname1234 -v $PWD:/workspace -it sophgo/tpuc_dev:latest bash

docker exec -it myname1234 bash
```
The rest of this document assumes the working environment is the `/workspace` directory inside the Docker container.

#### 2. Install `TPU-MLIR`

``` shell
pip3 install tpu-mlir
```

#### 3. Compile the model into a bmodel

Compile the ONNX model to generate the bmodel.

See python_demo/README.md for details.
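
For reference, `compile/README.md` later in this PR invokes the compile script as follows (run from the `compile` directory):

``` shell
./compile.sh --mode int4 --name megrez --seq_length 512
```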

## Build and Run the Program

Build the library to produce the `chat.cpython*.so` file, and copy it into the directory containing `pipeline.py`:

``` shell
cd python_demo
mkdir build
cd build && cmake .. && make && cp *cpython* .. && cd ..
```

Run the program as follows:

``` shell
python3 pipeline.py --model_path megrez_bm1684x_int4_seq512.bmodel --tokenizer_path ../support/token_config --devid 0
```
`model_path` is the actual path where the bmodel is stored; `tokenizer_path` is the actual path where the tokenizer configuration is stored.
models/Megrez/compile/README.md (+19 lines)
# Command

## Export onnx

```shell
pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu
pip install sentencepiece transformers==4.44.1
cp files/Megrez-3B-Instruct/modeling_llama.py /your/path/to/transformers/path
```

```shell
python3 export_onnx.py --model_path your_torch_model --seq_length 512 --device cpu
```

## Compile bmodel
Compile with the `io_alone` address mode (via `--addr_mode io_alone` in `compile.sh`, which gives the cache blocks' I/O tensors, i.e. the KV cache, standalone fixed addresses):

```shell
./compile.sh --mode int4 --name megrez --seq_length 512
```
models/Megrez/compile/compile.sh (+184 lines)
#!/bin/bash
set -ex
models=""
mode="int4"
quantize_args=""
device_args="" # referenced by the model_deploy.py calls below; left empty here
name="megrez"
seq_length=512

chip="bm1684x"
num_layers=32
out_model=$name.bmodel

while [[ $# -gt 0 ]]; do
key="$1"

case $key in
--mode)
mode="$2"
shift 2
;;
--name)
name="$2"
shift 2
;;
--seq_length)
seq_length="$2"
shift 2
;;
*)
echo "Invalid option: $key" >&2
exit 1
;;
esac
done

if [ "$name" = "megrez" ]; then
num_layers=32
hidden_size=2560
echo "Compile Megrez-3B-Instruct"
else
echo -e "Error: Invalid name $name, the input name must be \033[31mmegrez\033[0m"
exit 1
fi

if [ x$mode == x"int8" ]; then
quantize_args="--quantize W8BF16"
elif [ x$mode == x"bf16" ]; then
quantize_args="--quantize BF16"
elif [ x$mode == x"int4" ]; then
quantize_args="--quantize W4BF16 --q_group_size 64"
else
echo "Error, unknown quantize mode"
exit 1
fi

onnx_dir=$PWD/tmp/onnx
folder='tmp/'$name'_'$chip'_'$mode
out_model=$name'_'$chip'_'$mode'_seq'${seq_length}'.bmodel'

# Convert block
outdir=${folder}/block
mkdir -p $outdir
pushd $outdir
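
# Compile both variants of each transformer layer: block_$i runs the prefill
# pass over the full sequence; block_cache_$i runs single-token decode with
# KV cache and uses --addr_mode io_alone so the cache I/O keeps fixed addresses.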

process_block()
{
i=$1

model_transform.py \
--model_name block_$i \
--model_def ${onnx_dir}/block_$i.onnx \
--mlir block_$i.mlir

model_deploy.py \
--mlir block_$i.mlir \
$quantize_args \
--quant_input \
--quant_output \
--chip ${chip} \
$device_args \
--model block_$i.bmodel

model_transform.py \
--model_name block_cache_$i \
--model_def ${onnx_dir}/block_cache_$i.onnx \
--mlir block_cache_$i.mlir

model_deploy.py \
--mlir block_cache_$i.mlir \
$quantize_args \
--quant_input \
--quant_output \
--chip ${chip} \
$device_args \
--addr_mode io_alone \
--model block_cache_$i.bmodel

rm -f *.npz
}

# Process each block
for ((i=0; i<$num_layers; i++)); do
process_block $i
models="${models}${outdir}/block_${i}.bmodel ${outdir}/block_cache_${i}.bmodel "
done

popd
echo $models

# convert embedding
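# Two variants are built from the same weights: "embedding" embeds the whole
# prompt ([1, seq_length]) for prefill; "embedding_cache" embeds one token
# ([1, 1]) per decode step.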
outdir=${folder}/embedding
mkdir -p $outdir
pushd $outdir

model_transform.py \
--model_name embedding \
--model_def ${onnx_dir}/embedding.pt \
--input_shapes "[[1,$seq_length]]" \
--input_types "int32" \
--mlir embedding.mlir

model_deploy.py \
--mlir embedding.mlir \
--quantize BF16 \
--quant_input \
--quant_output \
--chip ${chip} \
$device_args \
--model embedding.bmodel

model_transform.py \
--model_name embedding_cache \
--model_def ${onnx_dir}/embedding.pt \
--input_shapes "[[1,1]]" \
--input_types "int32" \
--mlir embedding_cache.mlir

model_deploy.py \
--mlir embedding_cache.mlir \
--quantize BF16 \
--quant_input \
--quant_output \
--chip ${chip} \
$device_args \
--model embedding_cache.bmodel

rm -f *.npz

models=$models' '$outdir'/embedding.bmodel '$outdir'/embedding_cache.bmodel '

popd
echo $models

# convert lm_head
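# Projects the final hidden state ([1, hidden_size]) to vocabulary logits.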
outdir=${folder}/lm_head
mkdir -p $outdir
pushd $outdir

model_transform.py \
--model_name lm_head \
--model_def ${onnx_dir}/lm_head.pt \
--input_shapes "[[1,${hidden_size}]]" \
--mlir lm_head.mlir

model_deploy.py \
--mlir lm_head.mlir \
$quantize_args \
--quant_input \
--chip ${chip} \
$device_args \
--model lm_head.bmodel

rm -f *.npz

models=${models}${outdir}'/lm_head.bmodel '
popd

echo $models
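
# Combine the per-layer, embedding, and lm_head bmodels into one deployable bmodel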

model_tool --combine $models -o $out_model