From da3442dd3267f80196e66c27ad94a85b47fe49fa Mon Sep 17 00:00:00 2001
From: "Wang, Mengni"
Date: Mon, 13 Nov 2023 13:07:45 +0800
Subject: [PATCH] Fix ONNXRT example with upgraded optimum 1.14.0 (#1381)

Signed-off-by: Mengni Wang
Signed-off-by: yuwenzho
---
 .../quantization/ptq_static/prepare_model.py  | 48 +++++++++++-----
 .../llama/quantization/weight_only/README.md  |  2 +-
 .../quantization/weight_only/prepare_model.py | 55 +++++++++++++++++++
 .../test_weight_only_adaptor.py               |  4 +-
 test/model/test_onnx_model.py                 |  6 +-
 5 files changed, 97 insertions(+), 18 deletions(-)
 create mode 100644 examples/onnxrt/nlp/huggingface_model/text_generation/llama/quantization/weight_only/prepare_model.py

diff --git a/examples/onnxrt/nlp/huggingface_model/text_generation/llama/quantization/ptq_static/prepare_model.py b/examples/onnxrt/nlp/huggingface_model/text_generation/llama/quantization/ptq_static/prepare_model.py
index b951a19bc23..92b423f351a 100644
--- a/examples/onnxrt/nlp/huggingface_model/text_generation/llama/quantization/ptq_static/prepare_model.py
+++ b/examples/onnxrt/nlp/huggingface_model/text_generation/llama/quantization/ptq_static/prepare_model.py
@@ -1,6 +1,9 @@
 import argparse
 import os
 import subprocess
+import optimum.version
+from packaging.version import Version
+OPTIMUM114_VERSION = Version("1.14.0")
 
 
 def parse_arguments():
@@ -12,20 +15,37 @@
 
 def prepare_model(input_model, output_model):
     print("\nexport model...")
-    subprocess.run(
-        [
-            "optimum-cli",
-            "export",
-            "onnx",
-            "--model",
-            f"{input_model}",
-            "--task",
-            "text-generation-with-past",
-            f"{output_model}",
-        ],
-        stdout=subprocess.PIPE,
-        text=True,
-    )
+    if Version(optimum.version.__version__) >= OPTIMUM114_VERSION:
+        subprocess.run(
+            [
+                "optimum-cli",
+                "export",
+                "onnx",
+                "--model",
+                f"{input_model}",
+                "--task",
+                "text-generation-with-past",
+                "--legacy",
+                f"{output_model}",
+            ],
+            stdout=subprocess.PIPE,
+            text=True,
+        )
+    else:
+        subprocess.run(
+            [
+                "optimum-cli",
+                "export",
+                "onnx",
+                "--model",
+                f"{input_model}",
+                "--task",
+                "text-generation-with-past",
+                f"{output_model}",
+            ],
+            stdout=subprocess.PIPE,
+            text=True,
+        )
 
     assert os.path.exists(output_model), f"{output_model} doesn't exist!"
 
diff --git a/examples/onnxrt/nlp/huggingface_model/text_generation/llama/quantization/weight_only/README.md b/examples/onnxrt/nlp/huggingface_model/text_generation/llama/quantization/weight_only/README.md
index e15d0e3c703..68227fcffd4 100644
--- a/examples/onnxrt/nlp/huggingface_model/text_generation/llama/quantization/weight_only/README.md
+++ b/examples/onnxrt/nlp/huggingface_model/text_generation/llama/quantization/weight_only/README.md
@@ -17,7 +17,7 @@ pip install -r requirements.txt
 ## 2. Prepare Model
 
 ```bash
-optimum-cli export onnx --model decapoda-research/llama-7b-hf --task text-generation-with-past ./llama_7b
+python prepare_model.py --input_model="decapoda-research/llama-7b-hf" --output_model="./llama_7b"
 ```
 
 # Run
diff --git a/examples/onnxrt/nlp/huggingface_model/text_generation/llama/quantization/weight_only/prepare_model.py b/examples/onnxrt/nlp/huggingface_model/text_generation/llama/quantization/weight_only/prepare_model.py
new file mode 100644
index 00000000000..92b423f351a
--- /dev/null
+++ b/examples/onnxrt/nlp/huggingface_model/text_generation/llama/quantization/weight_only/prepare_model.py
@@ -0,0 +1,55 @@
+import argparse
+import os
+import subprocess
+import optimum.version
+from packaging.version import Version
+OPTIMUM114_VERSION = Version("1.14.0")
+
+
+def parse_arguments():
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--input_model", type=str, required=False, default="")
+    parser.add_argument("--output_model", type=str, required=True)
+    return parser.parse_args()
+
+
+def prepare_model(input_model, output_model):
+    print("\nexport model...")
+    if Version(optimum.version.__version__) >= OPTIMUM114_VERSION:
+        subprocess.run(
+            [
+                "optimum-cli",
+                "export",
+                "onnx",
+                "--model",
+                f"{input_model}",
+                "--task",
+                "text-generation-with-past",
+                "--legacy",
+                f"{output_model}",
+            ],
+            stdout=subprocess.PIPE,
+            text=True,
+        )
+    else:
+        subprocess.run(
+            [
+                "optimum-cli",
+                "export",
+                "onnx",
+                "--model",
+                f"{input_model}",
+                "--task",
+                "text-generation-with-past",
+                f"{output_model}",
+            ],
+            stdout=subprocess.PIPE,
+            text=True,
+        )
+
+    assert os.path.exists(output_model), f"{output_model} doesn't exist!"
+
+
+if __name__ == "__main__":
+    args = parse_arguments()
+    prepare_model(args.input_model, args.output_model)
diff --git a/test/adaptor/onnxrt_adaptor/test_weight_only_adaptor.py b/test/adaptor/onnxrt_adaptor/test_weight_only_adaptor.py
index 6d2201df104..6da0152ded8 100644
--- a/test/adaptor/onnxrt_adaptor/test_weight_only_adaptor.py
+++ b/test/adaptor/onnxrt_adaptor/test_weight_only_adaptor.py
@@ -38,7 +38,9 @@ def __iter__(self):
 class TestWeightOnlyAdaptor(unittest.TestCase):
     @classmethod
     def setUpClass(self):
-        cmd = "optimum-cli export onnx --model hf-internal-testing/tiny-random-gptj --task text-generation gptj/"
+        cmd = (
+            "optimum-cli export onnx --model hf-internal-testing/tiny-random-gptj --task text-generation --legacy gptj/"
+        )
         p = subprocess.Popen(
             cmd, preexec_fn=os.setsid, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True
         )  # nosec
diff --git a/test/model/test_onnx_model.py b/test/model/test_onnx_model.py
index 4c891781c35..cb4e1a6c704 100644
--- a/test/model/test_onnx_model.py
+++ b/test/model/test_onnx_model.py
@@ -193,7 +193,9 @@ def setUp(self):
         model = onnx.helper.make_model(graph, **{"opset_imports": [onnx.helper.make_opsetid("", 14)]})
         self.matmul_reshape_model = model
 
-        cmd = "optimum-cli export onnx --model hf-internal-testing/tiny-random-gptj --task text-generation gptj/"
+        cmd = (
+            "optimum-cli export onnx --model hf-internal-testing/tiny-random-gptj --task text-generation --legacy gptj/"
+        )
         p = subprocess.Popen(
             cmd, preexec_fn=os.setsid, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True
         )  # nosec
@@ -216,7 +218,7 @@ def test_hf_model(self):
 
         config = AutoConfig.from_pretrained("hf_test")
         sessions = ORTModelForCausalLM.load_model("hf_test/decoder_model.onnx")
-        model = ORTModelForCausalLM(sessions[0], config, "hf_test", use_cache=False, use_io_binding=False)
+        model = ORTModelForCausalLM(sessions, config, model_save_dir="hf_test", use_cache=False, use_io_binding=False)
         self.assertNotEqual(model, None)
 
     def test_nodes(self):
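
For readers applying this patch by hand: the version gate added to both copies of `prepare_model.py` duplicates the entire `subprocess.run` call just to insert one flag. Below is a minimal sketch of the same logic, assuming only what the diff itself shows (optimum >= 1.14.0 needs `--legacy`, and the argument lists are otherwise identical); `export_model` is a hypothetical name, not code from this commit.

```python
# Sketch only -- equivalent to the patched prepare_model(), with the
# command list built once and "--legacy" appended conditionally.
import os
import subprocess

import optimum.version
from packaging.version import Version

OPTIMUM114_VERSION = Version("1.14.0")


def export_model(input_model: str, output_model: str) -> None:  # hypothetical helper
    cmd = [
        "optimum-cli",
        "export",
        "onnx",
        "--model",
        input_model,
        "--task",
        "text-generation-with-past",
    ]
    if Version(optimum.version.__version__) >= OPTIMUM114_VERSION:
        # optimum 1.14.0 changed the default text-generation ONNX export;
        # --legacy keeps producing the separate decoder_model.onnx that the
        # tests in this patch load (e.g. "hf_test/decoder_model.onnx").
        cmd.append("--legacy")
    cmd.append(output_model)
    subprocess.run(cmd, stdout=subprocess.PIPE, text=True)
    assert os.path.exists(output_model), f"{output_model} doesn't exist!"
```

Building one argument list and appending the flag keeps the two code paths from drifting apart if the export arguments change again.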