From b467d4ade64ba99810b554bfa47655958b9477b2 Mon Sep 17 00:00:00 2001
From: Andre Slavescu <51034490+AndreSlavescu@users.noreply.github.com>
Date: Mon, 5 Aug 2024 11:27:00 -0700
Subject: [PATCH] Update onnx docs (#2561)

---
 docs/onnx-conversion.md                          |  2 +-
 src/main/python/onnx/convert_hf_model_to_onnx.py | 11 +++++++++--
 src/main/python/onnx/optimize_onnx_model.py      |  2 +-
 3 files changed, 11 insertions(+), 4 deletions(-)

diff --git a/docs/onnx-conversion.md b/docs/onnx-conversion.md
index dbbf16bf0a..f881534a03 100644
--- a/docs/onnx-conversion.md
+++ b/docs/onnx-conversion.md
@@ -328,7 +328,7 @@ First, we want to store the newly generated models in the ```~/.cache/anserini/e
 
 ```bash
 cd src/main/python/onnx/models
-cp splade-cocondenser-ensembledistil-optimized.onnx ~/.cache/anserini/encoders/
+cp splade-cocondenser-ensembledistil-optimized.onnx splade-cocondenser-ensembledistil-vocab.txt ~/.cache/anserini/encoders/
 ```
 
 Second, now run the end to end regression as seen in the previously mentioned documentation with the generated ONNX model.
\ No newline at end of file
diff --git a/src/main/python/onnx/convert_hf_model_to_onnx.py b/src/main/python/onnx/convert_hf_model_to_onnx.py
index 8368660c30..ca8276d120 100644
--- a/src/main/python/onnx/convert_hf_model_to_onnx.py
+++ b/src/main/python/onnx/convert_hf_model_to_onnx.py
@@ -29,7 +29,7 @@ def get_dynamic_axes(input_names, output_names):
         dynamic_axes[name] = {0: 'batch_size', 1: 'sequence'}
     return dynamic_axes
 
-def convert_model_to_onnx(text, model, tokenizer, onnx_path, device):
+def convert_model_to_onnx(text, model, tokenizer, onnx_path, vocab_path, device):
     print(model) # this prints the model structure for better understanding (optional)
     model.eval()
 
@@ -70,6 +70,12 @@ def convert_model_to_onnx(text, model, tokenizer, onnx_path, device):
     onnx.checker.check_model(onnx_model)
     print("ONNX model checked successfully")
 
+    vocab = tokenizer.get_vocab()
+    with open(vocab_path, 'w', encoding='utf-8') as f:
+        for token, index in sorted(vocab.items(), key=lambda x: x[1]):
+            f.write(f"{token}\n")
+    print(f"Vocabulary saved to {vocab_path}")
+
     # small inference session for testing
     ort_session = onnxruntime.InferenceSession(onnx_path)
     ort_inputs = {k: v.cpu().numpy() for k, v in test_input.items()}
@@ -89,5 +95,6 @@
     os.makedirs("models", exist_ok=True)
 
     onnx_path = f"models/{model_prefix}.onnx"
+    vocab_path = f"models/{model_prefix}-vocab.txt"
 
-    convert_model_to_onnx(args.text, model, tokenizer, onnx_path, device=device)
+    convert_model_to_onnx(args.text, model, tokenizer, onnx_path, vocab_path, device=device)
diff --git a/src/main/python/onnx/optimize_onnx_model.py b/src/main/python/onnx/optimize_onnx_model.py
index 075b01516f..6b331c5f78 100644
--- a/src/main/python/onnx/optimize_onnx_model.py
+++ b/src/main/python/onnx/optimize_onnx_model.py
@@ -26,7 +26,7 @@ def optimize_onnx_model(model_path, print_stats=False):
     # optimized_model.convert_float_to_float16()
 
     # Save the optimized model
-    model_name = model_path.split(".")[0]
+    model_name = model_path.rsplit(".onnx", 1)[0]
     optimized_model_path = f'{model_name}-optimized.onnx'
     optimized_model.save_model_to_file(optimized_model_path)
     print(f"ONNX model optimization successful. Saved to {optimized_model_path}")
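
For reference, a minimal sketch of driving the updated conversion entry point directly, rather than through the script's CLI. This assumes the script's argparse section is guarded by `__main__` so the function is importable, and that `AutoModel` is an adequate stand-in for however the real script loads the model; the model name and query text are illustrative, not prescribed by the patch.

```python
from transformers import AutoModel, AutoTokenizer

# Assumes convert_hf_model_to_onnx.py is importable from the working directory.
from convert_hf_model_to_onnx import convert_model_to_onnx

model_name = "naver/splade-cocondenser-ensembledistil"  # illustrative choice
model = AutoModel.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Keyword names match the new signature introduced by this patch.
convert_model_to_onnx(
    text="what is the capital of France?",  # sample text used to trace the export
    model=model,
    tokenizer=tokenizer,
    onnx_path="models/splade-cocondenser-ensembledistil.onnx",
    vocab_path="models/splade-cocondenser-ensembledistil-vocab.txt",
    device="cpu",  # assumed CPU export for this sketch
)
```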
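The new vocabulary export writes one token per line, sorted by token id, so line *i* of the `-vocab.txt` file corresponds to token id *i*. A small sketch of reading it back, with an illustrative file name matching the SPLADE model used in the docs:

```python
# Read the exported vocab back into an id -> token list; because
# convert_hf_model_to_onnx.py writes tokens in ascending id order,
# the line index is the token id.
vocab_path = "models/splade-cocondenser-ensembledistil-vocab.txt"

with open(vocab_path, encoding="utf-8") as f:
    id_to_token = [line.rstrip("\n") for line in f]

# For a BERT-style vocabulary, special tokens sit at fixed ids, e.g.:
print(id_to_token[101])  # expected: "[CLS]"
print(id_to_token[102])  # expected: "[SEP]"
```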
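A note on the `optimize_onnx_model.py` change: `model_path.split(".")[0]` truncates at the first dot anywhere in the path, so any model name that itself contains a dot produces a mangled output stem, while `rsplit(".onnx", 1)` strips only the trailing extension. A minimal illustration, using a hypothetical dotted path:

```python
# Hypothetical model path; the dotted stem is what breaks the old logic.
model_path = "models/all-MiniLM-L6-v2.0.onnx"

broken = model_path.split(".")[0]         # "models/all-MiniLM-L6-v2"   -- stem cut at first dot
fixed = model_path.rsplit(".onnx", 1)[0]  # "models/all-MiniLM-L6-v2.0" -- only extension removed

print(f"{broken}-optimized.onnx")  # old behavior: wrong output file name
print(f"{fixed}-optimized.onnx")   # new behavior: "models/all-MiniLM-L6-v2.0-optimized.onnx"
```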