Skip to content

Commit

Permalink
Deprecate ORTModel class (#1939)
Browse files Browse the repository at this point in the history
* deprecate and create alternative

* update optimization examples

* update quant examples

* fix
  • Loading branch information
IlyasMoutawwakil authored Jul 8, 2024
1 parent 6e2e564 commit 4aa0c14
Show file tree
Hide file tree
Showing 15 changed files with 165 additions and 107 deletions.
19 changes: 11 additions & 8 deletions examples/onnxruntime/optimization/multiple-choice/run_swag.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@

from optimum.onnxruntime import ORTModelForMultipleChoice, ORTOptimizer
from optimum.onnxruntime.configuration import OptimizationConfig
from optimum.onnxruntime.model import ORTModel
from optimum.onnxruntime.utils import evaluation_loop


# Will error if the minimal version of Transformers is not installed. The version of transformers must be >= 4.19.0
Expand Down Expand Up @@ -236,7 +236,6 @@ def main():
)

os.makedirs(training_args.output_dir, exist_ok=True)
optimized_model_path = os.path.join(training_args.output_dir, "model_optimized.onnx")

tokenizer = AutoTokenizer.from_pretrained(model_args.tokenizer_name or model_args.model_name_or_path)

Expand All @@ -254,13 +253,18 @@ def main():
optimizer = ORTOptimizer.from_pretrained(model)

# Optimize the model
optimizer.optimize(
optimized_model_path = optimizer.optimize(
optimization_config=optimization_config,
save_dir=training_args.output_dir,
use_external_data_format=onnx_export_args.use_external_data_format,
one_external_file=onnx_export_args.one_external_file,
)

model = ORTModelForMultipleChoice.from_pretrained(
optimized_model_path,
provider=optim_args.execution_provider,
)

if training_args.do_eval:
# Prepare the dataset downloading, preprocessing and metric creation to perform the evaluation and / or the
# prediction step(s)
Expand Down Expand Up @@ -339,13 +343,12 @@ def compute_metrics(eval_predictions):
# Evaluation
logger.info("*** Evaluate ***")

ort_model = ORTModel(
optimized_model_path,
execution_provider=optim_args.execution_provider,
compute_metrics=compute_metrics,
outputs = evaluation_loop(
model=model,
dataset=eval_dataset,
label_names=["label"],
compute_metrics=compute_metrics,
)
outputs = ort_model.evaluation_loop(eval_dataset)

# Save evaluation metrics
with open(os.path.join(training_args.output_dir, "eval_results.json"), "w") as f:
Expand Down
24 changes: 12 additions & 12 deletions examples/onnxruntime/optimization/question-answering/run_qa.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@

from optimum.onnxruntime import ORTModelForQuestionAnswering, ORTOptimizer
from optimum.onnxruntime.configuration import OptimizationConfig
from optimum.onnxruntime.model import ORTModel
from optimum.onnxruntime.utils import evaluation_loop


# Will error if the minimal version of Transformers is not installed. Remove at your own risk.
Expand Down Expand Up @@ -305,7 +305,6 @@ def main():
)

os.makedirs(training_args.output_dir, exist_ok=True)
optimized_model_path = os.path.join(training_args.output_dir, "model_optimized.onnx")

tokenizer = AutoTokenizer.from_pretrained(model_args.tokenizer_name or model_args.model_name_or_path)

Expand All @@ -323,13 +322,15 @@ def main():
optimizer = ORTOptimizer.from_pretrained(model)

# Optimize the model
optimizer.optimize(
optimized_model_path = optimizer.optimize(
optimization_config=optimization_config,
save_dir=training_args.output_dir,
use_external_data_format=onnx_export_args.use_external_data_format,
one_external_file=onnx_export_args.one_external_file,
)

model = ORTModelForQuestionAnswering.from_pretrained(optimized_model_path, provider=optim_args.execution_provider)

# Prepare the dataset downloading, preprocessing and metric creation to perform the evaluation and / or the
# prediction step(s)
if training_args.do_eval or training_args.do_predict:
Expand Down Expand Up @@ -478,13 +479,12 @@ def compute_metrics(p: EvalPrediction):
# During feature creation the number of dataset samples might increase, so we select the required samples again
eval_dataset = eval_dataset.select(range(data_args.max_eval_samples))

ort_model = ORTModel(
optimized_model_path,
execution_provider=optim_args.execution_provider,
compute_metrics=compute_metrics,
outputs = evaluation_loop(
model=model,
dataset=eval_dataset,
label_names=["start_positions", "end_positions"],
compute_metrics=compute_metrics,
)
outputs = ort_model.evaluation_loop(eval_dataset)
predictions = post_processing_function(eval_examples, eval_dataset, outputs.predictions)
metrics = compute_metrics(predictions)

Expand Down Expand Up @@ -514,12 +514,12 @@ def compute_metrics(p: EvalPrediction):
# During feature creation the number of dataset samples might increase, so we select the required samples again
predict_dataset = predict_dataset.select(range(data_args.max_predict_samples))

ort_model = ORTModel(
optimized_model_path,
execution_provider=optim_args.execution_provider,
outputs = evaluation_loop(
model=model,
dataset=eval_dataset,
label_names=["start_positions", "end_positions"],
compute_metrics=compute_metrics,
)
outputs = ort_model.evaluation_loop(predict_dataset)
predictions = post_processing_function(predict_examples, predict_dataset, outputs.predictions)
metrics = compute_metrics(predictions)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,13 +14,11 @@ See the License for the specific language governing permissions and
limitations under the License.
-->

# Text classification
# Text classification

## GLUE tasks

The script [`run_glue.py`](https://github.com/huggingface/optimum/blob/main/examples/onnxruntime/optimization/text-classification/run_glue.py)
allows us to apply graph optimizations and fusion using [ONNX Runtime](https://github.com/microsoft/onnxruntime) for sequence classification tasks such as
the ones from the [GLUE benchmark](https://gluebenchmark.com/).
The script [`run_glue.py`](https://github.com/huggingface/optimum/blob/main/examples/onnxruntime/optimization/text-classification/run_glue.py) allows us to apply graph optimizations and fusion using [ONNX Runtime](https://github.com/microsoft/onnxruntime) for sequence classification tasks such as the ones from the [GLUE benchmark](https://gluebenchmark.com/).

The following example applies graph optimization to a DistilBERT model fine-tuned on the sst-2 task. Here the optimization level is set to 1, enabling basic optimizations such as redundant node elimination and constant folding. Higher optimization levels will result in a hardware-dependent optimized graph.

Expand Down
25 changes: 15 additions & 10 deletions examples/onnxruntime/optimization/text-classification/run_glue.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@

from optimum.onnxruntime import ORTModelForSequenceClassification, ORTOptimizer
from optimum.onnxruntime.configuration import OptimizationConfig
from optimum.onnxruntime.model import ORTModel
from optimum.onnxruntime.utils import evaluation_loop


# Will error if the minimal version of Transformers is not installed. Remove at your own risk.
Expand Down Expand Up @@ -250,7 +250,6 @@ def main():
)

os.makedirs(training_args.output_dir, exist_ok=True)
optimized_model_path = os.path.join(training_args.output_dir, "model_optimized.onnx")

tokenizer = AutoTokenizer.from_pretrained(model_args.model_name_or_path)

Expand All @@ -268,13 +267,17 @@ def main():
optimizer = ORTOptimizer.from_pretrained(model)

# Optimize the model
optimizer.optimize(
optimized_model_path = optimizer.optimize(
optimization_config=optimization_config,
save_dir=training_args.output_dir,
use_external_data_format=onnx_export_args.use_external_data_format,
one_external_file=onnx_export_args.one_external_file,
)

model = ORTModelForSequenceClassification.from_pretrained(
optimized_model_path, provider=optim_args.execution_provider
)

# Prepare the dataset downloading, preprocessing and metric creation to perform the evaluation and / or the
# prediction step(s)
if training_args.do_eval or training_args.do_predict:
Expand Down Expand Up @@ -408,13 +411,13 @@ def compute_metrics(p: EvalPrediction):
desc="Running tokenizer on the evaluation dataset",
)

ort_model = ORTModel(
optimized_model_path,
execution_provider=optim_args.execution_provider,
outputs = evaluation_loop(
model=model,
eval_dataset=eval_dataset,
compute_metrics=compute_metrics,
label_names=["label"],
)
outputs = ort_model.evaluation_loop(eval_dataset)

# Save metrics
with open(os.path.join(training_args.output_dir, "eval_results.json"), "w") as f:
json.dump(outputs.metrics, f, indent=4, sort_keys=True)
Expand All @@ -436,10 +439,12 @@ def compute_metrics(p: EvalPrediction):
desc="Running tokenizer on the test dataset",
)

ort_model = ORTModel(
optimized_model_path, execution_provider=optim_args.execution_provider, label_names=["label"]
outputs = evaluation_loop(
model=model,
eval_dataset=eval_dataset,
compute_metrics=compute_metrics,
label_names=["label"],
)
outputs = ort_model.evaluation_loop(predict_dataset)
predictions = np.squeeze(outputs.predictions) if is_regression else np.argmax(outputs.predictions, axis=1)

# Save predictions
Expand Down
17 changes: 7 additions & 10 deletions examples/onnxruntime/optimization/token-classification/run_ner.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@

from optimum.onnxruntime import ORTModelForTokenClassification, ORTOptimizer
from optimum.onnxruntime.configuration import OptimizationConfig
from optimum.onnxruntime.model import ORTModel
from optimum.onnxruntime.utils import evaluation_loop


# Will error if the minimal version of Transformers is not installed. Remove at your own risk.
Expand Down Expand Up @@ -276,7 +276,6 @@ def main():
)

os.makedirs(training_args.output_dir, exist_ok=True)
optimized_model_path = os.path.join(training_args.output_dir, "model_optimized.onnx")

tokenizer = AutoTokenizer.from_pretrained(model_args.tokenizer_name or model_args.model_name_or_path)

Expand Down Expand Up @@ -480,12 +479,11 @@ def compute_metrics(p):
desc="Running tokenizer on the validation dataset",
)

ort_model = ORTModel(
optimized_model_path,
execution_provider=optim_args.execution_provider,
outputs = evaluation_loop(
model=model,
dataset=eval_dataset,
compute_metrics=compute_metrics,
)
outputs = ort_model.evaluation_loop(eval_dataset)

# Save evaluation metrics
with open(os.path.join(training_args.output_dir, "eval_results.json"), "w") as f:
Expand All @@ -509,12 +507,11 @@ def compute_metrics(p):
desc="Running tokenizer on the prediction dataset",
)

ort_model = ORTModel(
optimized_model_path,
execution_provider=optim_args.execution_provider,
outputs = evaluation_loop(
model=model,
dataset=predict_dataset,
compute_metrics=compute_metrics,
)
outputs = ort_model.evaluation_loop(predict_dataset)
predictions = np.argmax(outputs.predictions, axis=2)

# Remove ignored index (special tokens)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,6 @@
import sys
from dataclasses import dataclass, field
from functools import partial
from pathlib import Path
from typing import Optional

import datasets
Expand All @@ -38,7 +37,6 @@

from optimum.onnxruntime import ORTQuantizer
from optimum.onnxruntime.configuration import AutoCalibrationConfig, QuantizationConfig
from optimum.onnxruntime.model import ORTModel
from optimum.onnxruntime.modeling_ort import ORTModelForImageClassification
from optimum.onnxruntime.preprocessors import QuantizationPreprocessor
from optimum.onnxruntime.preprocessors.passes import (
Expand All @@ -47,6 +45,7 @@
ExcludeNodeAfter,
ExcludeNodeFollowedBy,
)
from optimum.onnxruntime.utils import evaluation_loop


logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -378,13 +377,16 @@ def compute_metrics(p: EvalPrediction):
quantization_preprocessor.register_pass(ExcludeNodeFollowedBy("Add", "Softmax"))

# Apply quantization on the model
quantizer.quantize(
quantized_model_path = quantizer.quantize(
save_dir=training_args.output_dir,
calibration_tensors_range=ranges,
quantization_config=qconfig,
preprocessor=quantization_preprocessor,
use_external_data_format=onnx_export_args.use_external_data_format,
)
model = ORTModelForImageClassification.from_pretrained(
quantized_model_path, provider=optim_args.execution_provider
)

# Evaluation
if training_args.do_eval:
Expand All @@ -409,13 +411,12 @@ def compute_metrics(p: EvalPrediction):
# Set the validation transforms
eval_dataset = eval_dataset.with_transform(preprocess_function)

ort_model = ORTModel(
Path(training_args.output_dir) / "model_quantized.onnx",
execution_provider=optim_args.execution_provider,
compute_metrics=compute_metrics,
outputs = evaluation_loop(
model=model,
dataset=eval_dataset,
label_names=[labels_column],
compute_metrics=compute_metrics,
)
outputs = ort_model.evaluation_loop(eval_dataset)
# Save metrics
with open(os.path.join(training_args.output_dir, "eval_results.json"), "w") as f:
json.dump(outputs.metrics, f, indent=4, sort_keys=True)
Expand Down
14 changes: 7 additions & 7 deletions examples/onnxruntime/quantization/multiple-choice/run_swag.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,14 +38,14 @@

from optimum.onnxruntime import ORTModelForMultipleChoice, ORTQuantizer
from optimum.onnxruntime.configuration import AutoCalibrationConfig, QuantizationConfig
from optimum.onnxruntime.model import ORTModel
from optimum.onnxruntime.preprocessors import QuantizationPreprocessor
from optimum.onnxruntime.preprocessors.passes import (
ExcludeGeLUNodes,
ExcludeLayerNormNodes,
ExcludeNodeAfter,
ExcludeNodeFollowedBy,
)
from optimum.onnxruntime.utils import evaluation_loop


# Will error if the minimal version of Transformers is not installed. The version of transformers must be >= 4.19.0
Expand Down Expand Up @@ -409,13 +409,14 @@ def compute_metrics(eval_predictions):
quantization_preprocessor.register_pass(ExcludeNodeFollowedBy("Add", "Softmax"))

# Apply quantization on the model
quantizer.quantize(
quantized_model_path = quantizer.quantize(
save_dir=training_args.output_dir,
calibration_tensors_range=ranges,
quantization_config=qconfig,
preprocessor=quantization_preprocessor,
use_external_data_format=onnx_export_args.use_external_data_format,
)
model = ORTModelForMultipleChoice.from_pretrained(quantized_model_path, provider=optim_args.execution_provider)

# Evaluation
if training_args.do_eval:
Expand All @@ -436,13 +437,12 @@ def compute_metrics(eval_predictions):
load_from_cache_file=not data_args.overwrite_cache,
)

ort_model = ORTModel(
os.path.join(training_args.output_dir, "model_quantized.onnx"),
execution_provider=optim_args.execution_provider,
compute_metrics=compute_metrics,
outputs = evaluation_loop(
model=model,
dataset=eval_dataset,
label_names=["label"],
compute_metrics=compute_metrics,
)
outputs = ort_model.evaluation_loop(eval_dataset)

# Save evaluation metrics
with open(os.path.join(training_args.output_dir, "eval_results.json"), "w") as f:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,13 +16,9 @@ limitations under the License.

# Question answering

The script [`run_qa.py`](https://github.com/huggingface/optimum/blob/main/examples/onnxruntime/quantization/question-answering/run_qa.py) allows us to apply different quantization approaches (such as dynamic and static quantization) as well as graph optimizations using [ONNX Runtime](https://github.com/microsoft/onnxruntime) for question answering tasks.

The script [`run_qa.py`](https://github.com/huggingface/optimum/blob/main/examples/onnxruntime/quantization/question-answering/run_qa.py)
allows us to apply different quantization approaches (such as dynamic and static quantization) as well as graph
optimizations using [ONNX Runtime](https://github.com/microsoft/onnxruntime) for question answering tasks.

Note that if your dataset contains samples with no possible answers (like SQuAD version 2), you need to pass along
the flag `--version_2_with_negative`.
Note that if your dataset contains samples with no possible answers (like SQuAD version 2), you need to pass along the flag `--version_2_with_negative`.

The following example applies post-training dynamic quantization to a DistilBERT model fine-tuned on the SQuAD1.0 dataset.

Expand Down
Loading

0 comments on commit 4aa0c14

Please sign in to comment.