Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Deprecate ORTModel class #1939

Merged
merged 4 commits into from
Jul 8, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 11 additions & 8 deletions examples/onnxruntime/optimization/multiple-choice/run_swag.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@

from optimum.onnxruntime import ORTModelForMultipleChoice, ORTOptimizer
from optimum.onnxruntime.configuration import OptimizationConfig
from optimum.onnxruntime.model import ORTModel
from optimum.onnxruntime.utils import evaluation_loop


# Will error if the minimal version of Transformers is not installed. The version of transformers must be >= 4.19.0
Expand Down Expand Up @@ -236,7 +236,6 @@ def main():
)

os.makedirs(training_args.output_dir, exist_ok=True)
optimized_model_path = os.path.join(training_args.output_dir, "model_optimized.onnx")

tokenizer = AutoTokenizer.from_pretrained(model_args.tokenizer_name or model_args.model_name_or_path)

Expand All @@ -254,13 +253,18 @@ def main():
optimizer = ORTOptimizer.from_pretrained(model)

# Optimize the model
optimizer.optimize(
optimized_model_path = optimizer.optimize(
optimization_config=optimization_config,
save_dir=training_args.output_dir,
use_external_data_format=onnx_export_args.use_external_data_format,
one_external_file=onnx_export_args.one_external_file,
)

model = ORTModelForMultipleChoice.from_pretrained(
optimized_model_path,
provider=optim_args.execution_provider,
)

if training_args.do_eval:
# Prepare the dataset downloading, preprocessing and metric creation to perform the evaluation and / or the
# prediction step(s)
Expand Down Expand Up @@ -339,13 +343,12 @@ def compute_metrics(eval_predictions):
# Evaluation
logger.info("*** Evaluate ***")

ort_model = ORTModel(
optimized_model_path,
execution_provider=optim_args.execution_provider,
compute_metrics=compute_metrics,
outputs = evaluation_loop(
model=model,
dataset=eval_dataset,
label_names=["label"],
compute_metrics=compute_metrics,
)
outputs = ort_model.evaluation_loop(eval_dataset)

# Save evaluation metrics
with open(os.path.join(training_args.output_dir, "eval_results.json"), "w") as f:
Expand Down
24 changes: 12 additions & 12 deletions examples/onnxruntime/optimization/question-answering/run_qa.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@

from optimum.onnxruntime import ORTModelForQuestionAnswering, ORTOptimizer
from optimum.onnxruntime.configuration import OptimizationConfig
from optimum.onnxruntime.model import ORTModel
from optimum.onnxruntime.utils import evaluation_loop


# Will error if the minimal version of Transformers is not installed. Remove at your own risk.
Expand Down Expand Up @@ -305,7 +305,6 @@ def main():
)

os.makedirs(training_args.output_dir, exist_ok=True)
optimized_model_path = os.path.join(training_args.output_dir, "model_optimized.onnx")

tokenizer = AutoTokenizer.from_pretrained(model_args.tokenizer_name or model_args.model_name_or_path)

Expand All @@ -323,13 +322,15 @@ def main():
optimizer = ORTOptimizer.from_pretrained(model)

# Optimize the model
optimizer.optimize(
optimized_model_path = optimizer.optimize(
optimization_config=optimization_config,
save_dir=training_args.output_dir,
use_external_data_format=onnx_export_args.use_external_data_format,
one_external_file=onnx_export_args.one_external_file,
)

model = ORTModelForQuestionAnswering.from_pretrained(optimized_model_path, provider=optim_args.execution_provider)

# Prepare the dataset downloading, preprocessing and metric creation to perform the evaluation and / or the
# prediction step(s)
if training_args.do_eval or training_args.do_predict:
Expand Down Expand Up @@ -478,13 +479,12 @@ def compute_metrics(p: EvalPrediction):
# During Feature creation dataset samples might increase, we will select required samples again
eval_dataset = eval_dataset.select(range(data_args.max_eval_samples))

ort_model = ORTModel(
optimized_model_path,
execution_provider=optim_args.execution_provider,
compute_metrics=compute_metrics,
outputs = evaluation_loop(
model=model,
dataset=eval_dataset,
label_names=["start_positions", "end_positions"],
compute_metrics=compute_metrics,
)
outputs = ort_model.evaluation_loop(eval_dataset)
predictions = post_processing_function(eval_examples, eval_dataset, outputs.predictions)
metrics = compute_metrics(predictions)

Expand Down Expand Up @@ -514,12 +514,12 @@ def compute_metrics(p: EvalPrediction):
# During Feature creation dataset samples might increase, we will select required samples again
predict_dataset = predict_dataset.select(range(data_args.max_predict_samples))

ort_model = ORTModel(
optimized_model_path,
execution_provider=optim_args.execution_provider,
outputs = evaluation_loop(
model=model,
dataset=eval_dataset,
label_names=["start_positions", "end_positions"],
compute_metrics=compute_metrics,
)
outputs = ort_model.evaluation_loop(predict_dataset)
predictions = post_processing_function(predict_examples, predict_dataset, outputs.predictions)
metrics = compute_metrics(predictions)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,13 +14,11 @@ See the License for the specific language governing permissions and
limitations under the License.
-->

# Text classification
# Text classification

## GLUE tasks

The script [`run_glue.py`](https://github.com/huggingface/optimum/blob/main/examples/onnxruntime/optimization/text-classification/run_glue.py)
allows us to apply graph optimizations and fusion using [ONNX Runtime](https://github.com/microsoft/onnxruntime) for sequence classification tasks such as
the ones from the [GLUE benchmark](https://gluebenchmark.com/).
The script [`run_glue.py`](https://github.com/huggingface/optimum/blob/main/examples/onnxruntime/optimization/text-classification/run_glue.py) allows us to apply graph optimizations and fusion using [ONNX Runtime](https://github.com/microsoft/onnxruntime) for sequence classification tasks such as the ones from the [GLUE benchmark](https://gluebenchmark.com/).

The following example applies graph optimization on a DistilBERT fine-tuned on the sst-2 task. Here the optimization level is selected to be 1, enabling basic optimizations such as redundant node eliminations and constant folding. A higher optimization level will result in a hardware-dependent optimized graph.

Expand Down
25 changes: 15 additions & 10 deletions examples/onnxruntime/optimization/text-classification/run_glue.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@

from optimum.onnxruntime import ORTModelForSequenceClassification, ORTOptimizer
from optimum.onnxruntime.configuration import OptimizationConfig
from optimum.onnxruntime.model import ORTModel
from optimum.onnxruntime.utils import evaluation_loop


# Will error if the minimal version of Transformers is not installed. Remove at your own risk.
Expand Down Expand Up @@ -250,7 +250,6 @@ def main():
)

os.makedirs(training_args.output_dir, exist_ok=True)
optimized_model_path = os.path.join(training_args.output_dir, "model_optimized.onnx")

tokenizer = AutoTokenizer.from_pretrained(model_args.model_name_or_path)

Expand All @@ -268,13 +267,17 @@ def main():
optimizer = ORTOptimizer.from_pretrained(model)

# Optimize the model
optimizer.optimize(
optimized_model_path = optimizer.optimize(
optimization_config=optimization_config,
save_dir=training_args.output_dir,
use_external_data_format=onnx_export_args.use_external_data_format,
one_external_file=onnx_export_args.one_external_file,
)

model = ORTModelForSequenceClassification.from_pretrained(
optimized_model_path, provider=optim_args.execution_provider
)

# Prepare the dataset downloading, preprocessing and metric creation to perform the evaluation and / or the
# prediction step(s)
if training_args.do_eval or training_args.do_predict:
Expand Down Expand Up @@ -408,13 +411,13 @@ def compute_metrics(p: EvalPrediction):
desc="Running tokenizer on the evaluation dataset",
)

ort_model = ORTModel(
optimized_model_path,
execution_provider=optim_args.execution_provider,
outputs = evaluation_loop(
model=model,
eval_dataset=eval_dataset,
compute_metrics=compute_metrics,
label_names=["label"],
)
outputs = ort_model.evaluation_loop(eval_dataset)

# Save metrics
with open(os.path.join(training_args.output_dir, "eval_results.json"), "w") as f:
json.dump(outputs.metrics, f, indent=4, sort_keys=True)
Expand All @@ -436,10 +439,12 @@ def compute_metrics(p: EvalPrediction):
desc="Running tokenizer on the test dataset",
)

ort_model = ORTModel(
optimized_model_path, execution_provider=optim_args.execution_provider, label_names=["label"]
outputs = evaluation_loop(
model=model,
eval_dataset=eval_dataset,
compute_metrics=compute_metrics,
label_names=["label"],
)
outputs = ort_model.evaluation_loop(predict_dataset)
predictions = np.squeeze(outputs.predictions) if is_regression else np.argmax(outputs.predictions, axis=1)

# Save predictions
Expand Down
17 changes: 7 additions & 10 deletions examples/onnxruntime/optimization/token-classification/run_ner.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@

from optimum.onnxruntime import ORTModelForTokenClassification, ORTOptimizer
from optimum.onnxruntime.configuration import OptimizationConfig
from optimum.onnxruntime.model import ORTModel
from optimum.onnxruntime.utils import evaluation_loop


# Will error if the minimal version of Transformers is not installed. Remove at your own risk.
Expand Down Expand Up @@ -276,7 +276,6 @@ def main():
)

os.makedirs(training_args.output_dir, exist_ok=True)
optimized_model_path = os.path.join(training_args.output_dir, "model_optimized.onnx")

tokenizer = AutoTokenizer.from_pretrained(model_args.tokenizer_name or model_args.model_name_or_path)

Expand Down Expand Up @@ -480,12 +479,11 @@ def compute_metrics(p):
desc="Running tokenizer on the validation dataset",
)

ort_model = ORTModel(
optimized_model_path,
execution_provider=optim_args.execution_provider,
outputs = evaluation_loop(
model=model,
dataset=eval_dataset,
compute_metrics=compute_metrics,
)
outputs = ort_model.evaluation_loop(eval_dataset)

# Save evaluation metrics
with open(os.path.join(training_args.output_dir, "eval_results.json"), "w") as f:
Expand All @@ -509,12 +507,11 @@ def compute_metrics(p):
desc="Running tokenizer on the prediction dataset",
)

ort_model = ORTModel(
optimized_model_path,
execution_provider=optim_args.execution_provider,
outputs = evaluation_loop(
model=model,
dataset=predict_dataset,
compute_metrics=compute_metrics,
)
outputs = ort_model.evaluation_loop(predict_dataset)
predictions = np.argmax(outputs.predictions, axis=2)

# Remove ignored index (special tokens)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,6 @@
import sys
from dataclasses import dataclass, field
from functools import partial
from pathlib import Path
from typing import Optional

import datasets
Expand All @@ -38,7 +37,6 @@

from optimum.onnxruntime import ORTQuantizer
from optimum.onnxruntime.configuration import AutoCalibrationConfig, QuantizationConfig
from optimum.onnxruntime.model import ORTModel
from optimum.onnxruntime.modeling_ort import ORTModelForImageClassification
from optimum.onnxruntime.preprocessors import QuantizationPreprocessor
from optimum.onnxruntime.preprocessors.passes import (
Expand All @@ -47,6 +45,7 @@
ExcludeNodeAfter,
ExcludeNodeFollowedBy,
)
from optimum.onnxruntime.utils import evaluation_loop


logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -378,13 +377,16 @@ def compute_metrics(p: EvalPrediction):
quantization_preprocessor.register_pass(ExcludeNodeFollowedBy("Add", "Softmax"))

# Apply quantization on the model
quantizer.quantize(
quantized_model_path = quantizer.quantize(
save_dir=training_args.output_dir,
calibration_tensors_range=ranges,
quantization_config=qconfig,
preprocessor=quantization_preprocessor,
use_external_data_format=onnx_export_args.use_external_data_format,
)
model = ORTModelForImageClassification.from_pretrained(
quantized_model_path, provider=optim_args.execution_provider
)

# Evaluation
if training_args.do_eval:
Expand All @@ -409,13 +411,12 @@ def compute_metrics(p: EvalPrediction):
# Set the validation transforms
eval_dataset = eval_dataset.with_transform(preprocess_function)

ort_model = ORTModel(
Path(training_args.output_dir) / "model_quantized.onnx",
execution_provider=optim_args.execution_provider,
compute_metrics=compute_metrics,
outputs = evaluation_loop(
model=model,
dataset=eval_dataset,
label_names=[labels_column],
compute_metrics=compute_metrics,
)
outputs = ort_model.evaluation_loop(eval_dataset)
# Save metrics
with open(os.path.join(training_args.output_dir, "eval_results.json"), "w") as f:
json.dump(outputs.metrics, f, indent=4, sort_keys=True)
Expand Down
14 changes: 7 additions & 7 deletions examples/onnxruntime/quantization/multiple-choice/run_swag.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,14 +38,14 @@

from optimum.onnxruntime import ORTModelForMultipleChoice, ORTQuantizer
from optimum.onnxruntime.configuration import AutoCalibrationConfig, QuantizationConfig
from optimum.onnxruntime.model import ORTModel
from optimum.onnxruntime.preprocessors import QuantizationPreprocessor
from optimum.onnxruntime.preprocessors.passes import (
ExcludeGeLUNodes,
ExcludeLayerNormNodes,
ExcludeNodeAfter,
ExcludeNodeFollowedBy,
)
from optimum.onnxruntime.utils import evaluation_loop


# Will error if the minimal version of Transformers is not installed. The version of transformers must be >= 4.19.0
Expand Down Expand Up @@ -409,13 +409,14 @@ def compute_metrics(eval_predictions):
quantization_preprocessor.register_pass(ExcludeNodeFollowedBy("Add", "Softmax"))

# Apply quantization on the model
quantizer.quantize(
quantized_model_path = quantizer.quantize(
save_dir=training_args.output_dir,
calibration_tensors_range=ranges,
quantization_config=qconfig,
preprocessor=quantization_preprocessor,
use_external_data_format=onnx_export_args.use_external_data_format,
)
model = ORTModelForMultipleChoice.from_pretrained(quantized_model_path, provider=optim_args.execution_provider)

# Evaluation
if training_args.do_eval:
Expand All @@ -436,13 +437,12 @@ def compute_metrics(eval_predictions):
load_from_cache_file=not data_args.overwrite_cache,
)

ort_model = ORTModel(
os.path.join(training_args.output_dir, "model_quantized.onnx"),
execution_provider=optim_args.execution_provider,
compute_metrics=compute_metrics,
outputs = evaluation_loop(
model=model,
dataset=eval_dataset,
label_names=["label"],
compute_metrics=compute_metrics,
)
outputs = ort_model.evaluation_loop(eval_dataset)

# Save evaluation metrics
with open(os.path.join(training_args.output_dir, "eval_results.json"), "w") as f:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,13 +16,9 @@ limitations under the License.

# Question answering

The script [`run_qa.py`](https://github.com/huggingface/optimum/blob/main/examples/onnxruntime/quantization/question-answering/run_qa.py) allows us to apply different quantization approaches (such as dynamic and static quantization) as well as graph optimizations using [ONNX Runtime](https://github.com/microsoft/onnxruntime) for question answering tasks.

The script [`run_qa.py`](https://github.com/huggingface/optimum/blob/main/examples/onnxruntime/quantization/question-answering/run_qa.py)
allows us to apply different quantization approaches (such as dynamic and static quantization) as well as graph
optimizations using [ONNX Runtime](https://github.com/microsoft/onnxruntime) for question answering tasks.

Note that if your dataset contains samples with no possible answers (like SQuAD version 2), you need to pass along
the flag `--version_2_with_negative`.
Note that if your dataset contains samples with no possible answers (like SQuAD version 2), you need to pass along the flag `--version_2_with_negative`.

The following example applies post-training dynamic quantization on a DistilBERT fine-tuned on the SQuAD1.0 dataset.

Expand Down
Loading
Loading