diff --git a/.github/workflows/check_code_quality.yml b/.github/workflows/check_code_quality.yml
index a1d98a6ea5..da5b8bac57 100644
--- a/.github/workflows/check_code_quality.yml
+++ b/.github/workflows/check_code_quality.yml
@@ -48,7 +48,7 @@ jobs:
       run: |
         source venv/bin/activate
         black --check .
-    - name: Check style with isort
+    - name: Check style with ruff
       run: |
         source venv/bin/activate
-        isort --check .
+        ruff .
diff --git a/Makefile b/Makefile
index f07843eac0..84955e6d66 100644
--- a/Makefile
+++ b/Makefile
@@ -22,11 +22,11 @@ REAL_CLONE_URL = $(if $(CLONE_URL),$(CLONE_URL),$(DEFAULT_CLONE_URL))
 # Run code quality checks
 style_check:
 	black --check .
-	isort --check .
+	ruff .

 style:
 	black .
-	isort .
+	ruff . --fix

 # Run tests for the library
 test:
diff --git a/docs/source/inference.mdx b/docs/source/inference.mdx
index 2f6157f51d..f7f4ba534f 100644
--- a/docs/source/inference.mdx
+++ b/docs/source/inference.mdx
@@ -70,7 +70,7 @@ When fixing the shapes with the `reshape()` method, inference cannot be performe
 from datasets import load_dataset
 from transformers import AutoTokenizer, pipeline
 from evaluate import evaluator
-from optimum.intel.openvino import OVModelForQuestionAnswering
+from optimum.intel import OVModelForQuestionAnswering

 model_id = "distilbert-base-cased-distilled-squad"
 model = OVModelForQuestionAnswering.from_pretrained(model_id, export=True)
@@ -92,7 +92,7 @@ metric = task_evaluator.compute(model_or_pipeline=qa_pipe, data=eval_dataset, me
 By default the model will be compiled when instantiating our `OVModel`. In the case where the model is reshaped, placed to an other device or if FP16 precision is enabled, the model will need to be recompiled again, which will happen by default before the first inference (thus inflating the latency of the first inference). To avoid an unnecessary compilation, you can disable the first compilation by setting `compile=False`. The model should also be compiled before the first inference with `model.compile()`.

 ```python
-from optimum.intel.openvino import OVModelForSequenceClassification
+from optimum.intel import OVModelForSequenceClassification

 model_id = "distilbert-base-uncased-finetuned-sst-2-english"
 # Load the model and disable the model compilation
@@ -111,7 +111,7 @@ Here is an example on how you can run inference for a translation task using an

 ```python
 from transformers import AutoTokenizer, pipeline
-from optimum.intel.openvino import OVModelForSeq2SeqLM
+from optimum.intel import OVModelForSeq2SeqLM

 model_id = "t5-small"
 model = OVModelForSeq2SeqLM.from_pretrained(model_id, export=True)
@@ -147,7 +147,7 @@ pip install diffusers
 Here is an example of how you can load an OpenVINO Stable Diffusion model and run inference using OpenVINO Runtime:

 ```python
-from optimum.intel.openvino import OVStableDiffusionPipeline
+from optimum.intel import OVStableDiffusionPipeline

 model_id = "echarlaix/stable-diffusion-v1-5-openvino"
 stable_diffusion = OVStableDiffusionPipeline.from_pretrained(model_id)
diff --git a/docs/source/optimization_ov.mdx b/docs/source/optimization_ov.mdx
index da6ea386e3..a38959d157 100644
--- a/docs/source/optimization_ov.mdx
+++ b/docs/source/optimization_ov.mdx
@@ -16,7 +16,7 @@ limitations under the License.

 # Optimization

-🤗 Optimum Intel provides an `optimum.openvino` package that enables you to apply a variety of model compression methods such as quantization, pruning, on many models hosted on the 🤗 hub using the [NNCF](https://docs.openvino.ai/2022.1/docs_nncf_introduction.html) framework.
+🤗 Optimum Intel provides an `openvino` package that enables you to apply a variety of model compression methods such as quantization, pruning, on many models hosted on the 🤗 hub using the [NNCF](https://docs.openvino.ai/2022.1/docs_nncf_introduction.html) framework.

 ## Post-training optimization

@@ -27,7 +27,7 @@ Here is how to apply static quantization on a fine-tuned DistilBERT:
 ```python
 from functools import partial
 from transformers import AutoModelForSequenceClassification, AutoTokenizer
-from optimum.intel.openvino import OVConfig, OVQuantizer
+from optimum.intel import OVConfig, OVQuantizer

 model_id = "distilbert-base-uncased-finetuned-sst-2-english"
 model = AutoModelForSequenceClassification.from_pretrained(model_id)
@@ -83,7 +83,7 @@ from transformers import (
 )
 from datasets import load_dataset
 - from transformers import Trainer
-+ from optimum.intel.openvino import OVConfig, OVTrainer, OVModelForSequenceClassification
++ from optimum.intel import OVConfig, OVTrainer, OVModelForSequenceClassification

 model_id = "distilbert-base-uncased-finetuned-sst-2-english"
 model = AutoModelForSequenceClassification.from_pretrained(model_id)
@@ -168,7 +168,7 @@ Once we have the config ready, we can start develop the training pipeline like t
 ```diff
 - from transformers import Trainer, TrainingArguments
-+ from optimum.intel.openvino import OVConfig, OVTrainer, OVTrainingArguments
++ from optimum.intel import OVConfig, OVTrainer, OVTrainingArguments

 # Load teacher model
 + teacher_model = AutoModelForSequenceClassification.from_pretrained(teacher_model_or_path)
@@ -210,7 +210,7 @@ After applying quantization on our model, we can then easily load it with our `O
 ```python
 from transformers import pipeline
-from optimum.intel.openvino import OVModelForSequenceClassification
+from optimum.intel import OVModelForSequenceClassification

 model_id = "helenai/distilbert-base-uncased-finetuned-sst-2-english-ov-int8"
 ov_model = OVModelForSequenceClassification.from_pretrained(model_id)
diff --git a/examples/neural_compressor/language-modeling/run_clm.py b/examples/neural_compressor/language-modeling/run_clm.py
index f8a76578d2..aca69f98ae 100644
--- a/examples/neural_compressor/language-modeling/run_clm.py
+++ b/examples/neural_compressor/language-modeling/run_clm.py
@@ -29,9 +29,16 @@ from typing import Optional

 import datasets
+import evaluate
 import torch
 import transformers
 from datasets import load_dataset
+from neural_compressor import (
+    DistillationConfig,
+    PostTrainingQuantConfig,
+    QuantizationAwareTrainingConfig,
+    WeightPruningConfig,
+)
 from transformers import (
     CONFIG_MAPPING,
     MODEL_FOR_CAUSAL_LM_MAPPING,
@@ -49,13 +56,6 @@ from transformers.utils import check_min_version
 from transformers.utils.versions import require_version

-import evaluate
-from neural_compressor import (
-    DistillationConfig,
-    PostTrainingQuantConfig,
-    QuantizationAwareTrainingConfig,
-    WeightPruningConfig,
-)
 from optimum.intel.neural_compressor import INCModelForCausalLM, INCQuantizer, INCTrainer
@@ -441,7 +441,7 @@ def main():
             )
         else:
             model = AutoModelForCausalLM.from_config(config)
-            n_params = sum(dict((p.data_ptr(), p.numel()) for p in model.parameters()).values())
+            n_params = sum({p.data_ptr(): p.numel() for p in model.parameters()}.values())
             logger.info(f"Training new model from scratch - Total size={n_params/2**20:.2f}M params")

     # We resize the embeddings only when necessary to avoid index errors. If you are creating a model from scratch
diff --git a/examples/neural_compressor/language-modeling/run_mlm.py b/examples/neural_compressor/language-modeling/run_mlm.py
index f48ae67cca..f1a6c9a6c0 100644
--- a/examples/neural_compressor/language-modeling/run_mlm.py
+++ b/examples/neural_compressor/language-modeling/run_mlm.py
@@ -29,9 +29,16 @@ from typing import Optional

 import datasets
+import evaluate
 import torch
 import transformers
 from datasets import load_dataset
+from neural_compressor import (
+    DistillationConfig,
+    PostTrainingQuantConfig,
+    QuantizationAwareTrainingConfig,
+    WeightPruningConfig,
+)
 from transformers import (
     CONFIG_MAPPING,
     MODEL_FOR_MASKED_LM_MAPPING,
@@ -48,13 +55,6 @@ from transformers.utils import check_min_version
 from transformers.utils.versions import require_version

-import evaluate
-from neural_compressor import (
-    DistillationConfig,
-    PostTrainingQuantConfig,
-    QuantizationAwareTrainingConfig,
-    WeightPruningConfig,
-)
 from optimum.intel.neural_compressor import INCModelForMaskedLM, INCQuantizer, INCTrainer
diff --git a/examples/neural_compressor/multiple-choice/run_swag.py b/examples/neural_compressor/multiple-choice/run_swag.py
index e65004200c..38746e8629 100644
--- a/examples/neural_compressor/multiple-choice/run_swag.py
+++ b/examples/neural_compressor/multiple-choice/run_swag.py
@@ -29,12 +29,12 @@ import torch
 import transformers
 from datasets import load_dataset
+from neural_compressor import DistillationConfig, QuantizationAwareTrainingConfig, WeightPruningConfig
 from transformers import (
     AutoConfig,
     AutoModelForMultipleChoice,
     AutoTokenizer,
     HfArgumentParser,
-    PreTrainedModel,
     TrainingArguments,
     default_data_collator,
     set_seed,
@@ -45,7 +45,6 @@ from transformers.utils import check_min_version
 from transformers.utils.versions import require_version

-from neural_compressor import DistillationConfig, QuantizationAwareTrainingConfig, WeightPruningConfig
 from optimum.intel.neural_compressor import INCModelForMultipleChoice, INCTrainer
@@ -548,14 +547,14 @@ def compute_metrics(eval_predictions):
         trainer.log_metrics("eval", metrics)
         trainer.save_metrics("eval", metrics)

-    kwargs = dict(
-        finetuned_from=model_args.model_name_or_path,
-        tasks="multiple-choice",
-        dataset_tags="swag",
-        dataset_args="regular",
-        dataset="SWAG",
-        language="en",
-    )
+    kwargs = {
+        "finetuned_from": model_args.model_name_or_path,
+        "tasks": "multiple-choice",
+        "dataset_tags": "swag",
+        "dataset_args": "regular",
+        "dataset": "SWAG",
+        "language": "en",
+    }

     if training_args.push_to_hub:
         trainer.push_to_hub(**kwargs)
diff --git a/examples/neural_compressor/multiple-choice/run_swag_post_training.py b/examples/neural_compressor/multiple-choice/run_swag_post_training.py
index 50b5c7e6b3..4a6fc0ff55 100644
--- a/examples/neural_compressor/multiple-choice/run_swag_post_training.py
+++ b/examples/neural_compressor/multiple-choice/run_swag_post_training.py
@@ -25,17 +25,18 @@ from typing import Optional, Union

 import datasets
-import numpy as np
+import evaluate
 import torch
 import transformers
+from accelerate import Accelerator
 from datasets import load_dataset
+from neural_compressor import PostTrainingQuantConfig
 from torch.utils.data import DataLoader
 from transformers import (
     AutoConfig,
     AutoModelForMultipleChoice,
     AutoTokenizer,
     HfArgumentParser,
-    PreTrainedModel,
     TrainingArguments,
     default_data_collator,
     set_seed,
@@ -45,9 +46,6 @@ from transformers.utils import check_min_version
 from transformers.utils.versions import require_version

-import evaluate
-from accelerate import Accelerator
-from neural_compressor import PostTrainingQuantConfig
 from optimum.intel.neural_compressor import INCModelForMultipleChoice, INCQuantizer
diff --git a/examples/neural_compressor/optical-character-recognition/run_ocr_post_training.py b/examples/neural_compressor/optical-character-recognition/run_ocr_post_training.py
index bc5a364afe..cbaeba0833 100644
--- a/examples/neural_compressor/optical-character-recognition/run_ocr_post_training.py
+++ b/examples/neural_compressor/optical-character-recognition/run_ocr_post_training.py
@@ -5,16 +5,16 @@
 from dataclasses import dataclass, field
 from typing import Optional

+import evaluate
 import pandas as pd
 import torch
 import transformers
+from neural_compressor import PostTrainingQuantConfig
 from PIL import Image
 from torch.utils.data import DataLoader, Dataset
 from transformers import HfArgumentParser, TrainingArguments, TrOCRProcessor, VisionEncoderDecoderModel
 from transformers.utils import check_min_version

-import evaluate
-from neural_compressor import PostTrainingQuantConfig
 from optimum.intel.neural_compressor import INCModelForVision2Seq, INCQuantizer
@@ -284,7 +284,7 @@ def eval_func(model, iters=None):
     if result_loaded_model != result_optimized_model:
         logger.error("The quantized model was not successfully loaded.")
     else:
-        logger.info(f"The quantized model was successfully loaded.")
+        logger.info("The quantized model was successfully loaded.")


 def _mp_fn(index):
diff --git a/examples/neural_compressor/question-answering/run_qa.py b/examples/neural_compressor/question-answering/run_qa.py
index 19e247824b..3c122e9f5a 100644
--- a/examples/neural_compressor/question-answering/run_qa.py
+++ b/examples/neural_compressor/question-answering/run_qa.py
@@ -25,12 +25,16 @@ from typing import Optional

 import datasets
+import evaluate
 import numpy as np
 import torch
 import transformers
+from accelerate import Accelerator
 from datasets import load_dataset
+from neural_compressor import DistillationConfig, QuantizationAwareTrainingConfig, WeightPruningConfig
 from torch.utils.data.dataloader import DataLoader
 from tqdm.auto import tqdm
+from trainer_qa import QuestionAnsweringINCTrainer
 from transformers import (
     AutoConfig,
     AutoModelForQuestionAnswering,
@@ -38,7 +42,6 @@
     DataCollatorWithPadding,
     EvalPrediction,
     HfArgumentParser,
-    PreTrainedModel,
     PreTrainedTokenizerFast,
     TrainingArguments,
     default_data_collator,
@@ -47,13 +50,9 @@ from transformers.trainer_utils import get_last_checkpoint
 from transformers.utils import check_min_version
 from transformers.utils.versions import require_version
+from utils_qa import postprocess_qa_predictions

-import evaluate
-from accelerate import Accelerator
-from neural_compressor import DistillationConfig, QuantizationAwareTrainingConfig, WeightPruningConfig
 from optimum.intel.neural_compressor import INCModelForQuestionAnswering
-from trainer_qa import QuestionAnsweringINCTrainer
-from utils_qa import postprocess_qa_predictions


 # Will be removed when neural-compressor next release is out
@@ -553,7 +552,10 @@ def move_input_to_device(input, device):
             )
             teacher_model_qa = QAModel(teacher_model)
             teacher_model_qa = accelerator.prepare(teacher_model_qa)
-            num_param = lambda model: sum(p.numel() for p in model.parameters())
+
+            def num_param(model):
+                return sum(p.numel() for p in model.parameters())
+
             logger.info(
                 "***** Number of teacher model parameters: {:.2f}M *****".format(num_param(teacher_model_qa) / 10**6)
             )
@@ -662,9 +664,33 @@ def prepare_validation_features(examples):
load_from_cache_file=not data_args.overwrite_cache, desc="Running tokenizer on validation dataset", ) + if data_args.max_eval_samples is not None: # During Feature creation dataset samples might increase, we will select required samples again - eval_dataset = eval_dataset.select(range(data_args.max_eval_samples)) + max_eval_samples = min(len(eval_dataset), data_args.max_eval_samples) + eval_dataset = eval_dataset.select(range(max_eval_samples)) + + if training_args.do_predict: + if "test" not in raw_datasets: + raise ValueError("--do_predict requires a test dataset") + predict_examples = raw_datasets["test"] + if data_args.max_predict_samples is not None: + # We will select sample from whole data + predict_examples = predict_examples.select(range(data_args.max_predict_samples)) + # Predict Feature Creation + with training_args.main_process_first(desc="prediction dataset map pre-processing"): + predict_dataset = predict_examples.map( + prepare_validation_features, + batched=True, + num_proc=data_args.preprocessing_num_workers, + remove_columns=column_names, + load_from_cache_file=not data_args.overwrite_cache, + desc="Running tokenizer on prediction dataset", + ) + if data_args.max_predict_samples is not None: + # During Feature creation dataset samples might increase, we will select required samples again + max_predict_samples = min(len(predict_dataset), data_args.max_predict_samples) + predict_dataset = predict_dataset.select(range(max_predict_samples)) # Post-processing: def post_processing_function(examples, features, predictions, stage="eval"): diff --git a/examples/neural_compressor/question-answering/run_qa_post_training.py b/examples/neural_compressor/question-answering/run_qa_post_training.py index b1828f0a4b..2965f36863 100644 --- a/examples/neural_compressor/question-answering/run_qa_post_training.py +++ b/examples/neural_compressor/question-answering/run_qa_post_training.py @@ -25,12 +25,14 @@ from typing import Optional import datasets +import evaluate import numpy as np import torch import transformers +from accelerate import Accelerator from datasets import load_dataset +from neural_compressor import PostTrainingQuantConfig from torch.utils.data.dataloader import DataLoader -from tqdm.auto import tqdm from transformers import ( AutoConfig, AutoModelForQuestionAnswering, @@ -38,7 +40,6 @@ DataCollatorWithPadding, EvalPrediction, HfArgumentParser, - PreTrainedModel, PreTrainedTokenizerFast, TrainingArguments, default_data_collator, @@ -46,12 +47,9 @@ ) from transformers.utils import check_min_version from transformers.utils.versions import require_version +from utils_qa import postprocess_qa_predictions -import evaluate -from accelerate import Accelerator -from neural_compressor import PostTrainingQuantConfig from optimum.intel.neural_compressor import INCModelForQuestionAnswering, INCQuantizer -from utils_qa import postprocess_qa_predictions os.environ["CUDA_VISIBLE_DEVICES"] = "" diff --git a/examples/neural_compressor/summarization/run_summarization.py b/examples/neural_compressor/summarization/run_summarization.py index f0d13d97e2..fa3cf44dc8 100644 --- a/examples/neural_compressor/summarization/run_summarization.py +++ b/examples/neural_compressor/summarization/run_summarization.py @@ -25,11 +25,14 @@ from typing import Optional import datasets +import evaluate import nltk # Here to have a nice missing dependency error message early on import numpy as np import torch import transformers from datasets import load_dataset +from filelock import FileLock +from 
neural_compressor import QuantizationAwareTrainingConfig, WeightPruningConfig from transformers import ( AutoConfig, AutoModelForSeq2SeqLM, @@ -40,7 +43,6 @@ MBart50TokenizerFast, MBartTokenizer, MBartTokenizerFast, - PreTrainedModel, Seq2SeqTrainingArguments, set_seed, ) @@ -48,9 +50,6 @@ from transformers.utils import check_min_version, is_offline_mode from transformers.utils.versions import require_version -import evaluate -from filelock import FileLock -from neural_compressor import QuantizationAwareTrainingConfig, WeightPruningConfig from optimum.intel.neural_compressor import INCModelForSeq2SeqLM, INCSeq2SeqTrainer @@ -748,7 +747,6 @@ def compute_metrics(eval_preds): logger.warning("The quantized model was not successfully loaded.") # Evaluation - results = {} max_length = ( training_args.generation_max_length if training_args.generation_max_length is not None diff --git a/examples/neural_compressor/summarization/run_summarization_post_training.py b/examples/neural_compressor/summarization/run_summarization_post_training.py index 80cd2344c9..54e4400cd9 100644 --- a/examples/neural_compressor/summarization/run_summarization_post_training.py +++ b/examples/neural_compressor/summarization/run_summarization_post_training.py @@ -25,11 +25,15 @@ from typing import Optional import datasets +import evaluate import nltk # Here to have a nice missing dependency error message early on import numpy as np import torch import transformers +from accelerate import Accelerator from datasets import load_dataset +from filelock import FileLock +from neural_compressor import PostTrainingQuantConfig from torch.utils.data import DataLoader from transformers import ( AutoConfig, @@ -41,17 +45,12 @@ MBart50TokenizerFast, MBartTokenizer, MBartTokenizerFast, - PreTrainedModel, Seq2SeqTrainingArguments, set_seed, ) from transformers.utils import check_min_version, is_offline_mode from transformers.utils.versions import require_version -import evaluate -from accelerate import Accelerator -from filelock import FileLock -from neural_compressor import PostTrainingQuantConfig, QuantizationAwareTrainingConfig, WeightPruningConfig from optimum.intel.neural_compressor import INCModelForSeq2SeqLM, INCQuantizer @@ -663,7 +662,6 @@ def postprocess_text(preds, labels): logger.warning("The quantized model was not successfully loaded.") # Evaluation - results = {} max_length = ( training_args.generation_max_length if training_args.generation_max_length is not None diff --git a/examples/neural_compressor/test_examples.py b/examples/neural_compressor/test_examples.py index d72cd915b0..89779ffea3 100644 --- a/examples/neural_compressor/test_examples.py +++ b/examples/neural_compressor/test_examples.py @@ -182,7 +182,7 @@ def test_run_clm(self): --max_eval_samples 50 --max_train_samples 4 --num_train_epoch 2 - --learning_rate 1e-10 + --learning_rate 1e-10 --verify_loading --output_dir {tmp_dir} """.split() @@ -212,7 +212,7 @@ def test_run_mlm(self): --max_eval_samples 50 --max_train_samples 4 --num_train_epoch 2 - --learning_rate 1e-10 + --learning_rate 1e-10 --verify_loading --output_dir {tmp_dir} """.split() diff --git a/examples/neural_compressor/text-classification/intent-classification/run_clinc.py b/examples/neural_compressor/text-classification/intent-classification/run_clinc.py index 2e5781f226..99b7dddd9c 100644 --- a/examples/neural_compressor/text-classification/intent-classification/run_clinc.py +++ b/examples/neural_compressor/text-classification/intent-classification/run_clinc.py @@ -29,18 +29,19 @@ import numpy as 
np import torch import transformers -from datasets import load_dataset, load_metric -from sklearn.linear_model import LogisticRegression -from torch.utils.data import DataLoader, Dataset -from tqdm import tqdm +from datasets import load_dataset +from neural_compressor import ( + DistillationConfig, + PostTrainingQuantConfig, + QuantizationAwareTrainingConfig, + WeightPruningConfig, +) from transformers import ( AutoConfig, AutoModel, AutoTokenizer, DataCollatorWithPadding, - EvalPrediction, HfArgumentParser, - PretrainedConfig, PreTrainedModel, TrainingArguments, default_data_collator, @@ -50,12 +51,6 @@ from transformers.utils import check_min_version from transformers.utils.versions import require_version -from neural_compressor import ( - DistillationConfig, - PostTrainingQuantConfig, - QuantizationAwareTrainingConfig, - WeightPruningConfig, -) from optimum.intel.neural_compressor import INCModel, INCQuantizer, INCTrainer @@ -280,29 +275,6 @@ def forward(self, input_ids, attention_mask, token_type_ids=None, *args, **kwarg return sentence_embeddings -class SetFitModelTraining(torch.nn.Module): - def __init__(self, model, tokenizer): - super().__init__() - self.model = SetFitModel(model) - self.tokenizer = tokenizer - if hasattr(model, "config"): - self.config = model.config - - def forward(self, sentences=None, *args, **kwargs): - if not (isinstance(sentences, (tuple, list)) and len(sentences) == 2): - raise ValueError("sentences should be a tuple or a list with 2 sentences string.") - inputs = self.tokenizer( - sentences[0] + sentences[1], - padding=padding, - max_length=max_seq_length, - truncation=True, - return_tensors="pt", - ) - embeddings = self.model(**inputs) - length = len(embeddings) // 2 - return {"logits": torch.cosine_similarity(embeddings[:length], embeddings[length:]), "loss": 0} - - def main(): # See all possible arguments in src/transformers/training_args.py # or by passing the --help flag to this script. 
@@ -424,6 +396,28 @@ def main(): # We will pad later, dynamically at batch creation, to the max sequence length in each batch padding = False + class SetFitModelTraining(torch.nn.Module): + def __init__(self, model, tokenizer): + super().__init__() + self.model = SetFitModel(model) + self.tokenizer = tokenizer + if hasattr(model, "config"): + self.config = model.config + + def forward(self, sentences=None, *args, **kwargs): + if not (isinstance(sentences, (tuple, list)) and len(sentences) == 2): + raise ValueError("sentences should be a tuple or a list with 2 sentences string.") + inputs = self.tokenizer( + sentences[0] + sentences[1], + padding=padding, + max_length=max_seq_length, + truncation=True, + return_tensors="pt", + ) + embeddings = self.model(**inputs) + length = len(embeddings) // 2 + return {"logits": torch.cosine_similarity(embeddings[:length], embeddings[length:]), "loss": 0} + if data_args.max_seq_length > tokenizer.model_max_length: logger.warning( f"The max_seq_length passed ({data_args.max_seq_length}) is larger than the maximum length for the" @@ -526,8 +520,14 @@ def compute_metrics(preds, labels): use_auth_token=True if model_args.use_auth_token else None, ) - teacher_model = SetFitModelTraining(teacher_model, teacher_tokenizer) + teacher_tokenizer = AutoTokenizer.from_pretrained( + optim_args.teacher_model_name_or_path, + cache_dir=model_args.cache_dir, + use_fast=model_args.use_fast_tokenizer, + use_auth_token=True if model_args.use_auth_token else None, + ) + teacher_model = SetFitModelTraining(teacher_model, teacher_tokenizer) teacher_model.to(training_args.device) teacher_tokenizer = AutoTokenizer.from_pretrained(optim_args.teacher_model_name_or_path, use_fast=True) diff --git a/examples/neural_compressor/text-classification/run_glue.py b/examples/neural_compressor/text-classification/run_glue.py index 0a5bd6b792..afc0921756 100644 --- a/examples/neural_compressor/text-classification/run_glue.py +++ b/examples/neural_compressor/text-classification/run_glue.py @@ -25,10 +25,12 @@ from typing import Optional import datasets +import evaluate import numpy as np import torch import transformers from datasets import load_dataset +from neural_compressor import DistillationConfig, QuantizationAwareTrainingConfig, WeightPruningConfig from transformers import ( AutoConfig, AutoModelForSequenceClassification, @@ -37,7 +39,6 @@ EvalPrediction, HfArgumentParser, PretrainedConfig, - PreTrainedModel, TrainingArguments, default_data_collator, set_seed, @@ -46,8 +47,6 @@ from transformers.utils import check_min_version from transformers.utils.versions import require_version -import evaluate -from neural_compressor import DistillationConfig, QuantizationAwareTrainingConfig, WeightPruningConfig from optimum.intel.neural_compressor import INCModelForSequenceClassification, INCTrainer @@ -421,12 +420,12 @@ def main(): ): # Some have all caps in their config, some don't. label_name_to_id = {k.lower(): v for k, v in model.config.label2id.items()} - if list(sorted(label_name_to_id.keys())) == list(sorted(label_list)): + if sorted(label_name_to_id.keys()) == sorted(label_list): label_to_id = {i: int(label_name_to_id[label_list[i]]) for i in range(num_labels)} else: logger.warning( "Your model seems to have been trained with labels, but they don't match the dataset: ", - f"model labels: {list(sorted(label_name_to_id.keys()))}, dataset labels: {list(sorted(label_list))}." + f"model labels: {sorted(label_name_to_id.keys())}, dataset labels: {sorted(label_list)}." 
"\nIgnoring the model labels as a result.", ) elif data_args.task_name is None and not is_regression: diff --git a/examples/neural_compressor/text-classification/run_glue_post_training.py b/examples/neural_compressor/text-classification/run_glue_post_training.py index c9ffaf03c7..4f7a83d9a8 100644 --- a/examples/neural_compressor/text-classification/run_glue_post_training.py +++ b/examples/neural_compressor/text-classification/run_glue_post_training.py @@ -19,26 +19,25 @@ import logging import os -import random import sys from dataclasses import dataclass, field from typing import Optional import datasets -import numpy as np +import evaluate import torch import transformers +from accelerate import Accelerator from datasets import load_dataset +from neural_compressor import PostTrainingQuantConfig from torch.utils.data import DataLoader from transformers import ( AutoConfig, AutoModelForSequenceClassification, AutoTokenizer, DataCollatorWithPadding, - EvalPrediction, HfArgumentParser, PretrainedConfig, - PreTrainedModel, TrainingArguments, default_data_collator, set_seed, @@ -46,9 +45,6 @@ from transformers.utils import check_min_version from transformers.utils.versions import require_version -import evaluate -from accelerate import Accelerator -from neural_compressor import PostTrainingQuantConfig from optimum.intel.neural_compressor import INCModelForSequenceClassification, INCQuantizer @@ -375,12 +371,12 @@ def main(): ): # Some have all caps in their config, some don't. label_name_to_id = {k.lower(): v for k, v in model.config.label2id.items()} - if list(sorted(label_name_to_id.keys())) == list(sorted(label_list)): + if sorted(label_name_to_id.keys()) == sorted(label_list): label_to_id = {i: int(label_name_to_id[label_list[i]]) for i in range(num_labels)} else: logger.warning( "Your model seems to have been trained with labels, but they don't match the dataset: ", - f"model labels: {list(sorted(label_name_to_id.keys()))}, dataset labels: {list(sorted(label_list))}." + f"model labels: {sorted(label_name_to_id.keys())}, dataset labels: {sorted(label_list)}." 
"\nIgnoring the model labels as a result.", ) elif data_args.task_name is None and not is_regression: diff --git a/examples/neural_compressor/text-to-image/run_diffusion_post_training.py b/examples/neural_compressor/text-to-image/run_diffusion_post_training.py index ba31fc8993..359afdd9ce 100644 --- a/examples/neural_compressor/text-to-image/run_diffusion_post_training.py +++ b/examples/neural_compressor/text-to-image/run_diffusion_post_training.py @@ -25,15 +25,15 @@ import time import torch -from PIL import Image -from torch.utils.data import DataLoader, Dataset - from accelerate.utils import set_seed from diffusers import StableDiffusionPipeline from neural_compressor import PostTrainingQuantConfig +from PIL import Image +from pytorch_fid import fid_score +from torch.utils.data import Dataset + from optimum.intel.neural_compressor import INCQuantizer from optimum.intel.neural_compressor.utils import load_quantized_model -from pytorch_fid import fid_score os.environ["CUDA_VISIBLE_DEVICES"] = "" @@ -147,7 +147,7 @@ def benchmark(pipeline, generator): for i in range(total): prompt = "a photo of an astronaut riding a horse on mars" start2 = time.time() - images = pipeline(prompt, guidance_scale=7.5, num_inference_steps=50, generator=generator).images + pipeline(prompt, guidance_scale=7.5, num_inference_steps=50, generator=generator).images end2 = time.time() if i >= warmup: total_time += end2 - start2 @@ -270,7 +270,7 @@ def eval_func(model): if result_loaded_model != result_optimized_model: logger.error("The quantized model was not successfully loaded.") else: - logger.info(f"The quantized model was successfully loaded.") + logger.info("The quantized model was successfully loaded.") if args.benchmark and args.int8: print("====int8 inference====") diff --git a/examples/neural_compressor/token-classification/run_ner.py b/examples/neural_compressor/token-classification/run_ner.py index a4ba11de49..7ba0702cc9 100644 --- a/examples/neural_compressor/token-classification/run_ner.py +++ b/examples/neural_compressor/token-classification/run_ner.py @@ -26,17 +26,18 @@ from typing import Optional import datasets +import evaluate import numpy as np import torch import transformers from datasets import ClassLabel, load_dataset +from neural_compressor import DistillationConfig, QuantizationAwareTrainingConfig, WeightPruningConfig from transformers import ( AutoConfig, AutoModelForTokenClassification, AutoTokenizer, DataCollatorForTokenClassification, HfArgumentParser, - PreTrainedModel, PreTrainedTokenizerFast, TrainingArguments, set_seed, @@ -45,9 +46,7 @@ from transformers.utils import check_min_version from transformers.utils.versions import require_version -import evaluate -from neural_compressor import DistillationConfig, QuantizationAwareTrainingConfig, WeightPruningConfig -from optimum.intel.neural_compressor import INCModelForTokenClassification, INCQuantizer, INCTrainer +from optimum.intel.neural_compressor import INCModelForTokenClassification, INCTrainer # Will be removed when neural-compressor next release is out diff --git a/examples/neural_compressor/token-classification/run_ner_post_training.py b/examples/neural_compressor/token-classification/run_ner_post_training.py index 59b8f568b3..d9fce5a26e 100644 --- a/examples/neural_compressor/token-classification/run_ner_post_training.py +++ b/examples/neural_compressor/token-classification/run_ner_post_training.py @@ -26,10 +26,12 @@ from typing import Optional import datasets -import numpy as np +import evaluate import torch import 
transformers +from accelerate import Accelerator from datasets import ClassLabel, load_dataset +from neural_compressor import PostTrainingQuantConfig from torch.utils.data import DataLoader from transformers import ( AutoConfig, @@ -37,7 +39,6 @@ AutoTokenizer, DataCollatorForTokenClassification, HfArgumentParser, - PreTrainedModel, PreTrainedTokenizerFast, TrainingArguments, set_seed, @@ -45,9 +46,6 @@ from transformers.utils import check_min_version from transformers.utils.versions import require_version -import evaluate -from accelerate import Accelerator -from neural_compressor import PostTrainingQuantConfig from optimum.intel.neural_compressor import INCModelForTokenClassification, INCQuantizer diff --git a/examples/neural_compressor/translation/run_translation.py b/examples/neural_compressor/translation/run_translation.py index 9fd496e99f..5262426062 100644 --- a/examples/neural_compressor/translation/run_translation.py +++ b/examples/neural_compressor/translation/run_translation.py @@ -25,22 +25,22 @@ from typing import Optional import datasets +import evaluate import numpy as np import torch import transformers from datasets import load_dataset +from neural_compressor import QuantizationAwareTrainingConfig, WeightPruningConfig from transformers import ( AutoConfig, AutoModelForSeq2SeqLM, AutoTokenizer, - DataCollatorForSeq2Seq, HfArgumentParser, M2M100Tokenizer, MBart50Tokenizer, MBart50TokenizerFast, MBartTokenizer, MBartTokenizerFast, - PreTrainedModel, Seq2SeqTrainingArguments, default_data_collator, set_seed, @@ -49,8 +49,6 @@ from transformers.utils import check_min_version from transformers.utils.versions import require_version -import evaluate -from neural_compressor import QuantizationAwareTrainingConfig, WeightPruningConfig from optimum.intel.neural_compressor import INCModelForSeq2SeqLM, INCSeq2SeqTrainer @@ -510,7 +508,7 @@ def preprocess_function(examples): max_train_samples = ( data_args.max_train_samples if data_args.max_train_samples is not None else len(train_dataset) ) - train_samples = min(max_train_samples, len(train_dataset)) + min(max_train_samples, len(train_dataset)) if training_args.do_eval: max_target_length = data_args.val_max_target_length @@ -652,7 +650,6 @@ def compute_metrics(eval_preds): logger.warning("The quantized model was not successfully loaded.") # Evaluation - results = {} max_length = ( training_args.generation_max_length if training_args.generation_max_length is not None diff --git a/examples/neural_compressor/translation/run_translation_post_training.py b/examples/neural_compressor/translation/run_translation_post_training.py index 7388e18ea9..8a3a80c039 100644 --- a/examples/neural_compressor/translation/run_translation_post_training.py +++ b/examples/neural_compressor/translation/run_translation_post_training.py @@ -25,23 +25,24 @@ from typing import Optional import datasets +import evaluate import numpy as np import torch import transformers +from accelerate import Accelerator from datasets import load_dataset +from neural_compressor import PostTrainingQuantConfig from torch.utils.data import DataLoader from transformers import ( AutoConfig, AutoModelForSeq2SeqLM, AutoTokenizer, - DataCollatorForSeq2Seq, HfArgumentParser, M2M100Tokenizer, MBart50Tokenizer, MBart50TokenizerFast, MBartTokenizer, MBartTokenizerFast, - PreTrainedModel, Seq2SeqTrainingArguments, default_data_collator, set_seed, @@ -49,9 +50,6 @@ from transformers.utils import check_min_version from transformers.utils.versions import require_version -import evaluate -from 
accelerate import Accelerator -from neural_compressor import PostTrainingQuantConfig from optimum.intel.neural_compressor import INCModelForSeq2SeqLM, INCQuantizer @@ -481,7 +479,7 @@ def preprocess_function(examples): max_train_samples = ( data_args.max_train_samples if data_args.max_train_samples is not None else len(train_dataset) ) - train_samples = min(max_train_samples, len(train_dataset)) + min(max_train_samples, len(train_dataset)) if training_args.do_eval: max_target_length = data_args.val_max_target_length @@ -564,7 +562,6 @@ def postprocess_text(preds, labels): logger.warning("The quantized model was not successfully loaded.") # Evaluation - results = {} max_length = ( training_args.generation_max_length if training_args.generation_max_length is not None diff --git a/examples/openvino/audio-classification/run_audio_classification.py b/examples/openvino/audio-classification/run_audio_classification.py index dd4a7085c5..b8df86a575 100644 --- a/examples/openvino/audio-classification/run_audio_classification.py +++ b/examples/openvino/audio-classification/run_audio_classification.py @@ -24,19 +24,17 @@ from typing import Optional import datasets +import evaluate +import jstyleson as json import numpy as np -import torch -import torch.nn.functional as F import transformers from datasets import DatasetDict, load_dataset +from nncf.common.utils.os import safe_open from transformers import AutoConfig, AutoFeatureExtractor, AutoModelForAudioClassification, HfArgumentParser, set_seed from transformers.trainer_utils import get_last_checkpoint from transformers.utils import check_min_version, send_example_telemetry from transformers.utils.versions import require_version -import evaluate -import jstyleson as json -from nncf.common.utils.os import safe_open from optimum.intel.openvino import OVConfig, OVTrainer, OVTrainingArguments @@ -304,7 +302,7 @@ def train_transforms(batch): audio["array"], max_length=data_args.max_length_seconds, sample_rate=feature_extractor.sampling_rate ) output_batch["input_values"].append(wav) - output_batch["labels"] = [label for label in batch[data_args.label_column_name]] + output_batch["labels"] = list(batch[data_args.label_column_name]) return output_batch @@ -314,14 +312,14 @@ def val_transforms(batch): for audio in batch[data_args.audio_column_name]: wav = audio["array"] output_batch["input_values"].append(wav) - output_batch["labels"] = [label for label in batch[data_args.label_column_name]] + output_batch["labels"] = list(batch[data_args.label_column_name]) return output_batch # Prepare label mappings. # We'll include these in the model's config to get human readable labels in the Inference API. 
labels = raw_datasets["train"].features[data_args.label_column_name].names - label2id, id2label = dict(), dict() + label2id, id2label = {}, {} for i, label in enumerate(labels): label2id[label] = str(i) id2label[str(i)] = label diff --git a/examples/openvino/image-classification/run_image_classification.py b/examples/openvino/image-classification/run_image_classification.py index 1c527bc888..8a7c009e46 100644 --- a/examples/openvino/image-classification/run_image_classification.py +++ b/examples/openvino/image-classification/run_image_classification.py @@ -23,10 +23,13 @@ from pathlib import Path from typing import Optional +import evaluate +import jstyleson as json import numpy as np import torch import transformers from datasets import load_dataset +from nncf.common.utils.os import safe_open from PIL import Image from torchvision.transforms import ( CenterCrop, @@ -49,9 +52,6 @@ from transformers.utils import check_min_version, send_example_telemetry from transformers.utils.versions import require_version -import evaluate -import jstyleson as json -from nncf.common.utils.os import safe_open from optimum.intel.openvino import OVConfig, OVTrainer, OVTrainingArguments @@ -265,7 +265,7 @@ def main(): # Prepare label mappings. # We'll include these in the model's config to get human readable labels in the Inference API. labels = dataset["train"].features["labels"].names - label2id, id2label = dict(), dict() + label2id, id2label = {}, {} for i, label in enumerate(labels): label2id[label] = str(i) id2label[str(i)] = label diff --git a/examples/openvino/question-answering/run_qa.py b/examples/openvino/question-answering/run_qa.py index f7947ff383..a86c7fb6d7 100644 --- a/examples/openvino/question-answering/run_qa.py +++ b/examples/openvino/question-answering/run_qa.py @@ -27,8 +27,12 @@ from typing import Optional import datasets +import evaluate +import jstyleson as json import transformers from datasets import load_dataset +from nncf.common.utils.os import safe_open +from trainer_qa import QuestionAnsweringOVTrainer from transformers import ( AutoConfig, AutoModelForQuestionAnswering, @@ -43,13 +47,9 @@ from transformers.trainer_utils import get_last_checkpoint from transformers.utils import check_min_version, send_example_telemetry from transformers.utils.versions import require_version +from utils_qa import postprocess_qa_predictions -import evaluate -import jstyleson as json -from nncf.common.utils.os import safe_open from optimum.intel.openvino import OVConfig, OVTrainingArguments -from trainer_qa import QuestionAnsweringOVTrainer -from utils_qa import postprocess_qa_predictions # Will error if the minimal version of Transformers is not installed. Remove at your own risks. 
diff --git a/examples/openvino/text-classification/run_glue.py b/examples/openvino/text-classification/run_glue.py index aad64dd2dc..a83bb4be89 100644 --- a/examples/openvino/text-classification/run_glue.py +++ b/examples/openvino/text-classification/run_glue.py @@ -25,9 +25,12 @@ from typing import Optional import datasets +import evaluate +import jstyleson as json import numpy as np import transformers from datasets import load_dataset +from nncf.common.utils.os import safe_open from transformers import ( AutoConfig, AutoModelForSequenceClassification, @@ -36,8 +39,6 @@ EvalPrediction, HfArgumentParser, PretrainedConfig, - Trainer, - TrainingArguments, default_data_collator, set_seed, ) @@ -45,9 +46,6 @@ from transformers.utils import check_min_version, send_example_telemetry from transformers.utils.versions import require_version -import evaluate -import jstyleson as json -from nncf.common.utils.os import safe_open from optimum.intel.openvino import OVConfig, OVTrainer, OVTrainingArguments @@ -431,12 +429,12 @@ def main(): ): # Some have all caps in their config, some don't. label_name_to_id = {k.lower(): v for k, v in model.config.label2id.items()} - if list(sorted(label_name_to_id.keys())) == list(sorted(label_list)): + if sorted(label_name_to_id.keys()) == sorted(label_list): label_to_id = {i: int(label_name_to_id[label_list[i]]) for i in range(num_labels)} else: logger.warning( "Your model seems to have been trained with labels, but they don't match the dataset: ", - f"model labels: {list(sorted(label_name_to_id.keys()))}, dataset labels: {list(sorted(label_list))}." + f"model labels: {sorted(label_name_to_id.keys())}, dataset labels: {sorted(label_list)}." "\nIgnoring the model labels as a result.", ) elif data_args.task_name is None and not is_regression: diff --git a/optimum/intel/neural_compressor/configuration.py b/optimum/intel/neural_compressor/configuration.py index 4e449902da..e83add9f9b 100644 --- a/optimum/intel/neural_compressor/configuration.py +++ b/optimum/intel/neural_compressor/configuration.py @@ -12,11 +12,10 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-from typing import Dict, List, Optional, Union - -import torch +from typing import Dict, Optional, Union from neural_compressor.conf.pythonic_config import DistillationConfig, WeightPruningConfig, _BaseQuantizationConfig + from optimum.configuration_utils import BaseConfig from ..utils.import_utils import _neural_compressor_version, _torch_version diff --git a/optimum/intel/neural_compressor/neural_coder_adaptor.py b/optimum/intel/neural_compressor/neural_coder_adaptor.py index a6451a1ac0..f7fb3cd709 100644 --- a/optimum/intel/neural_compressor/neural_coder_adaptor.py +++ b/optimum/intel/neural_compressor/neural_coder_adaptor.py @@ -1,7 +1,7 @@ class NeuralCoderAdaptor: """API design adaption for Neural Coder""" - default_quant_dynamic = f"""\ + default_quant_dynamic = """\ def eval_func(model): EVAL_FUNC_LINES from neural_compressor.config import PostTrainingQuantConfig @@ -13,7 +13,7 @@ def eval_func(model): MODEL_NAME.eval() """ - default_quant_static = f"""\ + default_quant_static = """\ def eval_func(model): EVAL_FUNC_LINES from neural_compressor.config import PostTrainingQuantConfig diff --git a/optimum/intel/neural_compressor/quantization.py b/optimum/intel/neural_compressor/quantization.py index 3338d15617..ff6b7dc242 100644 --- a/optimum/intel/neural_compressor/quantization.py +++ b/optimum/intel/neural_compressor/quantization.py @@ -16,17 +16,20 @@ import inspect import logging import os -import sys import warnings from enum import Enum from itertools import chain from pathlib import Path -from typing import Callable, ClassVar, Dict, Optional, Union +from typing import TYPE_CHECKING, Callable, ClassVar, Dict, Optional, Union import torch -import transformers from datasets import Dataset, load_dataset -from packaging import version +from huggingface_hub import HfApi, hf_hub_download +from neural_compressor.adaptor.pytorch import PyTorch_FXAdaptor, _cfg_to_qconfig, _propagate_qconfig +from neural_compressor.experimental.export import torch_to_int8_onnx +from neural_compressor.model.torch_model import IPEXModel, PyTorchModel +from neural_compressor.quantization import fit +from neural_compressor.utils.pytorch import load from torch.utils.data import DataLoader, RandomSampler from transformers import ( AutoConfig, @@ -50,13 +53,6 @@ from transformers.utils import TRANSFORMERS_CACHE, is_offline_mode from transformers.utils.generic import ContextManagers -import neural_compressor -from huggingface_hub import HfApi, hf_hub_download -from neural_compressor.adaptor.pytorch import PyTorch_FXAdaptor, _cfg_to_qconfig, _propagate_qconfig -from neural_compressor.experimental.export import torch_to_int8_onnx -from neural_compressor.model.torch_model import IPEXModel, PyTorchModel -from neural_compressor.quantization import fit -from neural_compressor.utils.pytorch import load from optimum.exporters import TasksManager from optimum.exporters.onnx import OnnxConfig from optimum.quantization_base import OptimumQuantizer @@ -71,6 +67,10 @@ from .utils import MIN_QDQ_ONNX_OPSET, ONNX_WEIGHTS_NAME, WEIGHTS_NAME, INCDataLoader, _cfgs_to_fx_cfgs +if TYPE_CHECKING: + from neural_compressor.config import PostTrainingQuantConfig + + logger = logging.getLogger(__name__) NEURAL_COMPRESSOR_MINIMUM_VERSION = "2.1.0" @@ -88,7 +88,7 @@ class INCQuantizationMode(Enum): AWARE_TRAINING = "quant_aware_training" -SUPPORTED_QUANT_MODE = set([approach.value for approach in INCQuantizationMode]) +SUPPORTED_QUANT_MODE = {approach.value for approach in INCQuantizationMode} class INCQuantizer(OptimumQuantizer): @@ 
-239,10 +239,10 @@ def _onnx_export( output_path: Union[str, Path], ): opset = min(config.DEFAULT_ONNX_OPSET, MIN_QDQ_ONNX_OPSET) - dynamic_axes = {name: axes for name, axes in chain(config.inputs.items(), config.outputs.items())} + dynamic_axes = dict(chain(config.inputs.items(), config.outputs.items())) inputs = config.generate_dummy_inputs(framework="pt") device = model.model.device - inputs = dict((k, v.to(device)) for k, v in inputs.items()) + inputs = {k: v.to(device) for k, v in inputs.items()} torch_to_int8_onnx( fp32_model=self._original_model.to(device), int8_model=model.model, @@ -267,7 +267,7 @@ def _set_task(self): "The task defining the model topology could not be extracted and needs to be specified for the ONNX export." ) if self.task in ["seq2seq-lm", "translation", "summarization"]: - raise ValueError(f"Seq2Seq models are currently not supported for post-training static quantization.") + raise ValueError("Seq2Seq models are currently not supported for post-training static quantization.") def get_calibration_dataset( self, @@ -546,8 +546,8 @@ def from_pretrained(cls, model_name_or_path: str, q_model_name: Optional[str] = if not is_torch_version("==", inc_config.torch_version): msg = f"Quantized model was obtained with torch version {inc_config.torch_version} but {_torch_version} was found." logger.warning(f"{msg}") - except Exception as e: - logger.info(f"Couldn't verify torch version.") + except Exception: + logger.info("Couldn't verify torch version.") if getattr(config, "backend", None) == "ipex": # NOTE: Will improve to use load function when Intel Neural Compressor next 2.1 release. diff --git a/optimum/intel/neural_compressor/trainer.py b/optimum/intel/neural_compressor/trainer.py index 16acf3ed25..f1799f6af8 100644 --- a/optimum/intel/neural_compressor/trainer.py +++ b/optimum/intel/neural_compressor/trainer.py @@ -25,6 +25,10 @@ import datasets import torch import torch.distributed as dist +from neural_compressor import training +from neural_compressor.compression import DistillationCallbacks +from neural_compressor.conf.pythonic_config import _BaseQuantizationConfig +from neural_compressor.experimental.export import torch_to_fp32_onnx, torch_to_int8_onnx # from packaging import version from torch import nn @@ -56,13 +60,8 @@ speed_metrics, ) from transformers.training_args import TrainingArguments -from transformers.utils import is_sagemaker_mp_enabled, logging +from transformers.utils import is_apex_available, is_sagemaker_mp_enabled, logging -from neural_compressor import training -from neural_compressor.compression import DistillationCallbacks -from neural_compressor.conf.pythonic_config import _BaseQuantizationConfig -from neural_compressor.experimental.export import torch_to_fp32_onnx, torch_to_int8_onnx -from neural_compressor.model.torch_model import PyTorchModel from optimum.exporters import TasksManager from ..utils.import_utils import is_neural_compressor_version @@ -70,8 +69,15 @@ from .utils import MIN_QDQ_ONNX_OPSET, ONNX_WEIGHTS_NAME, TRAINING_ARGS_NAME +if is_apex_available(): + from apex import amp + +if is_sagemaker_mp_enabled(): + import smdistributed.modelparallel.torch as smp + + if TYPE_CHECKING: - import optuna + from optimum.exporters.onnx import OnnxConfig __version__ = "4.22.2" @@ -626,10 +632,10 @@ def _set_task(self): def _onnx_export(self, model: nn.Module, config: "OnnxConfig", output_path: str): opset = min(config.DEFAULT_ONNX_OPSET, MIN_QDQ_ONNX_OPSET) - dynamic_axes = {name: axes for name, axes in chain(config.inputs.items(), 
config.outputs.items())} + dynamic_axes = dict(chain(config.inputs.items(), config.outputs.items())) inputs = config.generate_dummy_inputs(framework="pt") device = model.device - inputs = dict((k, v.to(device)) for k, v in inputs.items()) + inputs = {k: v.to(device) for k, v in inputs.items()} if self.dtype == "int8": torch_to_int8_onnx( @@ -672,7 +678,7 @@ def _remove_unused_columns(self, dataset: "datasets.Dataset", description: Optio " you can safely ignore this message." ) - columns = [k for k in signature_columns if k in dataset.column_names] + [k for k in signature_columns if k in dataset.column_names] return dataset.remove_columns(ignored_columns) @@ -743,12 +749,12 @@ def _prepare_input(self, data: Union[torch.Tensor, Any]) -> Union[torch.Tensor, elif isinstance(data, (tuple, list)): return type(data)(self._prepare_input(v) for v in data) elif isinstance(data, torch.Tensor): - kwargs = dict(device=self.model.device) + kwargs = {"device": self.model.device} if self.deepspeed and data.dtype != torch.int64: # NLP models inputs are int64 and those get adjusted to the right dtype of the # embedding. Other models such as wav2vec2's inputs are already float and thus # may need special handling to match the dtypes of the model - kwargs.update(dict(dtype=self.args.hf_deepspeed_config.dtype())) + kwargs.update({"dtype": self.args.hf_deepspeed_config.dtype()}) return data.to(**kwargs) return data diff --git a/optimum/intel/neural_compressor/utils.py b/optimum/intel/neural_compressor/utils.py index d4191f4e1f..573a970734 100644 --- a/optimum/intel/neural_compressor/utils.py +++ b/optimum/intel/neural_compressor/utils.py @@ -18,11 +18,10 @@ from typing import Dict import torch +from neural_compressor.utils.pytorch import load from packaging import version from torch.utils.data import DataLoader -from neural_compressor.utils.pytorch import load - logger = logging.getLogger(__name__) @@ -72,7 +71,7 @@ def _cfgs_to_fx_cfgs(op_cfgs: Dict, observer_type: str = "post_training_static_q fx_op_cfgs = QConfigMapping() else: - fx_op_cfgs = dict() + fx_op_cfgs = {} op_tuple_cfg_list = [] for key, value in op_cfgs.items(): if key == "default_qconfig": diff --git a/optimum/intel/openvino/modeling.py b/optimum/intel/openvino/modeling.py index e74c38c99e..31ad525fff 100644 --- a/optimum/intel/openvino/modeling.py +++ b/optimum/intel/openvino/modeling.py @@ -16,6 +16,7 @@ from typing import Any, Dict, Optional, Union import numpy as np +import openvino import torch import transformers from transformers import ( @@ -40,8 +41,6 @@ TokenClassifierOutput, ) -import openvino - from ..utils.import_utils import is_transformers_version from .modeling_base import OVBaseModel diff --git a/optimum/intel/openvino/modeling_base.py b/optimum/intel/openvino/modeling_base.py index e1ae88d036..334b1dd4f0 100644 --- a/optimum/intel/openvino/modeling_base.py +++ b/optimum/intel/openvino/modeling_base.py @@ -18,16 +18,14 @@ from tempfile import TemporaryDirectory from typing import Dict, Optional, Union -import transformers -from transformers import AutoConfig, PretrainedConfig -from transformers.file_utils import add_start_docstrings, default_cache_path -from transformers.onnx.utils import get_preprocessor - import openvino -from huggingface_hub import HfApi, hf_hub_download +from huggingface_hub import hf_hub_download from huggingface_hub.utils import EntryNotFoundError from openvino._offline_transformations import apply_moc_transformations, compress_model_transformation from openvino.runtime import Core +from transformers 
import PretrainedConfig +from transformers.file_utils import add_start_docstrings + from optimum.exporters import TasksManager from optimum.exporters.onnx import export from optimum.modeling_base import OptimizedModel @@ -75,7 +73,7 @@ def __init__( dynamic_shapes: bool = True, ov_config: Optional[Dict[str, str]] = None, model_save_dir: Optional[Union[str, Path, TemporaryDirectory]] = None, - **kwargs + **kwargs, ): self.config = config self.model_save_dir = model_save_dir diff --git a/optimum/intel/openvino/modeling_base_seq2seq.py b/optimum/intel/openvino/modeling_base_seq2seq.py index 1e364628e7..e04f694b41 100644 --- a/optimum/intel/openvino/modeling_base_seq2seq.py +++ b/optimum/intel/openvino/modeling_base_seq2seq.py @@ -18,16 +18,13 @@ from tempfile import TemporaryDirectory from typing import Dict, Optional, Union -import transformers -from transformers import AutoConfig, PretrainedConfig -from transformers.file_utils import add_start_docstrings, default_cache_path -from transformers.onnx import FeaturesManager, export -from transformers.onnx.utils import get_preprocessor - import openvino -from huggingface_hub import HfApi, hf_hub_download +from huggingface_hub import hf_hub_download from huggingface_hub.utils import EntryNotFoundError from openvino._offline_transformations import apply_moc_transformations, compress_model_transformation +from transformers import PretrainedConfig +from transformers.file_utils import add_start_docstrings + from optimum.exporters import TasksManager from optimum.exporters.onnx import export_models, get_encoder_decoder_models_for_export @@ -64,7 +61,7 @@ def __init__( dynamic_shapes: bool = True, ov_config: Optional[Dict[str, str]] = None, model_save_dir: Optional[Union[str, Path, TemporaryDirectory]] = None, - **kwargs + **kwargs, ): self.config = config self.use_cache = decoder_with_past is not None @@ -97,7 +94,7 @@ def _save_pretrained( encoder_file_name: Optional[str] = None, decoder_file_name: Optional[str] = None, decoder_with_past_file_name: Optional[str] = None, - **kwargs + **kwargs, ): """ Saves the model to the OpenVINO IR format so that it can be re-loaded using the diff --git a/optimum/intel/openvino/modeling_diffusion.py b/optimum/intel/openvino/modeling_diffusion.py index db3a77680e..3405bfe9dd 100644 --- a/optimum/intel/openvino/modeling_diffusion.py +++ b/optimum/intel/openvino/modeling_diffusion.py @@ -20,21 +20,19 @@ from typing import Any, Dict, Optional, Union import numpy as np -from transformers import CLIPFeatureExtractor, CLIPTokenizer - import openvino from diffusers import DDIMScheduler, LMSDiscreteScheduler, PNDMScheduler, StableDiffusionPipeline from diffusers.schedulers.scheduling_utils import SCHEDULER_CONFIG_NAME from diffusers.utils import CONFIG_NAME from huggingface_hub import snapshot_download from openvino._offline_transformations import compress_model_transformation -from openvino.offline_transformations import compress_model_transformation from openvino.runtime import Core +from transformers import CLIPFeatureExtractor, CLIPTokenizer + from optimum.exporters import TasksManager from optimum.exporters.onnx import export_models, get_stable_diffusion_models_for_export from optimum.pipelines.diffusers.pipeline_stable_diffusion import StableDiffusionPipelineMixin from optimum.utils import ( - CONFIG_NAME, DIFFUSION_MODEL_TEXT_ENCODER_SUBFOLDER, DIFFUSION_MODEL_UNET_SUBFOLDER, DIFFUSION_MODEL_VAE_DECODER_SUBFOLDER, diff --git a/optimum/intel/openvino/modeling_seq2seq.py b/optimum/intel/openvino/modeling_seq2seq.py 
diff --git a/optimum/intel/openvino/modeling_seq2seq.py b/optimum/intel/openvino/modeling_seq2seq.py
index 4e7520e616..c00cc8ef16 100644
--- a/optimum/intel/openvino/modeling_seq2seq.py
+++ b/optimum/intel/openvino/modeling_seq2seq.py
@@ -17,15 +17,14 @@
 from typing import Dict, Optional, Tuple
 
 import numpy as np
+import openvino
 import torch
 import transformers
+from openvino.runtime import Core, Tensor
 from transformers import AutoConfig, AutoModelForSeq2SeqLM
 from transformers.file_utils import add_start_docstrings, add_start_docstrings_to_model_forward
 from transformers.modeling_outputs import BaseModelOutput, Seq2SeqLMOutput
 
-import openvino
-from openvino.runtime import Core, Tensor
-
 from ..utils.import_utils import is_transformers_version
 from .modeling_base_seq2seq import OVBaseModelForSeq2SeqLM
@@ -146,7 +145,7 @@ def __init__(
         decoder: openvino.runtime.Model,
         decoder_with_past: openvino.runtime.Model = None,
         config: transformers.PretrainedConfig = None,
-        **kwargs
+        **kwargs,
     ):
         super().__init__(
             encoder=encoder, decoder=decoder, decoder_with_past=decoder_with_past, config=config, **kwargs
@@ -232,7 +231,7 @@ def prepare_inputs_for_generation(
         cross_attn_head_mask=None,
         use_cache=None,
         encoder_outputs=None,
-        **kwargs
+        **kwargs,
     ) -> Dict:
         return {
             "decoder_input_ids": input_ids,
diff --git a/optimum/intel/openvino/quantization.py b/optimum/intel/openvino/quantization.py
index 0b6b726fd0..cb50c0114b 100644
--- a/optimum/intel/openvino/quantization.py
+++ b/optimum/intel/openvino/quantization.py
@@ -17,16 +17,12 @@
 import logging
 from itertools import chain
 from pathlib import Path
-from typing import Callable, Dict, List, Optional, Tuple, Union
+from typing import Callable, Dict, Optional, Tuple, Union
 
+import openvino
 import torch
 import transformers
 from datasets import Dataset, load_dataset
-from torch.onnx import export as onnx_export
-from torch.utils.data import DataLoader, RandomSampler
-from transformers import DataCollator, PreTrainedModel, default_data_collator
-
-import openvino
 from huggingface_hub import HfApi
 from nncf import NNCFConfig
 from nncf.torch import create_compressed_model, register_default_init_args
@@ -35,6 +31,10 @@
 from nncf.torch.nncf_network import NNCFNetwork
 from openvino._offline_transformations import compress_quantize_weights_transformation
 from openvino.runtime import Core
+from torch.onnx import export as onnx_export
+from torch.utils.data import DataLoader, RandomSampler
+from transformers import DataCollator, PreTrainedModel, default_data_collator
+
 from optimum.exporters import TasksManager
 from optimum.exporters.onnx import OnnxConfig
 from optimum.quantization_base import OptimumQuantizer
@@ -42,7 +42,6 @@
 from .configuration import OVConfig
 from .utils import (
     MAX_ONNX_OPSET,
-    MAX_ONNX_OPSET_2022_2_0,
     MIN_ONNX_QDQ_OPSET,
     ONNX_WEIGHTS_NAME,
     OV_XML_FILE_NAME,
@@ -200,7 +199,7 @@ def _onnx_export(
     ):
         opset = min(config.DEFAULT_ONNX_OPSET, MAX_ONNX_OPSET)
         opset = opset if not ov_config.save_onnx_model else max(opset, MIN_ONNX_QDQ_OPSET)
-        model_inputs = dict((k, v.to(model.device)) for k, v in model_inputs.items())
+        model_inputs = {k: v.to(model.device) for k, v in model_inputs.items()}
 
         # Create ordered inputs for the ONNX export of NNCFNetwork as keyword arguments are currently not supported
         inputs = tuple([model_inputs.pop(key, None) for key in self._export_input_names if len(model_inputs) != 0])
@@ -213,7 +212,7 @@ def _onnx_export(
             f=f,
             input_names=list(config.inputs.keys()),
             output_names=list(config.outputs.keys()),
-            dynamic_axes={name: axes for name, axes in chain(config.inputs.items(), config.outputs.items())},
+            dynamic_axes=dict(chain(config.inputs.items(), config.outputs.items())),
             do_constant_folding=True,
             opset_version=opset,
         )
@@ -233,7 +232,7 @@ def _set_task(self):
                 "The task defining the model topology could not be extracted and needs to be specified for the ONNX export."
             )
         if self.task in ["seq2seq-lm", "translation", "summarization"]:
-            raise ValueError(f"Seq2Seq models are currently not supported for post-training static quantization.")
+            raise ValueError("Seq2Seq models are currently not supported for post-training static quantization.")
 
     def get_calibration_dataset(
         self,
diff --git a/optimum/intel/openvino/trainer.py b/optimum/intel/openvino/trainer.py
index 843cc47ddc..c55d5c6bfc 100644
--- a/optimum/intel/openvino/trainer.py
+++ b/optimum/intel/openvino/trainer.py
@@ -23,9 +23,29 @@
 from pathlib import Path
 from typing import Callable, Dict, List, Optional, Tuple, Type, Union
 
+import openvino
+import openvino.runtime
 import torch
 import torch.distributed as dist
 import torch.nn.functional as F
+from nncf import NNCFConfig
+from nncf.common.logging.logger import nncf_logger, set_log_level
+from nncf.common.utils.tensorboard import prepare_for_tensorboard
+from nncf.config.structures import BNAdaptationInitArgs, QuantizationRangeInitArgs
+from nncf.experimental.torch.sparsity.movement.algo import MovementSparsityController
+from nncf.experimental.torch.sparsity.movement.scheduler import MovementSchedulerStage
+from nncf.torch import create_compressed_model
+from nncf.torch.composite_compression import PTCompositeCompressionAlgorithmController
+from nncf.torch.compression_method_api import PTCompressionAlgorithmController
+from nncf.torch.nncf_network import NNCFNetwork
+from nncf.torch.quantization.algo import QuantizationController
+from openvino._offline_transformations import compress_quantize_weights_transformation
+from openvino.runtime import Core, PartialShape, serialize
+from openvino.tools.mo.back.offline_transformations import (
+    apply_fused_names_cleanup,
+    apply_moc_transformations,
+    apply_user_transformations,
+)
 from torch.onnx import export as onnx_export
 from torch.utils.data import DataLoader, Dataset, RandomSampler
 from torch.utils.data.distributed import DistributedSampler
@@ -51,43 +71,20 @@
 )
 from transformers.utils import (
     WEIGHTS_NAME,
-    TensorType,
     is_apex_available,
     is_sagemaker_mp_enabled,
     is_torch_tpu_available,
     logging,
 )
 
-import openvino
-import openvino.runtime
-from nncf import NNCFConfig
-from nncf.common.logging.logger import nncf_logger, set_log_level
-from nncf.common.utils.tensorboard import prepare_for_tensorboard
-from nncf.config.structures import BNAdaptationInitArgs, QuantizationRangeInitArgs
-from nncf.experimental.torch.sparsity.movement.algo import MovementSparsityController
-from nncf.experimental.torch.sparsity.movement.scheduler import MovementSchedulerStage
-from nncf.torch import create_compressed_model
-from nncf.torch.composite_compression import PTCompositeCompressionAlgorithmController
-from nncf.torch.compression_method_api import PTCompressionAlgorithmController
-from nncf.torch.nncf_network import NNCFNetwork
-from nncf.torch.quantization.algo import QuantizationController
-from openvino._offline_transformations import compress_quantize_weights_transformation
-from openvino.runtime import Core, PartialShape, serialize
-from openvino.tools.mo.back.offline_transformations import (
-    apply_fused_names_cleanup,
-    apply_moc_transformations,
-    apply_user_transformations,
-)
 from optimum.exporters import TasksManager
 from optimum.exporters.onnx import OnnxConfig
-from optimum.utils import logging
 
 from .configuration import OVConfig
 from .quantization import OVDataLoader
 from .training_args import OVTrainingArguments
 from .utils import (
     MAX_ONNX_OPSET,
-    MAX_ONNX_OPSET_2022_2_0,
     MIN_ONNX_QDQ_OPSET,
     ONNX_WEIGHTS_NAME,
     OV_XML_FILE_NAME,
@@ -749,7 +746,7 @@ def _should_apply_pruning_transform(self) -> bool:
         )
 
     def _reshape_ir(self, ov_model: openvino.runtime.Model, static_shape: bool) -> openvino.runtime.Model:
-        new_input_cfg = dict()
+        new_input_cfg = {}
         input_name_vs_shape = {item["keyword"]: item["sample_size"] for item in self.ov_config.input_info}
         for input_ in ov_model.inputs:
             if static_shape is True:
@@ -774,7 +771,7 @@ def _onnx_export(self, model: NNCFNetwork, config: OnnxConfig, ov_config: OVConf
         opset = opset if not ov_config.save_onnx_model else max(opset, MIN_ONNX_QDQ_OPSET)
         model_inputs = config.generate_dummy_inputs(framework="pt")
         device = model.device
-        model_inputs = dict((k, v.to(device)) for k, v in model_inputs.items())
+        model_inputs = {k: v.to(device) for k, v in model_inputs.items()}
         self._set_signature_columns_if_needed()  # find model input names needed in ONNX export
         # Create ordered inputs for the ONNX export of NNCFNetwork as keyword arguments are currently not supported
         inputs = tuple([model_inputs.pop(key, None) for key in self._signature_columns if len(model_inputs) != 0])
@@ -789,7 +786,7 @@ def _onnx_export(self, model: NNCFNetwork, config: OnnxConfig, ov_config: OVConf
             f=f,
             input_names=list(config.inputs.keys()),
             output_names=list(config.outputs.keys()),
-            dynamic_axes={name: axes for name, axes in chain(config.inputs.items(), config.outputs.items())},
+            dynamic_axes=dict(chain(config.inputs.items(), config.outputs.items())),
             do_constant_folding=True,
             opset_version=opset,
         )
diff --git a/optimum/intel/utils/import_utils.py b/optimum/intel/utils/import_utils.py
index 427850b314..38bce51192 100644
--- a/optimum/intel/utils/import_utils.py
+++ b/optimum/intel/utils/import_utils.py
@@ -16,7 +16,7 @@
 import operator as op
 import sys
 from collections import OrderedDict
-from typing import Any, List, Union
+from typing import Union
 
 from packaging.version import Version, parse
 
@@ -183,27 +183,27 @@ def is_torch_version(operation: str, version: str):
 DIFFUSERS_IMPORT_ERROR = """
-{0} requires the diffusers library but it was not found in your environment. You can install it with pip:
+{0} requires the diffusers library but it was not found in your environment. You can install it with pip:
 `pip install diffusers`. Please note that you may need to restart your runtime after installation.
 """
 
 IPEX_IMPORT_ERROR = """
-{0} requires the ipex library but it was not found in your environment. You can install it with pip:
+{0} requires the ipex library but it was not found in your environment. You can install it with pip:
 `pip install intel_extension_for_pytorch`. Please note that you may need to restart your runtime after installation.
 """
 
 NNCF_IMPORT_ERROR = """
-{0} requires the nncf library but it was not found in your environment. You can install it with pip:
+{0} requires the nncf library but it was not found in your environment. You can install it with pip:
 `pip install nncf`. Please note that you may need to restart your runtime after installation.
 """
 
 OPENVINO_IMPORT_ERROR = """
-{0} requires the openvino library but it was not found in your environment. You can install it with pip:
+{0} requires the openvino library but it was not found in your environment. You can install it with pip:
 `pip install openvino`. Please note that you may need to restart your runtime after installation.
 """
 
 NEURAL_COMPRESSOR_IMPORT_ERROR = """
-{0} requires the neural-compressor library but it was not found in your environment. You can install it with pip:
+{0} requires the neural-compressor library but it was not found in your environment. You can install it with pip:
 `pip install neural-compressor`. Please note that you may need to restart your runtime after installation.
 """
 
diff --git a/pyproject.toml b/pyproject.toml
index b2a989ef21..62589e113c 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -14,4 +14,18 @@
 
 [tool.black]
 line-length = 119
-target-version = ['py35']
+target-version = ['py37']
+
+[tool.ruff]
+# Never enforce `E501` (line length violations).
+ignore = ["C901", "E501", "E741", "W605"]
+select = ["C", "E", "F", "I", "W"]
+line-length = 119
+
+# Ignore import violations in all `__init__.py` files.
+[tool.ruff.per-file-ignores]
+"__init__.py" = ["E402", "F401", "F403", "F811"]
+
+[tool.ruff.isort]
+lines-after-imports = 2
+known-first-party = ["optimum"]
diff --git a/setup.py b/setup.py
index 95a31b1b7c..8c9ceca820 100644
--- a/setup.py
+++ b/setup.py
@@ -21,7 +21,7 @@
 TESTS_REQUIRE = ["pytest", "parameterized", "Pillow", "evaluate", "diffusers", "py-cpuinfo"]
 
-QUALITY_REQUIRE = ["black==22.3", "isort>=5.5.4"]
+QUALITY_REQUIRE = ["black~=23.1", "ruff>=0.0.241"]
 
 EXTRAS_REQUIRE = {
     "neural-compressor": [
diff --git a/tests/ipex/test_inference.py b/tests/ipex/test_inference.py
index d1d6e9c87f..4ecfafe9a9 100644
--- a/tests/ipex/test_inference.py
+++ b/tests/ipex/test_inference.py
@@ -12,31 +12,20 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-import os
-import tempfile
 import unittest
 
 import torch
+from parameterized import parameterized
 
 # TODO : add more tasks
 from transformers import (
-    AutoFeatureExtractor,
-    AutoModel,
-    AutoModelForAudioClassification,
-    AutoModelForCausalLM,
-    AutoModelForImageClassification,
-    AutoModelForMaskedLM,
-    AutoModelForQuestionAnswering,
-    AutoModelForSeq2SeqLM,
     AutoModelForSequenceClassification,
     AutoModelForTokenClassification,
     AutoTokenizer,
     pipeline,
 )
-from evaluate import evaluator
 
 from optimum.intel import inference_mode as ipex_inference_mode
-from parameterized import parameterized
 
 
 MODEL_NAMES = {
diff --git a/tests/neural_compressor/test_neural_coder_adaptor.py b/tests/neural_compressor/test_neural_coder_adaptor.py
index d6de437c52..adc90babfc 100644
--- a/tests/neural_compressor/test_neural_coder_adaptor.py
+++ b/tests/neural_compressor/test_neural_coder_adaptor.py
@@ -4,6 +4,7 @@
 import unittest
 
 import requests
+
 from optimum.intel.neural_compressor.neural_coder_adaptor import NeuralCoderAdaptor
 
diff --git a/tests/neural_compressor/test_optimization.py b/tests/neural_compressor/test_optimization.py
index 0d2d080cf2..d478a9d0ea 100644
--- a/tests/neural_compressor/test_optimization.py
+++ b/tests/neural_compressor/test_optimization.py
@@ -17,14 +17,23 @@
 import unittest
 from functools import partial
 
+import evaluate
 import numpy as np
 import torch
 from datasets import load_dataset
+from neural_compressor.config import (
+    AccuracyCriterion,
+    DistillationConfig,
+    PostTrainingQuantConfig,
+    QuantizationAwareTrainingConfig,
+    TuningCriterion,
+    WeightPruningConfig,
+)
+from onnx import load as onnx_load
 from transformers import (
     AutoModelForQuestionAnswering,
     AutoModelForSequenceClassification,
     AutoTokenizer,
-    BertForSequenceClassification,
     EvalPrediction,
     TrainingArguments,
     default_data_collator,
@@ -32,16 +41,6 @@
     set_seed,
 )
 
-import evaluate
-from neural_compressor.config import (
-    AccuracyCriterion,
-    DistillationConfig,
-    PostTrainingQuantConfig,
-    QuantizationAwareTrainingConfig,
-    TuningCriterion,
-    WeightPruningConfig,
-)
-from onnx import load as onnx_load
 from optimum.intel import (
     INCConfig,
     INCModelForQuestionAnswering,
@@ -49,7 +48,7 @@
     INCQuantizer,
     INCTrainer,
 )
-from optimum.onnxruntime import ORTModelForQuestionAnswering, ORTModelForSequenceClassification
+from optimum.onnxruntime import ORTModelForSequenceClassification
 
 
 os.environ["CUDA_VISIBLE_DEVICES"] = ""
@@ -113,7 +112,7 @@ def eval_fn(model):
         quantization_config = PostTrainingQuantConfig(
             approach="dynamic", accuracy_criterion=accuracy_criterion, tuning_criterion=tuning_criterion
         )
-        tokens = tokenizer("This is a sample input", return_tensors="pt")
+        tokenizer("This is a sample input", return_tensors="pt")
 
         with tempfile.TemporaryDirectory() as tmp_dir:
             quantizer = INCQuantizer.from_pretrained(model, eval_fn=eval_fn)
@@ -248,8 +247,8 @@ def compute_metrics(p: EvalPrediction):
                 tokenizer=tokenizer,
                 data_collator=default_data_collator,
             )
-            train_result = trainer.train()
-            metrics = trainer.evaluate()
+            trainer.train()
+            trainer.evaluate()
             trainer.save_model(save_onnx_model=True)
             loaded_model = INCModelForSequenceClassification.from_pretrained(tmp_dir)
             ort_model = ORTModelForSequenceClassification.from_pretrained(tmp_dir)
@@ -310,8 +309,8 @@ def compute_metrics(p: EvalPrediction):
                 tokenizer=tokenizer,
                 data_collator=default_data_collator,
            )
-            train_result = trainer.train()
-            metrics = trainer.evaluate()
+            trainer.train()
+            trainer.evaluate()
             trainer.save_model(save_onnx_model=True)
             inc_config = INCConfig.from_pretrained(tmp_dir)
@@ -323,7 +322,7 @@ def compute_metrics(p: EvalPrediction):
             ort_outputs = ort_model(**tokens)
             self.assertTrue("logits" in ort_outputs)
             with torch.no_grad():
-                transformers_outputs = transformers_model(**tokens)
+                transformers_model(**tokens)
             # self.assertTrue(torch.allclose(ort_outputs.logits, transformers_outputs.logits, atol=1e-4))
@@ -363,8 +362,8 @@ def compute_metrics(p: EvalPrediction):
                 tokenizer=tokenizer,
                 data_collator=default_data_collator,
             )
-            train_result = trainer.train()
-            metrics = trainer.evaluate()
+            trainer.train()
+            trainer.evaluate()
             trainer.save_model(save_onnx_model=True)
             inc_config = INCConfig.from_pretrained(tmp_dir)
@@ -411,8 +410,8 @@ def compute_metrics(p: EvalPrediction):
                 tokenizer=tokenizer,
                 data_collator=default_data_collator,
             )
-            train_result = trainer.train()
-            metrics = trainer.evaluate()
+            trainer.train()
+            trainer.evaluate()
             trainer.save_model(save_onnx_model=True)
             inc_config = INCConfig.from_pretrained(tmp_dir)
diff --git a/tests/openvino/test_modeling.py b/tests/openvino/test_modeling.py
index e66ca3c4a1..3fffe9a2e3 100644
--- a/tests/openvino/test_modeling.py
+++ b/tests/openvino/test_modeling.py
@@ -20,8 +20,11 @@
 from typing import Dict
 
 import numpy as np
+import requests
 import torch
 from datasets import load_dataset
+from evaluate import evaluator
+from parameterized import parameterized
 from PIL import Image
 from transformers import (
     AutoFeatureExtractor,
@@ -40,8 +43,6 @@
     set_seed,
 )
 
-import requests
-from evaluate import evaluator
 from optimum.intel.openvino import (
     OV_DECODER_NAME,
     OV_DECODER_WITH_PAST_NAME,
@@ -66,14 +67,12 @@
 )
 from optimum.intel.openvino.modeling_seq2seq import OVDecoder, OVEncoder
 from optimum.utils import (
-    CONFIG_NAME,
     DIFFUSION_MODEL_TEXT_ENCODER_SUBFOLDER,
     DIFFUSION_MODEL_UNET_SUBFOLDER,
     DIFFUSION_MODEL_VAE_DECODER_SUBFOLDER,
     DIFFUSION_MODEL_VAE_ENCODER_SUBFOLDER,
 )
 from optimum.utils.testing_utils import require_diffusers
-from parameterized import parameterized
 
 
 MODEL_NAMES = {
@@ -401,7 +400,7 @@ def test_compare_to_transformers(self, model_arch):
         self.assertIsInstance(ov_model.config, PretrainedConfig)
         transformers_model = AutoModelForCausalLM.from_pretrained(model_id)
         tokenizer = AutoTokenizer.from_pretrained(model_id)
-        tokens = tokenizer(f"This is a sample", return_tensors="pt")
+        tokens = tokenizer("This is a sample", return_tensors="pt")
         ov_outputs = ov_model(**tokens)
         self.assertTrue("logits" in ov_outputs)
         self.assertIsInstance(ov_outputs.logits, torch.Tensor)
@@ -417,7 +416,7 @@ def test_pipeline(self, model_arch):
         model = OVModelForCausalLM.from_pretrained(model_id, from_transformers=True)
         tokenizer = AutoTokenizer.from_pretrained(model_id)
         pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)
-        outputs = pipe(f"This is a sample", max_length=10)
+        outputs = pipe("This is a sample", max_length=10)
         self.assertEqual(pipe.device, model.device)
         self.assertTrue(all(["This is a sample" in item["generated_text"] for item in outputs]))
         gc.collect()
diff --git a/tests/openvino/test_modeling_basic.py b/tests/openvino/test_modeling_basic.py
index 35c512bd62..a443c5fea7 100644
--- a/tests/openvino/test_modeling_basic.py
+++ b/tests/openvino/test_modeling_basic.py
@@ -4,18 +4,29 @@
 - Do inference with appropriate pipeline
 - Save the model to disk
 
-This test is meant to run quickly with tiny test models. More extensive tests are in
+This test is meant to run quickly with tiny test models. More extensive tests are in
 test_modeling.py.
 """
+# ruff: noqa
 
 import gc
 import unittest
 
+from parameterized import parameterized
 from transformers import AutoTokenizer, pipeline
-from huggingface_hub import HfApi
-from optimum.intel.openvino import *
-from parameterized import parameterized
+
+from optimum.intel import (
+    OVModelForAudioClassification,
+    OVModelForCausalLM,
+    OVModelForFeatureExtraction,
+    OVModelForImageClassification,
+    OVModelForMaskedLM,
+    OVModelForQuestionAnswering,
+    OVModelForSeq2SeqLM,
+    OVModelForSequenceClassification,
+    OVModelForTokenClassification,
+    OVStableDiffusionPipeline,
+)
 
 
 # Make sure that common architectures are used in combination with common tasks
diff --git a/tests/openvino/test_quantization.py b/tests/openvino/test_quantization.py
index 60dd56735e..6cbcc36f91 100644
--- a/tests/openvino/test_quantization.py
+++ b/tests/openvino/test_quantization.py
@@ -16,8 +16,10 @@
 import unittest
 from functools import partial
 
+import evaluate
 import numpy as np
 from datasets import load_dataset
+from parameterized import parameterized
 from transformers import (
     AutoModelForQuestionAnswering,
     AutoModelForSequenceClassification,
@@ -26,7 +28,6 @@
     default_data_collator,
 )
 
-import evaluate
 from optimum.intel import (
     OVConfig,
     OVModelForQuestionAnswering,
@@ -34,7 +35,6 @@
     OVQuantizer,
     OVTrainer,
 )
-from parameterized import parameterized
 
 
 class OVQuantizerTest(unittest.TestCase):
@@ -110,7 +110,7 @@ def preprocess_function(examples, tokenizer):
         tokens = tokenizer.encode_plus(
             "This is a sample question", "This is a sample context", add_special_tokens=True, return_tensors="pt"
         )
-        outputs = model(**tokens, return_dict=True)
+        model(**tokens, return_dict=True)
 
         # Test loading model a second time to catch issues with caching
         try:
@@ -134,9 +134,9 @@ def test_aware_training_quantization(self, model_name, expected_fake_quantize, e
         train_dataset = dataset["train"].select(range(16))
         eval_dataset = dataset["validation"].select(range(16))
         metric = evaluate.load("glue", "sst2")
-        compute_metrics = lambda p: metric.compute(
-            predictions=np.argmax(p.predictions, axis=1), references=p.label_ids
-        )
+
+        def compute_metrics(p):
+            return metric.compute(predictions=np.argmax(p.predictions, axis=1), references=p.label_ids)
 
         with tempfile.TemporaryDirectory() as tmp_dir:
             trainer = OVTrainer(
@@ -150,8 +150,8 @@ def test_aware_training_quantization(self, model_name, expected_fake_quantize, e
                 tokenizer=tokenizer,
                 data_collator=default_data_collator,
             )
-            train_result = trainer.train()
-            metrics = trainer.evaluate()
+            trainer.train()
+            trainer.evaluate()
             trainer.save_model()
 
             model = OVModelForSequenceClassification.from_pretrained(tmp_dir)
diff --git a/tests/openvino/test_training.py b/tests/openvino/test_training.py
index 8901ede1b6..33a9649b8d 100644
--- a/tests/openvino/test_training.py
+++ b/tests/openvino/test_training.py
@@ -25,13 +25,16 @@
 from pathlib import Path
 from typing import Dict, List, Optional, Union
 
+import cpuinfo
+import evaluate
 import numpy as np
 import torch
 from datasets import load_dataset
+from nncf.experimental.torch.sparsity.movement.algo import MovementSparsityController
+from parameterized import parameterized
 from transformers import (
     AutoFeatureExtractor,
     AutoImageProcessor,
-    AutoModel,
     AutoModelForAudioClassification,
     AutoModelForImageClassification,
     AutoModelForSequenceClassification,
@@ -41,9 +44,6 @@
 from transformers.trainer_utils import EvalPrediction, TrainOutput
 from transformers.utils import WEIGHTS_NAME
 
-import cpuinfo
-import evaluate
-from nncf.experimental.torch.sparsity.movement.algo import MovementSparsityController
 from optimum.intel.openvino import OVTrainingArguments
 from optimum.intel.openvino.configuration import DEFAULT_QUANTIZATION_CONFIG, OVConfig
 from optimum.intel.openvino.modeling import (
@@ -54,7 +54,6 @@
 )
 from optimum.intel.openvino.trainer import OVTrainer
 from optimum.intel.openvino.utils import OV_XML_FILE_NAME
-from parameterized import parameterized
 
 
 def initialize_movement_sparsifier_parameters_by_sparsity(
diff --git a/tests/openvino/test_training_examples.py b/tests/openvino/test_training_examples.py
index ed3cebd869..4528e526ad 100644
--- a/tests/openvino/test_training_examples.py
+++ b/tests/openvino/test_training_examples.py
@@ -23,9 +23,9 @@
 import torch
 import torch.cuda
+from parameterized import parameterized
 
 from optimum.intel.openvino.utils import OV_XML_FILE_NAME
-from parameterized import parameterized
 
 PROJECT_ROOT = Path(__file__).parents[2]
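Beyond the import shuffling, the other change repeated throughout this patch is replacing `dict(...)` calls with literals and comprehensions, which is the kind of rewrite the flake8-comprehensions (`C4`) rules selected in `pyproject.toml` ask for. A minimal sketch of the three patterns touched above, using stand-in tensors and axis names rather than the real model inputs:

```python
from itertools import chain

import torch

# dict() call -> dict literal
new_input_cfg = {}

# dict(generator) -> dict comprehension, e.g. moving every dummy input to one device
inputs = {"input_ids": torch.ones(1, 8, dtype=torch.int64), "attention_mask": torch.ones(1, 8)}
device = torch.device("cpu")
inputs = {name: tensor.to(device) for name, tensor in inputs.items()}

# {k: v for k, v in chain(...)} -> dict(chain(...)): merging two mappings into one,
# as done for the `dynamic_axes` argument of the ONNX export
input_axes = {"input_ids": {0: "batch", 1: "sequence"}}
output_axes = {"logits": {0: "batch", 1: "sequence"}}
dynamic_axes = dict(chain(input_axes.items(), output_axes.items()))
print(dynamic_axes)
```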