diff --git a/examples/image_recognition/resnet50/quantization/ptq_static/README.md b/examples/image_recognition/resnet50/quantization/ptq_static/README.md index d88e7bbe1..b8145eff8 100644 --- a/examples/image_recognition/resnet50/quantization/ptq_static/README.md +++ b/examples/image_recognition/resnet50/quantization/ptq_static/README.md @@ -11,7 +11,6 @@ pip install onnx-neural-compressor pip install -r requirements.txt ``` -> Note: Validated ONNX Runtime [Version](/docs/source/installation_guide.md#validated-software-environment). ## 2. Prepare Model diff --git a/examples/image_recognition/resnet50/quantization/ptq_static/main.py b/examples/image_recognition/resnet50/quantization/ptq_static/main.py index 232a12912..cc82d49b4 100644 --- a/examples/image_recognition/resnet50/quantization/ptq_static/main.py +++ b/examples/image_recognition/resnet50/quantization/ptq_static/main.py @@ -16,26 +16,28 @@ # under the License. # pylint:disable=redefined-outer-name,logging-format-interpolation -import logging import argparse +import collections +import logging +import os +import re +import time + import cv2 import numpy as np import onnx -import re -import os -import collections -from PIL import Image import onnxruntime as ort +from PIL import Image from sklearn import metrics -from onnx_neural_compressor import data_reader -from onnx_neural_compressor import config -from onnx_neural_compressor import quantization + +from onnx_neural_compressor import config, data_reader, quantization from onnx_neural_compressor.quantization import tuning logger = logging.getLogger(__name__) -logging.basicConfig(format = '%(asctime)s - %(levelname)s - %(name)s - %(message)s', - datefmt = '%m/%d/%Y %H:%M:%S', - level = logging.WARN) +logging.basicConfig( + format="%(asctime)s - %(levelname)s - %(name)s - %(message)s", datefmt="%m/%d/%Y %H:%M:%S", level=logging.WARN +) + def _topk_shape_validate(preds, labels): # preds shape can be Nxclass_num or class_num(N=1 by default) @@ -79,13 +81,14 @@ def _topk_shape_validate(preds, labels): class_num = preds.shape[1] label_N = labels.shape[0] - assert label_N == N, 'labels batch size should same with preds' + assert label_N == N, "labels batch size should same with preds" labels = labels.reshape([N, -1]) # one-hot labels will have 2 dimension not equal 1 if labels.shape[1] != 1: labels = labels.argsort()[..., -1:] return preds, labels + class TopK: def __init__(self, k=1): self.k = k @@ -94,7 +97,7 @@ def __init__(self, k=1): def update(self, preds, labels, sample_weight=None): preds, labels = _topk_shape_validate(preds, labels) - preds = preds.argsort()[..., -self.k:] + preds = preds.argsort()[..., -self.k :] if self.k == 1: correct = metrics.accuracy_score(preds, labels, normalize=False) self.num_correct += correct @@ -103,7 +106,7 @@ def update(self, preds, labels, sample_weight=None): for p, l in zip(preds, labels): # get top-k labels with np.argpartition # p = np.argpartition(p, -self.k)[-self.k:] - l = l.astype('int32') + l = l.astype("int32") if l in p: self.num_correct += 1 @@ -128,7 +131,7 @@ def __init__(self, model_path, dataset_location, image_list, batch_size=1, calib src_lst = [] label_lst = [] num = 0 - with open(image_list, 'r') as f: + with open(image_list, "r") as f: for s in f: image_name, label = re.split(r"\s+", s.strip()) src = os.path.join(dataset_location, image_name) @@ -153,18 +156,18 @@ def __init__(self, model_path, dataset_location, image_list, batch_size=1, calib def _preprpcess(self, src): with Image.open(src) as image: - image = 
np.array(image.convert('RGB')).astype(np.float32) - image = image / 255. + image = np.array(image.convert("RGB")).astype(np.float32) + image = image / 255.0 image = cv2.resize(image, (256, 256), interpolation=cv2.INTER_LINEAR) h, w = image.shape[0], image.shape[1] y0 = (h - 224) // 2 x0 = (w - 224) // 2 - image = image[y0:y0 + 224, x0:x0 + 224, :] + image = image[y0 : y0 + 224, x0 : x0 + 224, :] image = (image - [0.485, 0.456, 0.406]) / [0.229, 0.224, 0.225] image = image.transpose((2, 0, 1)) - return image.astype('float32') + return image.astype("float32") def get_next(self): lst = next(self.iter_next, None) @@ -186,54 +189,22 @@ def eval_func(model, dataloader, metric): metric.update(output, labels[idx]) return metric.result() + if __name__ == "__main__": logger.info("Evaluating ONNXRuntime full precision accuracy and performance:") parser = argparse.ArgumentParser( description="Resnet50 fine-tune examples for image classification tasks.", - formatter_class=argparse.ArgumentDefaultsHelpFormatter - ) - parser.add_argument( - '--model_path', - type=str, - help="Pre-trained model on onnx file" + formatter_class=argparse.ArgumentDefaultsHelpFormatter, ) + parser.add_argument("--model_path", type=str, help="Pre-trained model on onnx file") + parser.add_argument("--dataset_location", type=str, help="Imagenet data path") + parser.add_argument("--label_path", type=str, help="Imagenet label path") + parser.add_argument("--benchmark", action="store_true", default=False) + parser.add_argument("--tune", action="store_true", default=False, help="whether quantize the model") + parser.add_argument("--output_model", type=str, help="output model path") + parser.add_argument("--mode", type=str, help="benchmark mode of performance or accuracy") parser.add_argument( - '--dataset_location', - type=str, - help="Imagenet data path" - ) - parser.add_argument( - '--label_path', - type=str, - help="Imagenet label path" - ) - parser.add_argument( - '--benchmark', - action='store_true', \ - default=False - ) - parser.add_argument( - '--tune', - action='store_true', \ - default=False, - help="whether quantize the model" - ) - parser.add_argument( - '--output_model', - type=str, - help="output model path" - ) - parser.add_argument( - '--mode', - type=str, - help="benchmark mode of performance or accuracy" - ) - parser.add_argument( - '--quant_format', - type=str, - default='QOperator', - choices=['QDQ', 'QOperator'], - help="quantization format" + "--quant_format", type=str, default="QOperator", choices=["QDQ", "QOperator"], help="quantization format" ) parser.add_argument( "--batch_size", @@ -245,25 +216,26 @@ def eval_func(model, dataloader, metric): model = onnx.load(args.model_path) top1 = TopK() dataloader = DataReader(args.model_path, args.dataset_location, args.label_path, args.batch_size) + def eval(onnx_model): dataloader.rewind() return eval_func(onnx_model, dataloader, top1) if args.benchmark: - if args.mode == 'performance': + if args.mode == "performance": total_time = 0.0 num_iter = 100 num_warmup = 10 sess_options = ort.SessionOptions() sess_options.intra_op_num_threads = args.intra_op_num_threads - session = onnxruntime.InferenceSession(model.SerializeToString(), - sess_options, - providers=onnxruntime.get_available_providers()) + session = ort.InferenceSession( + model.SerializeToString(), sess_options, providers=ort.get_available_providers() + ) ort_inputs = {} len_inputs = len(session.get_inputs()) inputs_names = [session.get_inputs()[i].name for i in range(len_inputs)] - + for idx, batch in 
enumerate(dataloader): if idx + 1 > num_iter: break @@ -277,17 +249,23 @@ def eval(onnx_model): print(args) throughput = (num_iter - num_warmup) / total_time print("Throughput: {} samples/s".format(throughput)) - elif args.mode == 'accuracy': + elif args.mode == "accuracy": acc_result = eval_func(model, dataloader, top1) print("Batch size = %d" % dataloader.batch_size) print("Accuracy: %.5f" % acc_result) if args.tune: - calibration_data_reader = DataReader(args.model_path, args.dataset_location, args.label_path, args.batch_size, calibration_sampling_size=100) + calibration_data_reader = DataReader( + args.model_path, args.dataset_location, args.label_path, args.batch_size, calibration_sampling_size=100 + ) custom_tune_config = tuning.TuningConfig( config_set=config.StaticQuantConfig.get_config_set_for_tuning( - quant_format=quantization.QuantFormat.QOperator if args.quant_format == "QOperator" else quantization.QuantFormat.QDQ, + quant_format=( + quantization.QuantFormat.QOperator + if args.quant_format == "QOperator" + else quantization.QuantFormat.QDQ + ), ) ) best_model = tuning.autotune( diff --git a/examples/image_recognition/resnet50/quantization/ptq_static/run_benchmark.sh b/examples/image_recognition/resnet50/quantization/ptq_static/run_benchmark.sh index 9923a2857..2d87088e3 100644 --- a/examples/image_recognition/resnet50/quantization/ptq_static/run_benchmark.sh +++ b/examples/image_recognition/resnet50/quantization/ptq_static/run_benchmark.sh @@ -13,16 +13,16 @@ function init_params { do case $var in --input_model=*) - input_model=$(echo $var |cut -f2 -d=) + input_model=$(echo "$var" |cut -f2 -d=) ;; --dataset_location=*) - dataset_location=$(echo $var |cut -f2 -d=) + dataset_location=$(echo "$var" |cut -f2 -d=) ;; --label_path=*) - label_path=$(echo $var |cut -f2 -d=) + label_path=$(echo "$var" |cut -f2 -d=) ;; --mode=*) - mode=$(echo $var |cut -f2 -d=) + mode=$(echo "$var" |cut -f2 -d=) ;; esac done @@ -33,10 +33,10 @@ function init_params { function run_benchmark { python main.py \ - --model_path ${input_model} \ - --dataset_location ${dataset_location} \ - --label_path ${label_path-${dataset_location}/../val.txt} \ - --mode=${mode} \ + --model_path "${input_model}" \ + --dataset_location "${dataset_location}" \ + --label_path "${label_path-${dataset_location}/../val.txt}" \ + --mode="${mode}" \ --batch_size 1 \ --benchmark diff --git a/examples/image_recognition/resnet50/quantization/ptq_static/run_quant.sh b/examples/image_recognition/resnet50/quantization/ptq_static/run_quant.sh index c7f7a0401..0e44d8d02 100644 --- a/examples/image_recognition/resnet50/quantization/ptq_static/run_quant.sh +++ b/examples/image_recognition/resnet50/quantization/ptq_static/run_quant.sh @@ -14,19 +14,19 @@ function init_params { do case $var in --input_model=*) - input_model=$(echo $var |cut -f2 -d=) + input_model=$(echo "$var" |cut -f2 -d=) ;; --output_model=*) - output_model=$(echo $var |cut -f2 -d=) + output_model=$(echo "$var" |cut -f2 -d=) ;; --dataset_location=*) - dataset_location=$(echo $var |cut -f2 -d=) + dataset_location=$(echo "$var" |cut -f2 -d=) ;; --label_path=*) - label_path=$(echo $var |cut -f2 -d=) + label_path=$(echo "$var" |cut -f2 -d=) ;; --quant_format=*) - quant_format=$(echo $var |cut -f2 -d=) + quant_format=$(echo "$var" |cut -f2 -d=) ;; esac done @@ -36,11 +36,11 @@ function init_params { # run_tuning function run_tuning { python main.py \ - --model_path ${input_model} \ - --dataset_location ${dataset_location} \ - --label_path 
${label_path-${dataset_location}/../val.txt} \ - --output_model ${output_model} \ - --quant_format ${quant_format-QOperator} \ + --model_path "${input_model}" \ + --dataset_location "${dataset_location}" \ + --label_path "${label_path-${dataset_location}/../val.txt}" \ + --output_model "${output_model}" \ + --quant_format "${quant_format-QOperator}" \ --tune } diff --git a/examples/nlp/bert/quantization/ptq_dynamic/README.md b/examples/nlp/bert/quantization/ptq_dynamic/README.md index dab252bcb..212c8b899 100644 --- a/examples/nlp/bert/quantization/ptq_dynamic/README.md +++ b/examples/nlp/bert/quantization/ptq_dynamic/README.md @@ -11,7 +11,6 @@ pip install onnx-neural-compressor pip install -r requirements.txt ``` -> Note: Validated ONNX Runtime [Version](/docs/installation_guide.md#validated-software-environment). ## 2. Prepare Dataset diff --git a/examples/nlp/bert/quantization/ptq_dynamic/main.py b/examples/nlp/bert/quantization/ptq_dynamic/main.py index 781b6e8c5..0298054d7 100644 --- a/examples/nlp/bert/quantization/ptq_dynamic/main.py +++ b/examples/nlp/bert/quantization/ptq_dynamic/main.py @@ -16,28 +16,32 @@ # under the License. # pylint:disable=redefined-outer-name,logging-format-interpolation +import argparse +import dataclasses import logging +import os import pathlib import tempfile -import argparse -import os +from typing import List, Optional, Union + +import numpy as np import onnx import onnxruntime -import transformers +import time import torch -import numpy as np -import dataclasses -from typing import List, Optional, Union +import transformers +from onnxruntime.transformers import optimizer +from onnxruntime.transformers.fusion_options import FusionOptions from torch.utils import data + from onnx_neural_compressor import config from onnx_neural_compressor.quantization import tuning -from onnxruntime.transformers import optimizer -from onnxruntime.transformers.fusion_options import FusionOptions logger = logging.getLogger(__name__) -logging.basicConfig(format = '%(asctime)s - %(levelname)s - %(name)s - %(message)s', - datefmt = '%m/%d/%Y %H:%M:%S', - level = logging.WARN) +logging.basicConfig( + format="%(asctime)s - %(levelname)s - %(name)s - %(message)s", datefmt="%m/%d/%Y %H:%M:%S", level=logging.WARN +) + class ONNXRTBertDataset: """Dataset used for model Bert. @@ -59,57 +63,77 @@ class ONNXRTBertDataset: filter (Filter objects, default=None): filter out examples according to specific conditions. 
""" - def __init__(self, model, data_dir, model_name_or_path, max_seq_length=128,\ - do_lower_case=True, task='mrpc', model_type='bert', dynamic_length=False,\ - evaluate=True, transform=None, filter=None): + + def __init__( + self, + model, + data_dir, + model_name_or_path, + max_seq_length=128, + do_lower_case=True, + task="mrpc", + model_type="bert", + dynamic_length=False, + evaluate=True, + transform=None, + filter=None, + ): self.inputs = [inp.name for inp in onnx.load(model).graph.input] task = task.lower() model_type = model_type.lower() - assert task in ['mrpc', 'qqp', 'qnli', 'rte', 'sts-b', 'cola', \ - 'mnli', 'wnli', 'sst-2'], 'Unsupported task type' - assert model_type in ['distilbert', 'bert', 'mobilebert', 'roberta'], 'Unsupported \ - model type' + assert task in ["mrpc", "qqp", "qnli", "rte", "sts-b", "cola", "mnli", "wnli", "sst-2"], "Unsupported task type" + assert model_type in [ + "distilbert", + "bert", + "mobilebert", + "roberta", + ], "Unsupported \ + model type" self.dynamic_length = dynamic_length self.model_type = model_type self.max_seq_length = max_seq_length - tokenizer = transformers.AutoTokenizer.from_pretrained(model_name_or_path, - do_lower_case=do_lower_case) - self.dataset = load_and_cache_examples(data_dir, model_name_or_path, \ - max_seq_length, task, model_type, tokenizer, evaluate) + tokenizer = transformers.AutoTokenizer.from_pretrained(model_name_or_path, do_lower_case=do_lower_case) + self.dataset = load_and_cache_examples( + data_dir, model_name_or_path, max_seq_length, task, model_type, tokenizer, evaluate + ) def __len__(self): return len(self.dataset) def __getitem__(self, index): batch = tuple(t.detach().cpu().numpy() if not isinstance(t, np.ndarray) else t for t in self.dataset[index]) - return batch[:len(self.inputs)], batch[-1] + return batch[: len(self.inputs)], batch[-1] + -def load_and_cache_examples(data_dir, model_name_or_path, max_seq_length, task, \ - model_type, tokenizer, evaluate): +def load_and_cache_examples(data_dir, model_name_or_path, max_seq_length, task, model_type, tokenizer, evaluate): processor = transformers.glue_processors[task]() output_mode = transformers.glue_output_modes[task] # Load data features from cache or dataset file if not os.path.exists("./dataset_cached"): os.makedirs("./dataset_cached") - cached_features_file = os.path.join("./dataset_cached", 'cached_{}_{}_{}_{}'.format( - 'dev' if evaluate else 'train', - list(filter(None, model_name_or_path.split('/'))).pop(), - str(max_seq_length), - str(task))) + cached_features_file = os.path.join( + "./dataset_cached", + "cached_{}_{}_{}_{}".format( + "dev" if evaluate else "train", + list(filter(None, model_name_or_path.split("/"))).pop(), + str(max_seq_length), + str(task), + ), + ) if os.path.exists(cached_features_file): logger.info("Load features from cached file {}.".format(cached_features_file)) features = torch.load(cached_features_file) else: logger.info("Create features from dataset file at {}.".format(data_dir)) label_list = processor.get_labels() - examples = processor.get_dev_examples(data_dir) if evaluate else \ - processor.get_train_examples(data_dir) - features = convert_examples_to_features(examples, - tokenizer, - task=task, - label_list=label_list, - max_length=max_seq_length, - output_mode=output_mode, + examples = processor.get_dev_examples(data_dir) if evaluate else processor.get_train_examples(data_dir) + features = convert_examples_to_features( + examples, + tokenizer, + task=task, + label_list=label_list, + max_length=max_seq_length, + 
output_mode=output_mode, ) logger.info("Save features into cached file {}.".format(cached_features_file)) torch.save(features, cached_features_file) @@ -122,10 +146,10 @@ def load_and_cache_examples(data_dir, model_name_or_path, max_seq_length, task, all_labels = torch.tensor([f.label for f in features], dtype=torch.long) elif output_mode == "regression": all_labels = torch.tensor([f.label for f in features], dtype=torch.float) - dataset = data.TensorDataset(all_input_ids, all_attention_mask, all_token_type_ids, \ - all_seq_lengths, all_labels) + dataset = data.TensorDataset(all_input_ids, all_attention_mask, all_token_type_ids, all_seq_lengths, all_labels) return dataset + def convert_examples_to_features( examples, tokenizer, @@ -143,7 +167,7 @@ def convert_examples_to_features( logger.info("Use label list {} for task {}.".format(label_list, task)) label_map = {label: i for i, label in enumerate(label_list)} features = [] - for (ex_index, example) in enumerate(examples): + for ex_index, example in enumerate(examples): inputs = tokenizer.encode_plus( example.text_a, example.text_b, @@ -162,19 +186,14 @@ def convert_examples_to_features( padding_length = max_length - len(input_ids) input_ids = input_ids + ([pad_token] * padding_length) - attention_mask = attention_mask + \ - ([0 if mask_padding_with_zero else 1] * padding_length) + attention_mask = attention_mask + ([0 if mask_padding_with_zero else 1] * padding_length) token_type_ids = token_type_ids + ([pad_token_segment_id] * padding_length) - assert len(input_ids) == max_length, \ - "Error with input_ids length {} vs {}".format( - len(input_ids), max_length) - assert len(attention_mask) == max_length, \ - "Error with attention_mask length {} vs {}".format( + assert len(input_ids) == max_length, "Error with input_ids length {} vs {}".format(len(input_ids), max_length) + assert len(attention_mask) == max_length, "Error with attention_mask length {} vs {}".format( len(attention_mask), max_length ) - assert len(token_type_ids) == max_length, \ - "Error with token_type_ids length {} vs {}".format( + assert len(token_type_ids) == max_length, "Error with token_type_ids length {} vs {}".format( len(token_type_ids), max_length ) if output_mode == "classification": @@ -194,6 +213,7 @@ def convert_examples_to_features( features.append(feats) return features + @dataclasses.dataclass(frozen=True) class InputFeatures: """ @@ -217,6 +237,7 @@ class InputFeatures: label: Optional[Union[int, float]] = None seq_length: Optional[List[int]] = None + class ONNXRTGLUE: """Computes GLUE score. @@ -226,9 +247,9 @@ class ONNXRTGLUE: sts-b, cola, mnli, wnli. 
""" - def __init__(self, task='mrpc'): - assert task in ['mrpc', 'qqp', 'qnli', 'rte', 'sts-b', 'cola', \ - 'mnli', 'wnli', 'sst-2'], 'Unsupported task type' + + def __init__(self, task="mrpc"): + assert task in ["mrpc", "qqp", "qnli", "rte", "sts-b", "cola", "mnli", "wnli", "sst-2"], "Unsupported task type" self.pred_list = None self.label_list = None self.task = task @@ -241,7 +262,7 @@ def __init__(self, task='mrpc'): "qnli": "acc", "rte": "acc", "wnli": "acc", - "sst-2": "acc" + "sst-2": "acc", } def update(self, preds, labels): @@ -270,102 +291,62 @@ def result(self): processed_preds = np.argmax(self.pred_list, axis=1) elif output_mode == "regression": processed_preds = np.squeeze(self.pred_list) - result = transformers.glue_compute_metrics(\ - self.task, processed_preds, self.label_list) + result = transformers.glue_compute_metrics(self.task, processed_preds, self.label_list) return result[self.return_key[self.task]] + if __name__ == "__main__": - logger.info('Evaluating ONNXRuntime full precision accuracy and performance:') + logger.info("Evaluating ONNXRuntime full precision accuracy and performance:") parser = argparse.ArgumentParser( - description='BERT fine-tune examples for classification/regression tasks.', - formatter_class=argparse.ArgumentDefaultsHelpFormatter) - parser.add_argument( - '--model_path', - type=str, - help="Pre-trained resnet50 model on onnx file" + description="BERT fine-tune examples for classification/regression tasks.", + formatter_class=argparse.ArgumentDefaultsHelpFormatter, ) + parser.add_argument("--model_path", type=str, help="Pre-trained resnet50 model on onnx file") + parser.add_argument("--benchmark", action="store_true", default=False) + parser.add_argument("--tune", action="store_true", default=False, help="whether quantize the model") + parser.add_argument("--output_model", type=str, help="output model path") + parser.add_argument("--mode", type=str, help="benchmark mode of performance or accuracy") + parser.add_argument("--model_name_or_path", type=str, help="pretrained model name or path") + parser.add_argument("--data_path", type=str, help="input data path") parser.add_argument( - '--benchmark', - action='store_true', \ - default=False - ) - parser.add_argument( - '--tune', - action='store_true', \ - default=False, - help="whether quantize the model" - ) - parser.add_argument( - '--output_model', - type=str, - help="output model path" - ) - parser.add_argument( - '--mode', - type=str, - help="benchmark mode of performance or accuracy" - ) - parser.add_argument( - '--model_name_or_path', - type=str, - help="pretrained model name or path" - ) - parser.add_argument( - '--data_path', - type=str, - help="input data path" - ) - parser.add_argument( - '--batch_size', + "--batch_size", default=8, type=int, ) parser.add_argument( - '--task', + "--task", type=str, - default='mrpc', - choices=['mrpc', 'qqp', 'qnli', 'rte', 'sts-b', 'cola', \ - 'mnli', 'wnli', 'sst-2'], - help="GLUE task name" - ) - parser.add_argument( - "--dynamic_length", - type=bool, - default=False, - help="dynamic length" - ) - parser.add_argument( - "--max_seq_length", - type=int, - default=128, - help="max sequence length" + default="mrpc", + choices=["mrpc", "qqp", "qnli", "rte", "sts-b", "cola", "mnli", "wnli", "sst-2"], + help="GLUE task name", ) + parser.add_argument("--dynamic_length", type=bool, default=False, help="dynamic length") + parser.add_argument("--max_seq_length", type=int, default=128, help="max sequence length") parser.add_argument( "--model_type", type=str, - 
default="bert", + default="bert", choices=["distilbert", "bert", "mobilebert", "roberta"], - help="model type" - ) - parser.add_argument( - "--intra_op_num_threads", - type=int, - default=4 + help="model type", ) + parser.add_argument("--intra_op_num_threads", type=int, default=4) args = parser.parse_args() - dataset = ONNXRTBertDataset(args.model_path, + dataset = ONNXRTBertDataset( + args.model_path, data_dir=args.data_path, model_name_or_path=args.model_name_or_path, max_seq_length=args.max_seq_length, task=args.task, model_type=args.model_type, - dynamic_length=args.dynamic_length) + dynamic_length=args.dynamic_length, + ) dataloader = data.DataLoader( dataset, sampler=data.SequentialSampler(dataset), batch_size=args.batch_size, shuffle=False, ) + def eval_func(model): metric = ONNXRTGLUE(args.task) session = onnxruntime.InferenceSession(model, providers=onnxruntime.get_available_providers()) @@ -373,11 +354,10 @@ def eval_func(model): len_inputs = len(session.get_inputs()) inputs_names = [session.get_inputs()[i].name for i in range(len_inputs)] - batch_seq_length = args.max_seq_length if not args.dynamic_length else torch.max(batch[-2], 0)[0].item() - for idx, batch in enumerate(dataloader): label = batch[-1] batch = tuple(t.detach().cpu().numpy() if not isinstance(t, np.ndarray) else t for t in batch[0]) + batch_seq_length = args.max_seq_length if not args.dynamic_length else torch.max(batch[-2], 0)[0].item() data = [ batch[0][:, :batch_seq_length], batch[1][:, :batch_seq_length], @@ -389,28 +369,28 @@ def eval_func(model): metric.update(predictions[0], label) return metric.result() - if args.benchmark: model = onnx.load(args.model_path) - if args.mode == "performance": + if args.mode == "performance": total_time = 0.0 num_iter = 100 num_warmup = 10 - sess_options = ort.SessionOptions() + sess_options = onnxruntime.SessionOptions() sess_options.intra_op_num_threads = args.intra_op_num_threads - session = onnxruntime.InferenceSession(model.SerializeToString(), - sess_options, - providers=onnxruntime.get_available_providers()) + session = onnxruntime.InferenceSession( + model.SerializeToString(), sess_options, providers=onnxruntime.get_available_providers() + ) ort_inputs = {} len_inputs = len(session.get_inputs()) inputs_names = [session.get_inputs()[i].name for i in range(len_inputs)] - + for idx, batch in enumerate(dataloader): if idx + 1 > num_iter: break batch = tuple(t.detach().cpu().numpy() if not isinstance(t, np.ndarray) else t for t in batch) + batch_seq_length = args.max_seq_length if not args.dynamic_length else torch.max(batch[-2], 0)[0].item() data = [ batch[0][:, :batch_seq_length], batch[1][:, :batch_seq_length], @@ -428,7 +408,7 @@ def eval_func(model): print(args) throughput = (num_iter - num_warmup) / total_time print("Throughput: {} samples/s".format(throughput)) - elif args.mode == 'accuracy': + elif args.mode == "accuracy": acc_result = eval_func(model) print("Batch size = %d" % args.batch_size) print("Accuracy: %.5f" % acc_result) @@ -436,15 +416,12 @@ def eval_func(model): if args.tune: # optimize model with tempfile.TemporaryDirectory(prefix="ort.opt.") as tmp_dir: - opt_options = FusionOptions('bert') + opt_options = FusionOptions("bert") opt_options.enable_embed_layer_norm = False model_optimizer = optimizer.optimize_model( - args.model_path, - 'bert', - num_heads=12, - hidden_size=768, - optimization_options=opt_options) + args.model_path, "bert", num_heads=12, hidden_size=768, optimization_options=opt_options + ) model = model_optimizer.model # check the 
optimized model is valid @@ -454,13 +431,10 @@ def eval_func(model): model = pathlib.Path(tmp_dir).joinpath("opt.onnx").as_posix() except Exception as e: logger.warning("Optimized model is invalid: {}. ".format(e)) - logger.warning("Model optimizer will be skipped. " \ - "Try to upgrade onnxruntime to avoid this error") + logger.warning("Model optimizer will be skipped. " "Try to upgrade onnxruntime to avoid this error") model = args.model_path - custom_tune_config = tuning.TuningConfig( - config_set=config.DynamicQuantConfig.get_config_set_for_tuning() - ) + custom_tune_config = tuning.TuningConfig(config_set=config.DynamicQuantConfig.get_config_set_for_tuning()) best_model = tuning.autotune( model_input=model, tune_config=custom_tune_config, diff --git a/examples/nlp/bert/quantization/ptq_dynamic/prepare_data.sh b/examples/nlp/bert/quantization/ptq_dynamic/prepare_data.sh index 8e434a5c5..c1fddb546 100644 --- a/examples/nlp/bert/quantization/ptq_dynamic/prepare_data.sh +++ b/examples/nlp/bert/quantization/ptq_dynamic/prepare_data.sh @@ -14,10 +14,10 @@ function init_params { do case $var in --data_dir=*) - data_dir=$(echo $var |cut -f2 -d=) + data_dir=$(echo "$var" |cut -f2 -d=) ;; --task_name=*) - task_name=$(echo $var |cut -f2 -d=) + task_name=$(echo "$var" |cut -f2 -d=) ;; esac done @@ -27,7 +27,7 @@ function init_params { # run_tuning function download_data { wget https://raw.githubusercontent.com/huggingface/transformers/f98ef14d161d7bcdc9808b5ec399981481411cc1/utils/download_glue_data.py - python download_glue_data.py --data_dir=${data_dir} --tasks=${task_name} + python download_glue_data.py --data_dir="${data_dir}" --tasks="${task_name}" } main "$@" diff --git a/examples/nlp/bert/quantization/ptq_dynamic/prepare_model.py b/examples/nlp/bert/quantization/ptq_dynamic/prepare_model.py index 0efed802f..5b9216640 100644 --- a/examples/nlp/bert/quantization/ptq_dynamic/prepare_model.py +++ b/examples/nlp/bert/quantization/ptq_dynamic/prepare_model.py @@ -1,14 +1,14 @@ import argparse import os import sys -import zipfile import urllib +import zipfile import torch import transformers # Please refer to [Bert-GLUE_OnnxRuntime_quantization guide] -# (https://github.com/microsoft/onnxruntime-inference-examples/blob/main/quantization/notebooks/bert/Bert-GLUE_OnnxRuntime_quantization.ipynb) +# (https://github.com/microsoft/onnxruntime-inference-examples/blob/main/quantization/notebooks/bert/Bert-GLUE_OnnxRuntime_quantization.ipynb) # for detailed model export. 
MODEL_URL = "https://download.pytorch.org/tutorial/MRPC.zip" @@ -19,16 +19,13 @@ def parse_arguments(): parser = argparse.ArgumentParser() parser.add_argument("--input_model", type=str, required=False, default="MRPC.zip") parser.add_argument("--output_model", type=str, required=True) - parser.add_argument('--max_len', - type=int, - default=128, - help='Maximum length of the sentence pairs') + parser.add_argument("--max_len", type=int, default=128, help="Maximum length of the sentence pairs") return parser.parse_args() def progressbar(cur, total=100): - percent = '{:.2%}'.format(cur / total) - sys.stdout.write("\r[%-100s] %s" % ('#' * int(cur), percent)) + percent = "{:.2%}".format(cur / total) + sys.stdout.write("\r[%-100s] %s" % ("#" * int(cur), percent)) sys.stdout.flush() @@ -42,15 +39,15 @@ def schedule(blocknum, blocksize, totalsize): def is_zip_file(filename): try: - with open(filename, 'rb') as f: + with open(filename, "rb") as f: magic_number = f.read(4) - return magic_number == b'PK\x03\x04' # ZIP file magic number + return magic_number == b"PK\x03\x04" # ZIP file magic number except OSError: return False def extrafile(filename, target_folder="."): - with zipfile.ZipFile(filename, 'r') as zin: + with zipfile.ZipFile(filename, "r") as zin: zin.extractall(target_folder) @@ -80,33 +77,30 @@ def download_model(url, model_name, retry_times=5): def export_model(model, output_model, max_len=128): with torch.no_grad(): inputs = { - 'input_ids': torch.ones(1, max_len, dtype=torch.int64), - 'attention_mask': torch.ones(1, max_len, dtype=torch.int64), - 'token_type_ids': torch.ones(1, max_len, dtype=torch.int64) + "input_ids": torch.ones(1, max_len, dtype=torch.int64), + "attention_mask": torch.ones(1, max_len, dtype=torch.int64), + "token_type_ids": torch.ones(1, max_len, dtype=torch.int64), } - symbolic_names = {0: 'batch_size', 1: 'max_seq_len'} + symbolic_names = {0: "batch_size", 1: "max_seq_len"} torch.onnx.export( model, # model being run ( - inputs['input_ids'], - inputs['attention_mask'], - inputs['token_type_ids'], + inputs["input_ids"], + inputs["attention_mask"], + inputs["token_type_ids"], ), # model input (or a tuple for multiple inputs) output_model, # where to save the model (can be a file or file-like object) opset_version=14, # the ONNX version to export the model do_constant_folding=True, # whether to execute constant folding - input_names=[ - 'input_ids', # the model's input names - 'input_mask', - 'segment_ids' - ], - output_names=['output'], # the model's output names + input_names=["input_ids", "input_mask", "segment_ids"], # the model's input names + output_names=["output"], # the model's output names dynamic_axes={ - 'input_ids': symbolic_names, # variable length axes - 'input_mask': symbolic_names, - 'segment_ids': symbolic_names - }) + "input_ids": symbolic_names, # variable length axes + "input_mask": symbolic_names, + "segment_ids": symbolic_names, + }, + ) assert os.path.exists(output_model), f"Export failed! {output_model} doesn't exist!" 
print("ONNX Model exported to {0}".format(output_model)) @@ -114,8 +108,7 @@ def export_model(model, output_model, max_len=128): def prepare_model(input_model, output_model, max_len): is_download_successful = download_model(MODEL_URL, input_model, MAX_TIMES_RETRY_DOWNLOAD) if is_download_successful: - folder_name = is_download_successful if isinstance(is_download_successful, - str) else "./MRPC" + folder_name = is_download_successful if isinstance(is_download_successful, str) else "./MRPC" model = transformers.BertForSequenceClassification.from_pretrained(folder_name) export_model(model, output_model, max_len) diff --git a/examples/nlp/bert/quantization/ptq_dynamic/run_benchmark.sh b/examples/nlp/bert/quantization/ptq_dynamic/run_benchmark.sh index d71c0a908..766d50476 100644 --- a/examples/nlp/bert/quantization/ptq_dynamic/run_benchmark.sh +++ b/examples/nlp/bert/quantization/ptq_dynamic/run_benchmark.sh @@ -14,16 +14,16 @@ function init_params { do case $var in --input_model=*) - input_model=$(echo $var |cut -f2 -d=) + input_model=$(echo "$var" |cut -f2 -d=) ;; --mode=*) - mode=$(echo $var |cut -f2 -d=) + mode=$(echo "$var" |cut -f2 -d=) ;; --dataset_location=*) - dataset_location=$(echo $var |cut -f2 -d=) + dataset_location=$(echo "$var" |cut -f2 -d=) ;; --batch_size=*) - batch_size=$(echo $var |cut -f2 -d=) + batch_size=$(echo "$var" |cut -f2 -d=) ;; esac done @@ -43,16 +43,15 @@ function run_benchmark { model_name_or_path="bert-base-uncased" task_name="mrpc" - model_type="bert" python main.py \ - --model_path ${input_model} \ - --model_name_or_path ${model_name_or_path} \ - --data_path ${dataset_location} \ - --task ${task_name} \ - --batch_size ${batch_size} \ - --mode ${mode} \ - --dynamic_length ${dynamic_length} \ + --model_path "${input_model}" \ + --model_name_or_path "${model_name_or_path}" \ + --data_path "${dataset_location}" \ + --task "${task_name}" \ + --batch_size "${batch_size}" \ + --mode "${mode}" \ + --dynamic_length "${dynamic_length}" \ --benchmark } diff --git a/examples/nlp/bert/quantization/ptq_dynamic/run_quant.sh b/examples/nlp/bert/quantization/ptq_dynamic/run_quant.sh index 6876ddc50..53e864930 100644 --- a/examples/nlp/bert/quantization/ptq_dynamic/run_quant.sh +++ b/examples/nlp/bert/quantization/ptq_dynamic/run_quant.sh @@ -12,13 +12,13 @@ function init_params { do case $var in --input_model=*) - input_model=$(echo $var |cut -f2 -d=) + input_model=$(echo "$var" |cut -f2 -d=) ;; --output_model=*) - output_model=$(echo $var |cut -f2 -d=) + output_model=$(echo "$var" |cut -f2 -d=) ;; --dataset_location=*) - dataset_location=$(echo $var |cut -f2 -d=) + dataset_location=$(echo "$var" |cut -f2 -d=) ;; esac done @@ -30,15 +30,14 @@ function run_tuning { model_name_or_path="bert-base-uncased" batch_size=8 task_name="mrpc" - model_type="bert" python main.py \ - --model_path ${input_model} \ - --output_model ${output_model} \ - --model_name_or_path ${model_name_or_path} \ - --data_path ${dataset_location} \ - --task ${task_name} \ - --batch_size ${batch_size} \ + --model_path "${input_model}" \ + --output_model "${output_model}" \ + --model_name_or_path "${model_name_or_path}" \ + --data_path "${dataset_location}" \ + --task "${task_name}" \ + --batch_size "${batch_size}" \ --tune } diff --git a/examples/nlp/bert/quantization/ptq_static/README.md b/examples/nlp/bert/quantization/ptq_static/README.md index fb2f13851..c34e76a79 100644 --- a/examples/nlp/bert/quantization/ptq_static/README.md +++ b/examples/nlp/bert/quantization/ptq_static/README.md @@ -11,7 +11,6 @@ 
This example load a BERT model and confirm its accuracy and speed based on [GLUE pip install onnx-neural-compressor pip install -r requirements.txt ``` -> Note: Validated ONNX Runtime [Version](/docs/source/installation_guide.md#validated-software-environment). ## 2. Prepare Dataset diff --git a/examples/nlp/bert/quantization/ptq_static/main.py b/examples/nlp/bert/quantization/ptq_static/main.py index b12f36d47..9a3996132 100644 --- a/examples/nlp/bert/quantization/ptq_static/main.py +++ b/examples/nlp/bert/quantization/ptq_static/main.py @@ -16,70 +16,43 @@ # under the License. # pylint:disable=redefined-outer-name,logging-format-interpolation -import logging -import pathlib -import tempfile import argparse import dataclasses +import logging import os +import pathlib +import tempfile +from typing import List, Optional, Union + +import numpy as np import onnx import onnxruntime -import transformers +import time import torch -import numpy as np -from onnx_neural_compressor import data_reader -from typing import List, Optional, Union -from torch.utils import data -from onnx_neural_compressor import config -from onnx_neural_compressor import quantization -from onnx_neural_compressor.quantization import tuning +import transformers from onnxruntime.transformers import optimizer from onnxruntime.transformers.fusion_options import FusionOptions +from torch.utils import data + +from onnx_neural_compressor import config, data_reader, quantization +from onnx_neural_compressor.quantization import tuning logger = logging.getLogger(__name__) -logging.basicConfig(format = "%(asctime)s - %(levelname)s - %(name)s - %(message)s", - datefmt = "%m/%d/%Y %H:%M:%S", - level = logging.WARN) +logging.basicConfig( + format="%(asctime)s - %(levelname)s - %(name)s - %(message)s", datefmt="%m/%d/%Y %H:%M:%S", level=logging.WARN +) logger.info("Evaluating ONNXRuntime full precision accuracy and performance:") parser = argparse.ArgumentParser( -description="BERT fine-tune examples for classification/regression tasks.", -formatter_class=argparse.ArgumentDefaultsHelpFormatter) -parser.add_argument( - "--model_path", - type=str, - help="Pre-trained model on onnx file" -) -parser.add_argument( - "--benchmark", - action="store_true", \ - default=False -) -parser.add_argument( - "--tune", - action="store_true", \ - default=False, - help="whether quantize the model" -) -parser.add_argument( - "--output_model", - type=str, - help="output model path" -) -parser.add_argument( - "--mode", - type=str, - help="benchmark mode of performance or accuracy" -) -parser.add_argument( - "--model_name_or_path", - type=str, - help="pretrained model name or path" -) -parser.add_argument( - "--data_path", - type=str, - help="input data path" + description="BERT fine-tune examples for classification/regression tasks.", + formatter_class=argparse.ArgumentDefaultsHelpFormatter, ) +parser.add_argument("--model_path", type=str, help="Pre-trained model on onnx file") +parser.add_argument("--benchmark", action="store_true", default=False) +parser.add_argument("--tune", action="store_true", default=False, help="whether quantize the model") +parser.add_argument("--output_model", type=str, help="output model path") +parser.add_argument("--mode", type=str, help="benchmark mode of performance or accuracy") +parser.add_argument("--model_name_or_path", type=str, help="pretrained model name or path") +parser.add_argument("--data_path", type=str, help="input data path") parser.add_argument( "--batch_size", default=8, @@ -89,41 +62,22 @@ "--task", type=str, 
default="mrpc", - choices=["mrpc", "qqp", "qnli", "rte", "sts-b", "cola", \ - "mnli", "wnli", "sst-2"], - help="GLUE task name" + choices=["mrpc", "qqp", "qnli", "rte", "sts-b", "cola", "mnli", "wnli", "sst-2"], + help="GLUE task name", ) parser.add_argument( - "--quant_format", - type=str, - default="QOperator", - choices=["QDQ", "QOperator"], - help="quantization format" + "--quant_format", type=str, default="QOperator", choices=["QDQ", "QOperator"], help="quantization format" ) +parser.add_argument("--dynamic_length", type=bool, default=False, help="dynamic length") +parser.add_argument("--max_seq_length", type=int, default=128, help="max sequence length") parser.add_argument( - "--dynamic_length", - type=bool, - default=False, - help="dynamic length" + "--model_type", type=str, default="bert", choices=["distilbert", "bert", "mobilebert", "roberta"], help="model type" ) parser.add_argument( - "--max_seq_length", - type=int, - default=128, - help="max sequence length" -) -parser.add_argument( - "--model_type", + "--device", type=str, - default="bert", - choices=["distilbert", "bert", "mobilebert", "roberta"], - help="model type" -) -parser.add_argument( - '--device', - type=str, - default='cpu', - choices=['cpu', 'npu'], + default="cpu", + choices=["cpu", "npu"], ) args = parser.parse_args() @@ -148,57 +102,77 @@ class ONNXRTBertDataset: filter (Filter objects, default=None): filter out examples according to specific conditions. """ - def __init__(self, model, data_dir, model_name_or_path, max_seq_length=128,\ - do_lower_case=True, task="mrpc", model_type="bert", dynamic_length=False,\ - evaluate=True, transform=None, filter=None): + + def __init__( + self, + model, + data_dir, + model_name_or_path, + max_seq_length=128, + do_lower_case=True, + task="mrpc", + model_type="bert", + dynamic_length=False, + evaluate=True, + transform=None, + filter=None, + ): self.inputs = [inp.name for inp in onnx.load(model).graph.input] task = task.lower() model_type = model_type.lower() - assert task in ["mrpc", "qqp", "qnli", "rte", "sts-b", "cola", \ - "mnli", "wnli", "sst-2"], "Unsupported task type" - assert model_type in ["distilbert", "bert", "mobilebert", "roberta"], "Unsupported \ + assert task in ["mrpc", "qqp", "qnli", "rte", "sts-b", "cola", "mnli", "wnli", "sst-2"], "Unsupported task type" + assert model_type in [ + "distilbert", + "bert", + "mobilebert", + "roberta", + ], "Unsupported \ model type" self.dynamic_length = dynamic_length self.model_type = model_type self.max_seq_length = max_seq_length - tokenizer = transformers.AutoTokenizer.from_pretrained(model_name_or_path, - do_lower_case=do_lower_case) - self.dataset = load_and_cache_examples(data_dir, model_name_or_path, \ - max_seq_length, task, model_type, tokenizer, evaluate) + tokenizer = transformers.AutoTokenizer.from_pretrained(model_name_or_path, do_lower_case=do_lower_case) + self.dataset = load_and_cache_examples( + data_dir, model_name_or_path, max_seq_length, task, model_type, tokenizer, evaluate + ) def __len__(self): return len(self.dataset) def __getitem__(self, index): batch = tuple(t.detach().cpu().numpy() if not isinstance(t, np.ndarray) else t for t in self.dataset[index]) - return batch[:len(self.inputs)], batch[-1] + return batch[: len(self.inputs)], batch[-1] + -def load_and_cache_examples(data_dir, model_name_or_path, max_seq_length, task, \ - model_type, tokenizer, evaluate): +def load_and_cache_examples(data_dir, model_name_or_path, max_seq_length, task, model_type, tokenizer, evaluate): processor = 
transformers.glue_processors[task]() output_mode = transformers.glue_output_modes[task] # Load data features from cache or dataset file if not os.path.exists("./dataset_cached"): os.makedirs("./dataset_cached") - cached_features_file = os.path.join("./dataset_cached", "cached_{}_{}_{}_{}".format( - "dev" if evaluate else "train", - list(filter(None, model_name_or_path.split("/"))).pop(), - str(max_seq_length), - str(task))) + cached_features_file = os.path.join( + "./dataset_cached", + "cached_{}_{}_{}_{}".format( + "dev" if evaluate else "train", + list(filter(None, model_name_or_path.split("/"))).pop(), + str(max_seq_length), + str(task), + ), + ) if os.path.exists(cached_features_file): logger.info("Load features from cached file {}.".format(cached_features_file)) features = torch.load(cached_features_file) else: logger.info("Create features from dataset file at {}.".format(data_dir)) label_list = processor.get_labels() - examples = processor.get_dev_examples(data_dir) if evaluate else \ - processor.get_train_examples(data_dir) - features = convert_examples_to_features(examples, - tokenizer, - task=task, - label_list=label_list, - max_length=max_seq_length, - output_mode=output_mode, + examples = processor.get_dev_examples(data_dir) if evaluate else processor.get_train_examples(data_dir) + features = convert_examples_to_features( + examples, + tokenizer, + task=task, + label_list=label_list, + max_length=max_seq_length, + output_mode=output_mode, ) logger.info("Save features into cached file {}.".format(cached_features_file)) torch.save(features, cached_features_file) @@ -211,10 +185,10 @@ def load_and_cache_examples(data_dir, model_name_or_path, max_seq_length, task, all_labels = torch.tensor([f.label for f in features], dtype=torch.long) elif output_mode == "regression": all_labels = torch.tensor([f.label for f in features], dtype=torch.float) - dataset = data.TensorDataset(all_input_ids, all_attention_mask, all_token_type_ids, \ - all_seq_lengths, all_labels) + dataset = data.TensorDataset(all_input_ids, all_attention_mask, all_token_type_ids, all_seq_lengths, all_labels) return dataset + def convert_examples_to_features( examples, tokenizer, @@ -232,7 +206,7 @@ def convert_examples_to_features( logger.info("Use label list {} for task {}.".format(label_list, task)) label_map = {label: i for i, label in enumerate(label_list)} features = [] - for (ex_index, example) in enumerate(examples): + for ex_index, example in enumerate(examples): inputs = tokenizer.encode_plus( example.text_a, example.text_b, @@ -251,19 +225,14 @@ def convert_examples_to_features( padding_length = max_length - len(input_ids) input_ids = input_ids + ([pad_token] * padding_length) - attention_mask = attention_mask + \ - ([0 if mask_padding_with_zero else 1] * padding_length) + attention_mask = attention_mask + ([0 if mask_padding_with_zero else 1] * padding_length) token_type_ids = token_type_ids + ([pad_token_segment_id] * padding_length) - assert len(input_ids) == max_length, \ - "Error with input_ids length {} vs {}".format( - len(input_ids), max_length) - assert len(attention_mask) == max_length, \ - "Error with attention_mask length {} vs {}".format( + assert len(input_ids) == max_length, "Error with input_ids length {} vs {}".format(len(input_ids), max_length) + assert len(attention_mask) == max_length, "Error with attention_mask length {} vs {}".format( len(attention_mask), max_length ) - assert len(token_type_ids) == max_length, \ - "Error with token_type_ids length {} vs {}".format( + assert 
len(token_type_ids) == max_length, "Error with token_type_ids length {} vs {}".format( len(token_type_ids), max_length ) if output_mode == "classification": @@ -283,6 +252,7 @@ def convert_examples_to_features( features.append(feats) return features + @dataclasses.dataclass(frozen=True) class InputFeatures: """ @@ -306,6 +276,7 @@ class InputFeatures: label: Optional[Union[int, float]] = None seq_length: Optional[List[int]] = None + class ONNXRTGLUE: """Computes GLUE score. @@ -315,9 +286,9 @@ class ONNXRTGLUE: sts-b, cola, mnli, wnli. """ + def __init__(self, task="mrpc"): - assert task in ["mrpc", "qqp", "qnli", "rte", "sts-b", "cola", \ - "mnli", "wnli", "sst-2"], "Unsupported task type" + assert task in ["mrpc", "qqp", "qnli", "rte", "sts-b", "cola", "mnli", "wnli", "sst-2"], "Unsupported task type" self.pred_list = None self.label_list = None self.task = task @@ -330,7 +301,7 @@ def __init__(self, task="mrpc"): "qnli": "acc", "rte": "acc", "wnli": "acc", - "sst-2": "acc" + "sst-2": "acc", } def update(self, preds, labels): @@ -358,21 +329,23 @@ def result(self): processed_preds = np.argmax(self.pred_list, axis=1) elif output_mode == "regression": processed_preds = np.squeeze(self.pred_list) - result = transformers.glue_compute_metrics(\ - self.task, processed_preds, self.label_list) + result = transformers.glue_compute_metrics(self.task, processed_preds, self.label_list) return result[self.return_key[self.task]] + class DataReader(data_reader.CalibrationDataReader): def __init__(self, model_path, dynamic_length=False, batch_size=1, calibration_sampling_size=8): self.encoded_list = [] - self.batch_size=batch_size - dataset = ONNXRTBertDataset(args.model_path, + self.batch_size = batch_size + dataset = ONNXRTBertDataset( + args.model_path, data_dir=args.data_path, model_name_or_path=args.model_name_or_path, max_seq_length=args.max_seq_length, task=args.task, model_type=args.model_type, - dynamic_length=args.dynamic_length) + dynamic_length=args.dynamic_length, + ) dataloader = data.DataLoader( dataset, sampler=data.SequentialSampler(dataset), @@ -381,7 +354,7 @@ def __init__(self, model_path, dynamic_length=False, batch_size=1, calibration_s ) model = onnx.load(model_path, load_external_data=False) inputs_names = [input.name for input in model.graph.input] - self.batch_size=batch_size + self.batch_size = batch_size for idx, batch in enumerate(dataloader): if idx + 1 > calibration_sampling_size: @@ -403,17 +376,20 @@ def get_next(self): def rewind(self): self.iter_next = iter(self.encoded_list) + if __name__ == "__main__": # set config for npu test provider = "DmlExecutionProvider" if args.device == "npu" else "CPUExecutionProvider" - dataset = ONNXRTBertDataset(args.model_path, + dataset = ONNXRTBertDataset( + args.model_path, data_dir=args.data_path, model_name_or_path=args.model_name_or_path, max_seq_length=args.max_seq_length, task=args.task, model_type=args.model_type, - dynamic_length=args.dynamic_length) + dynamic_length=args.dynamic_length, + ) dataloader = data.DataLoader( dataset, sampler=data.SequentialSampler(dataset), @@ -428,11 +404,10 @@ def eval_func(model): len_inputs = len(session.get_inputs()) inputs_names = [session.get_inputs()[i].name for i in range(len_inputs)] - batch_seq_length = args.max_seq_length if not args.dynamic_length else torch.max(batch[-2], 0)[0].item() - for idx, batch in enumerate(dataloader): label = batch[-1] batch = tuple(t.detach().cpu().numpy() if not isinstance(t, np.ndarray) else t for t in batch[0]) + batch_seq_length = args.max_seq_length if 
not args.dynamic_length else torch.max(batch[-2], 0)[0].item() inputs = [ batch[0][:, :batch_seq_length], batch[1][:, :batch_seq_length], @@ -446,24 +421,25 @@ def eval_func(model): if args.benchmark: model = onnx.load(args.model_path) - if args.mode == 'performance': + if args.mode == "performance": total_time = 0.0 num_iter = 100 num_warmup = 10 - sess_options = ort.SessionOptions() + sess_options = onnxruntime.SessionOptions() sess_options.intra_op_num_threads = args.intra_op_num_threads - session = onnxruntime.InferenceSession(model.SerializeToString(), - sess_options, - providers=onnxruntime.get_available_providers()) + session = onnxruntime.InferenceSession( + model.SerializeToString(), sess_options, providers=onnxruntime.get_available_providers() + ) ort_inputs = {} len_inputs = len(session.get_inputs()) inputs_names = [session.get_inputs()[i].name for i in range(len_inputs)] - + for idx, batch in enumerate(dataloader): if idx + 1 > num_iter: break batch = tuple(t.detach().cpu().numpy() if not isinstance(t, np.ndarray) else t for t in batch) + batch_seq_length = args.max_seq_length if not args.dynamic_length else torch.max(batch[-2], 0)[0].item() inputs = [ batch[0][:, :batch_seq_length], batch[1][:, :batch_seq_length], @@ -481,7 +457,7 @@ def eval_func(model): print(args) throughput = (num_iter - num_warmup) / total_time print("Throughput: {} samples/s".format(throughput)) - elif args.mode == 'accuracy': + elif args.mode == "accuracy": acc_result = eval_func(model) print("Batch size = %d" % args.batch_size) print("Accuracy: %.5f" % acc_result) @@ -493,11 +469,8 @@ def eval_func(model): opt_options.enable_embed_layer_norm = False model_optimizer = optimizer.optimize_model( - args.model_path, - "bert", - num_heads=12, - hidden_size=768, - optimization_options=opt_options) + args.model_path, "bert", num_heads=12, hidden_size=768, optimization_options=opt_options + ) model = model_optimizer.model # check the optimized model is valid @@ -507,14 +480,17 @@ def eval_func(model): model = pathlib.Path(tmp_dir).joinpath("opt.onnx").as_posix() except Exception as e: logger.warning("Optimized model is invalid: {}. ".format(e)) - logger.warning("Model optimizer will be skipped. " \ - "Try to upgrade onnxruntime to avoid this error") + logger.warning("Model optimizer will be skipped. 
" "Try to upgrade onnxruntime to avoid this error") model = args.model_path calibration_data_reader = DataReader(args.model_path, calibration_sampling_size=8) custom_tune_config = tuning.TuningConfig( config_set=config.StaticQuantConfig.get_config_set_for_tuning( - quant_format=quantization.QuantFormat.QOperator if args.quant_format == "QOperator" else quantization.QuantFormat.QDQ, + quant_format=( + quantization.QuantFormat.QOperator + if args.quant_format == "QOperator" + else quantization.QuantFormat.QDQ + ), calibration_sampling_size=8, extra_options={"optypes_to_exclude_output_quant": ["MatMul", "Gemm", "Attention", "FusedGemm"]}, execution_provider=provider, diff --git a/examples/nlp/bert/quantization/ptq_static/prepare_data.sh b/examples/nlp/bert/quantization/ptq_static/prepare_data.sh index 8e434a5c5..c1fddb546 100644 --- a/examples/nlp/bert/quantization/ptq_static/prepare_data.sh +++ b/examples/nlp/bert/quantization/ptq_static/prepare_data.sh @@ -14,10 +14,10 @@ function init_params { do case $var in --data_dir=*) - data_dir=$(echo $var |cut -f2 -d=) + data_dir=$(echo "$var" |cut -f2 -d=) ;; --task_name=*) - task_name=$(echo $var |cut -f2 -d=) + task_name=$(echo "$var" |cut -f2 -d=) ;; esac done @@ -27,7 +27,7 @@ function init_params { # run_tuning function download_data { wget https://raw.githubusercontent.com/huggingface/transformers/f98ef14d161d7bcdc9808b5ec399981481411cc1/utils/download_glue_data.py - python download_glue_data.py --data_dir=${data_dir} --tasks=${task_name} + python download_glue_data.py --data_dir="${data_dir}" --tasks="${task_name}" } main "$@" diff --git a/examples/nlp/bert/quantization/ptq_static/prepare_model.py b/examples/nlp/bert/quantization/ptq_static/prepare_model.py index 0a29b5830..5b9216640 100644 --- a/examples/nlp/bert/quantization/ptq_static/prepare_model.py +++ b/examples/nlp/bert/quantization/ptq_static/prepare_model.py @@ -1,14 +1,14 @@ import argparse import os import sys -import zipfile import urllib +import zipfile import torch import transformers # Please refer to [Bert-GLUE_OnnxRuntime_quantization guide] -# (https://github.com/microsoft/onnxruntime-inference-examples/blob/main/quantization/notebooks/bert/Bert-GLUE_OnnxRuntime_quantization.ipynb) +# (https://github.com/microsoft/onnxruntime-inference-examples/blob/main/quantization/notebooks/bert/Bert-GLUE_OnnxRuntime_quantization.ipynb) # for detailed model export. 
MODEL_URL = "https://download.pytorch.org/tutorial/MRPC.zip" @@ -19,16 +19,13 @@ def parse_arguments(): parser = argparse.ArgumentParser() parser.add_argument("--input_model", type=str, required=False, default="MRPC.zip") parser.add_argument("--output_model", type=str, required=True) - parser.add_argument('--max_len', - type=int, - default=128, - help='Maximum length of the sentence pairs') + parser.add_argument("--max_len", type=int, default=128, help="Maximum length of the sentence pairs") return parser.parse_args() def progressbar(cur, total=100): - percent = '{:.2%}'.format(cur / total) - sys.stdout.write("\r[%-100s] %s" % ('#' * int(cur), percent)) + percent = "{:.2%}".format(cur / total) + sys.stdout.write("\r[%-100s] %s" % ("#" * int(cur), percent)) sys.stdout.flush() @@ -42,15 +39,15 @@ def schedule(blocknum, blocksize, totalsize): def is_zip_file(filename): try: - with open(filename, 'rb') as f: + with open(filename, "rb") as f: magic_number = f.read(4) - return magic_number == b'PK\x03\x04' # ZIP file magic number + return magic_number == b"PK\x03\x04" # ZIP file magic number except OSError: return False def extrafile(filename, target_folder="."): - with zipfile.ZipFile(filename, 'r') as zin: + with zipfile.ZipFile(filename, "r") as zin: zin.extractall(target_folder) @@ -80,30 +77,30 @@ def download_model(url, model_name, retry_times=5): def export_model(model, output_model, max_len=128): with torch.no_grad(): inputs = { - 'input_ids': torch.ones(1, max_len, dtype=torch.int64), - 'attention_mask': torch.ones(1, max_len, dtype=torch.int64), - 'token_type_ids': torch.ones(1, max_len, dtype=torch.int64) + "input_ids": torch.ones(1, max_len, dtype=torch.int64), + "attention_mask": torch.ones(1, max_len, dtype=torch.int64), + "token_type_ids": torch.ones(1, max_len, dtype=torch.int64), } - symbolic_names = {0: 'batch_size', 1: 'max_seq_len'} + symbolic_names = {0: "batch_size", 1: "max_seq_len"} torch.onnx.export( model, # model being run - (inputs['input_ids'], inputs['attention_mask'], - inputs['token_type_ids']), # model input (or a tuple for multiple inputs) + ( + inputs["input_ids"], + inputs["attention_mask"], + inputs["token_type_ids"], + ), # model input (or a tuple for multiple inputs) output_model, # where to save the model (can be a file or file-like object) opset_version=14, # the ONNX version to export the model do_constant_folding=True, # whether to execute constant folding - input_names=[ - 'input_ids', # the model's input names - 'input_mask', - 'segment_ids' - ], - output_names=['output'], # the model's output names + input_names=["input_ids", "input_mask", "segment_ids"], # the model's input names + output_names=["output"], # the model's output names dynamic_axes={ - 'input_ids': symbolic_names, # variable length axes - 'input_mask': symbolic_names, - 'segment_ids': symbolic_names - }) + "input_ids": symbolic_names, # variable length axes + "input_mask": symbolic_names, + "segment_ids": symbolic_names, + }, + ) assert os.path.exists(output_model), f"Export failed! {output_model} doesn't exist!" 
print("ONNX Model exported to {0}".format(output_model)) @@ -111,8 +108,7 @@ def export_model(model, output_model, max_len=128): def prepare_model(input_model, output_model, max_len): is_download_successful = download_model(MODEL_URL, input_model, MAX_TIMES_RETRY_DOWNLOAD) if is_download_successful: - folder_name = is_download_successful if isinstance(is_download_successful, - str) else "./MRPC" + folder_name = is_download_successful if isinstance(is_download_successful, str) else "./MRPC" model = transformers.BertForSequenceClassification.from_pretrained(folder_name) export_model(model, output_model, max_len) diff --git a/examples/nlp/bert/quantization/ptq_static/run_benchmark.sh b/examples/nlp/bert/quantization/ptq_static/run_benchmark.sh index d71c0a908..766d50476 100644 --- a/examples/nlp/bert/quantization/ptq_static/run_benchmark.sh +++ b/examples/nlp/bert/quantization/ptq_static/run_benchmark.sh @@ -14,16 +14,16 @@ function init_params { do case $var in --input_model=*) - input_model=$(echo $var |cut -f2 -d=) + input_model=$(echo "$var" |cut -f2 -d=) ;; --mode=*) - mode=$(echo $var |cut -f2 -d=) + mode=$(echo "$var" |cut -f2 -d=) ;; --dataset_location=*) - dataset_location=$(echo $var |cut -f2 -d=) + dataset_location=$(echo "$var" |cut -f2 -d=) ;; --batch_size=*) - batch_size=$(echo $var |cut -f2 -d=) + batch_size=$(echo "$var" |cut -f2 -d=) ;; esac done @@ -43,16 +43,15 @@ function run_benchmark { model_name_or_path="bert-base-uncased" task_name="mrpc" - model_type="bert" python main.py \ - --model_path ${input_model} \ - --model_name_or_path ${model_name_or_path} \ - --data_path ${dataset_location} \ - --task ${task_name} \ - --batch_size ${batch_size} \ - --mode ${mode} \ - --dynamic_length ${dynamic_length} \ + --model_path "${input_model}" \ + --model_name_or_path "${model_name_or_path}" \ + --data_path "${dataset_location}" \ + --task "${task_name}" \ + --batch_size "${batch_size}" \ + --mode "${mode}" \ + --dynamic_length "${dynamic_length}" \ --benchmark } diff --git a/examples/nlp/bert/quantization/ptq_static/run_quant.sh b/examples/nlp/bert/quantization/ptq_static/run_quant.sh index 08821d983..976e8e0c2 100644 --- a/examples/nlp/bert/quantization/ptq_static/run_quant.sh +++ b/examples/nlp/bert/quantization/ptq_static/run_quant.sh @@ -12,16 +12,16 @@ function init_params { do case $var in --input_model=*) - input_model=$(echo $var |cut -f2 -d=) + input_model=$(echo "$var" |cut -f2 -d=) ;; --output_model=*) - output_model=$(echo $var |cut -f2 -d=) + output_model=$(echo "$var" |cut -f2 -d=) ;; --dataset_location=*) - dataset_location=$(echo $var |cut -f2 -d=) + dataset_location=$(echo "$var" |cut -f2 -d=) ;; --quant_format=*) - quant_format=$(echo $var |cut -f2 -d=) + quant_format=$(echo "$var" |cut -f2 -d=) ;; esac done @@ -36,14 +36,14 @@ function run_tuning { model_type="bert" python main.py \ - --model_path ${input_model} \ - --output_model ${output_model} \ - --model_name_or_path ${model_name_or_path} \ - --data_path ${dataset_location} \ - --task ${task_name} \ - --batch_size ${batch_size} \ - --model_type ${model_type} \ - --quant_format ${quant_format} \ + --model_path "${input_model}" \ + --output_model "${output_model}" \ + --model_name_or_path "${model_name_or_path}" \ + --data_path "${dataset_location}" \ + --task "${task_name}" \ + --batch_size "${batch_size}" \ + --model_type "${model_type}" \ + --quant_format "${quant_format}" \ --tune } diff --git a/onnx_neural_compressor/algorithms/post_training_quant/__init__.py 
b/onnx_neural_compressor/algorithms/post_training_quant/__init__.py index e3fdc07b1..28f108cb6 100644 --- a/onnx_neural_compressor/algorithms/post_training_quant/__init__.py +++ b/onnx_neural_compressor/algorithms/post_training_quant/__init__.py @@ -10,4 +10,4 @@ # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and -# limitations under the License. \ No newline at end of file +# limitations under the License. diff --git a/onnx_neural_compressor/algorithms/post_training_quant/calibrate.py b/onnx_neural_compressor/algorithms/post_training_quant/calibrate.py index b7cc35c7d..40e3b9645 100644 --- a/onnx_neural_compressor/algorithms/post_training_quant/calibrate.py +++ b/onnx_neural_compressor/algorithms/post_training_quant/calibrate.py @@ -30,10 +30,11 @@ import onnxruntime from onnxruntime import quantization as ort_quant from packaging import version -from onnx_neural_compressor.algorithms.post_training_quant import calibrator + +from onnx_neural_compressor import logger, onnx_model from onnx_neural_compressor.algorithms import utility as quant_utils -from onnx_neural_compressor import onnx_model -from onnx_neural_compressor import logger +from onnx_neural_compressor.algorithms.post_training_quant import calibrator + if sys.version_info < (3, 11) and util.find_spec("onnxruntime_extensions"): import onnxruntime_extensions @@ -68,7 +69,11 @@ def __init__( execution_provider (list, optional): execution provider for onnxruntime. Defaults to 'CPUExecutionProvider'. reduce_range (bool, optional): use 7 bit or not. Defaults to False. """ - self.model_wrapper = model_wrapper if isinstance(model_wrapper, onnx_model.ONNXModel) else onnx_model.ONNXModel(model_wrapper, load_external_data=True) + self.model_wrapper = ( + model_wrapper + if isinstance(model_wrapper, onnx_model.ONNXModel) + else onnx_model.ONNXModel(model_wrapper, load_external_data=True) + ) self.model = self.model_wrapper.model ai_onnx_domain = [opset for opset in self.model.opset_import if not opset.domain or opset.domain == "ai.onnx"] self.opset_version = ai_onnx_domain[0].version @@ -224,11 +229,17 @@ def get_activation_tensors_calib_range(self, q_config=None): if sys.version_info < (3, 11) and util.find_spec("onnxruntime_extensions"): so.register_custom_ops_library(onnxruntime_extensions.get_library_path()) - execution_provider = self.execution_provider if self.execution_provider != "TensorrtExecutionProvider" else "CUDAExecutionProvider" + execution_provider = ( + self.execution_provider + if self.execution_provider != "TensorrtExecutionProvider" + else "CUDAExecutionProvider" + ) session = ( onnxruntime.InferenceSession(self.augmented_model.SerializeToString(), so, providers=[execution_provider]) if not self.model_wrapper.is_large_model - else onnxruntime.InferenceSession(self.model_wrapper.model_path + "_augment.onnx", so, providers=[execution_provider]) + else onnxruntime.InferenceSession( + self.model_wrapper.model_path + "_augment.onnx", so, providers=[execution_provider] + ) ) len_inputs = len(session.get_inputs()) @@ -268,7 +279,9 @@ def _collect_data(inputs): node_name = name_to_node[node_output_names[output_idx]] if node_output_names[output_idx] not in name_to_calibrator: calib_method = ( - q_config[node_name]["calibrate_method"].name if q_config and node_name in q_config else ort_quant.CalibrationMethod.MinMax.name + q_config[node_name]["calibrate_method"].name + if 
q_config and node_name in q_config + else ort_quant.CalibrationMethod.MinMax.name ) assert calib_method in calibrator.CALIBRATOR, "Calibration method {} is not registered.".format( calib_method @@ -283,18 +296,13 @@ def _collect_data(inputs): # per iteration in the future. if _calibrator.method_name == ort_quant.CalibrationMethod.MinMax.name: _calibrator.collect(output) - activation_tensors_calib_range[node_output_names[output_idx]] = [ - list(_calibrator.calib_range) - ] + activation_tensors_calib_range[node_output_names[output_idx]] = [list(_calibrator.calib_range)] name_to_calibrator[node_output_names[output_idx]] = _calibrator else: - intermediate_tensor.setdefault((node_output_names[output_idx], node_name), []).append( - output - ) + intermediate_tensor.setdefault((node_output_names[output_idx], node_name), []).append(output) elif q_config is None: activation_tensors_calib_range.setdefault(node_output_names[output_idx], []).append(output) - idx = 0 while True: inputs = self.dataloader.get_next() @@ -314,10 +322,12 @@ def _collect_data(inputs): for (output_name, node_name), datas in merged_dict.items(): if any([data is None for data in datas]): continue - if any([data.dtype in [bool] for data in datas]): # output type of some ops is bool, skip + if any([data.dtype in [bool] for data in datas]): # output type of some ops is bool, skip continue calib_method = ( - q_config[node_name]["calibrate_method"].name if q_config and node_name in q_config else ort_quant.CalibrationMethod.MinMax.name + q_config[node_name]["calibrate_method"].name + if q_config and node_name in q_config + else ort_quant.CalibrationMethod.MinMax.name ) _calibrator = calibrator.CALIBRATOR[calib_method]() _calibrator.collect(datas) @@ -386,7 +396,9 @@ def get_weight_tensors_calib_range(self): os.path.dirname(self.model_wrapper.model_path) if self.model_wrapper.model_path is not None else "" ), ) - _calibrator = calibrator.CALIBRATOR[ort_quant.CalibrationMethod.MinMax.name]() # use minmax method to calibrate initializer tensors + _calibrator = calibrator.CALIBRATOR[ + ort_quant.CalibrationMethod.MinMax.name + ]() # use minmax method to calibrate initializer tensors if initializer_tensor.flatten().size > 0: _calibrator.collect(initializer_tensor) weight_tensors_calib_range[initializer_tensor_name] = [list(_calibrator.calib_range)] @@ -560,16 +572,19 @@ def calculate_quantization_params(self, q_config, quantization_thresholds): qType = 2 # uint8 # input and output tensor follow activation_type and activation_sym - if tensor_name in input_name_to_nodes and \ - any([i.name in q_config for i in input_name_to_nodes[tensor_name]]): + if tensor_name in input_name_to_nodes and any( + [i.name in q_config for i in input_name_to_nodes[tensor_name]] + ): for child in input_name_to_nodes[tensor_name]: if child.name in q_config and q_config[child.name] not in ["fp32", "fp16", "bf16"]: sym = q_config[child.name]["activation_sym"] qType = q_config[child.name]["activation_type"] break - elif tensor_name in output_name_to_node and \ - output_name_to_node[tensor_name].name in q_config and \ - q_config[output_name_to_node[tensor_name].name] not in ["fp32", "fp16", "bf16"]: + elif ( + tensor_name in output_name_to_node + and output_name_to_node[tensor_name].name in q_config + and q_config[output_name_to_node[tensor_name].name] not in ["fp32", "fp16", "bf16"] + ): sym = q_config[output_name_to_node[tensor_name].name]["activation_sym"] qType = q_config[output_name_to_node[tensor_name].name]["activation_type"] if self.execution_provider in 
["TensorrtExecutionProvider"]: diff --git a/onnx_neural_compressor/algorithms/post_training_quant/calibrator.py b/onnx_neural_compressor/algorithms/post_training_quant/calibrator.py index 8ffbb0c46..042518092 100644 --- a/onnx_neural_compressor/algorithms/post_training_quant/calibrator.py +++ b/onnx_neural_compressor/algorithms/post_training_quant/calibrator.py @@ -20,6 +20,7 @@ """Calibrator for onnx models.""" import copy + import numpy as np from scipy import stats diff --git a/onnx_neural_compressor/algorithms/post_training_quant/operators/__init__.py b/onnx_neural_compressor/algorithms/post_training_quant/operators/__init__.py index 25f7fe13b..454c3ea69 100644 --- a/onnx_neural_compressor/algorithms/post_training_quant/operators/__init__.py +++ b/onnx_neural_compressor/algorithms/post_training_quant/operators/__init__.py @@ -13,8 +13,9 @@ # limitations under the License. """Operators for onnx model.""" -from os import path import glob +from os import path + from onnx_neural_compressor.algorithms.post_training_quant.operators import base_op modules = glob.glob(path.join(path.dirname(__file__), "*.py")) diff --git a/onnx_neural_compressor/algorithms/post_training_quant/operators/activation.py b/onnx_neural_compressor/algorithms/post_training_quant/operators/activation.py index baaa82d9a..c06d92dac 100644 --- a/onnx_neural_compressor/algorithms/post_training_quant/operators/activation.py +++ b/onnx_neural_compressor/algorithms/post_training_quant/operators/activation.py @@ -15,10 +15,9 @@ import onnx -from onnx_neural_compressor.algorithms.post_training_quant.operators import base_op +from onnx_neural_compressor import constants, utility from onnx_neural_compressor.algorithms import utility as quant_utils -from onnx_neural_compressor import constants -from onnx_neural_compressor import utility +from onnx_neural_compressor.algorithms.post_training_quant.operators import base_op @base_op.op_registry(op_types="LeakyRelu, Sigmoid", mode=[constants.STATIC_QUANT]) @@ -101,7 +100,10 @@ def quantize(self): self.quantizer.model.replace_input_of_all_nodes(node.output[0], node.input[0]) self.quantizer.remove_nodes.append(node) -@base_op.op_registry(op_types="Softmax, BiasGelu, Elu, Exp, FastGelu, Gelu, Softplus, Tanh", mode=[constants.STATIC_QUANT]) + +@base_op.op_registry( + op_types="Softmax, BiasGelu, Elu, Exp, FastGelu, Gelu, Softplus, Tanh", mode=[constants.STATIC_QUANT] +) class Float16ActivationOperator(base_op.Operator): """Float16 Activation operator.""" diff --git a/onnx_neural_compressor/algorithms/post_training_quant/operators/argmax.py b/onnx_neural_compressor/algorithms/post_training_quant/operators/argmax.py index a6932a8f9..594e24c05 100644 --- a/onnx_neural_compressor/algorithms/post_training_quant/operators/argmax.py +++ b/onnx_neural_compressor/algorithms/post_training_quant/operators/argmax.py @@ -13,10 +13,9 @@ # limitations under the License. 
"""ArgMax operator.""" -from onnx_neural_compressor.algorithms.post_training_quant.operators import base_op +from onnx_neural_compressor import constants, utility from onnx_neural_compressor.algorithms import utility as quant_utils -from onnx_neural_compressor import constants -from onnx_neural_compressor import utility +from onnx_neural_compressor.algorithms.post_training_quant.operators import base_op @base_op.op_registry(op_types="ArgMax", mode=[constants.STATIC_QUANT]) diff --git a/onnx_neural_compressor/algorithms/post_training_quant/operators/attention.py b/onnx_neural_compressor/algorithms/post_training_quant/operators/attention.py index d8da2f1ed..46f102352 100644 --- a/onnx_neural_compressor/algorithms/post_training_quant/operators/attention.py +++ b/onnx_neural_compressor/algorithms/post_training_quant/operators/attention.py @@ -15,10 +15,9 @@ import onnx -from onnx_neural_compressor.algorithms.post_training_quant.operators import base_op +from onnx_neural_compressor import constants, utility from onnx_neural_compressor.algorithms import utility as quant_utils -from onnx_neural_compressor import constants -from onnx_neural_compressor import utility +from onnx_neural_compressor.algorithms.post_training_quant.operators import base_op @base_op.op_registry(op_types="Attention", mode=[constants.DYNAMIC_QUANT, constants.STATIC_QUANT]) diff --git a/onnx_neural_compressor/algorithms/post_training_quant/operators/base_op.py b/onnx_neural_compressor/algorithms/post_training_quant/operators/base_op.py index 7e0f0e805..c3c97617a 100644 --- a/onnx_neural_compressor/algorithms/post_training_quant/operators/base_op.py +++ b/onnx_neural_compressor/algorithms/post_training_quant/operators/base_op.py @@ -13,9 +13,10 @@ # limitations under the License. """Base Operator.""" -from onnx_neural_compressor import constants from onnxruntime import quantization +from onnx_neural_compressor import constants + OPERATORS = { "dynamic_quant": {}, "static_quant": {}, diff --git a/onnx_neural_compressor/algorithms/post_training_quant/operators/binary_op.py b/onnx_neural_compressor/algorithms/post_training_quant/operators/binary_op.py index a40f2e43a..4aa1637b7 100644 --- a/onnx_neural_compressor/algorithms/post_training_quant/operators/binary_op.py +++ b/onnx_neural_compressor/algorithms/post_training_quant/operators/binary_op.py @@ -15,10 +15,9 @@ import onnx -from onnx_neural_compressor.algorithms.post_training_quant.operators import base_op +from onnx_neural_compressor import constants, utility from onnx_neural_compressor.algorithms import utility as quant_utils -from onnx_neural_compressor import constants -from onnx_neural_compressor import utility +from onnx_neural_compressor.algorithms.post_training_quant.operators import base_op @base_op.op_registry(op_types="Add, Mul", mode=[constants.STATIC_QUANT]) @@ -139,7 +138,10 @@ def convert(self): self.quantizer.model.replace_input_of_all_nodes(child.output[0], node.output[0] + "_quantized") node.output[0] = node.output[0] + "_quantized" -@base_op.op_registry(op_types="Sum, Sub, Div, Pow, Equal, Greater, GreaterOrEqual, Less, LessOrEqual", mode=[constants.STATIC_QUANT]) + +@base_op.op_registry( + op_types="Sum, Sub, Div, Pow, Equal, Greater, GreaterOrEqual, Less, LessOrEqual", mode=[constants.STATIC_QUANT] +) class Float16BinaryOperator(base_op.Operator): """Float16 Binary operator.""" diff --git a/onnx_neural_compressor/algorithms/post_training_quant/operators/concat.py b/onnx_neural_compressor/algorithms/post_training_quant/operators/concat.py index 
1a0df76ba..9e0f0ff6b 100644 --- a/onnx_neural_compressor/algorithms/post_training_quant/operators/concat.py +++ b/onnx_neural_compressor/algorithms/post_training_quant/operators/concat.py @@ -15,10 +15,9 @@ import onnx -from onnx_neural_compressor.algorithms.post_training_quant.operators import base_op +from onnx_neural_compressor import constants, utility from onnx_neural_compressor.algorithms import utility as quant_utils -from onnx_neural_compressor import constants -from onnx_neural_compressor import utility +from onnx_neural_compressor.algorithms.post_training_quant.operators import base_op @base_op.op_registry(op_types="Concat", mode=[constants.STATIC_QUANT]) diff --git a/onnx_neural_compressor/algorithms/post_training_quant/operators/conv.py b/onnx_neural_compressor/algorithms/post_training_quant/operators/conv.py index 8e305535c..ede7e1bfa 100644 --- a/onnx_neural_compressor/algorithms/post_training_quant/operators/conv.py +++ b/onnx_neural_compressor/algorithms/post_training_quant/operators/conv.py @@ -17,9 +17,9 @@ import onnx from onnx import onnx_pb as onnx_proto -from onnx_neural_compressor.algorithms.post_training_quant.operators import base_op -from onnx_neural_compressor.algorithms import utility as quant_utils from onnx_neural_compressor import constants +from onnx_neural_compressor.algorithms import utility as quant_utils +from onnx_neural_compressor.algorithms.post_training_quant.operators import base_op @base_op.op_registry(op_types="Conv, FusedConv", mode=[constants.DYNAMIC_QUANT]) @@ -115,9 +115,7 @@ def convert(self): scales_mul_node = quant_utils.find_by_name(scales_mul_op, self.quantizer.new_nodes) if scales_mul_node is None: - scales_mul_node = onnx.helper.make_node( - "Mul", [scale_0, scale_1], [scales_mul_op + ":0"], scales_mul_op - ) + scales_mul_node = onnx.helper.make_node("Mul", [scale_0, scale_1], [scales_mul_op + ":0"], scales_mul_op) self.quantizer.new_nodes.append(scales_mul_node) scales_mul_op_output = scales_mul_node.output[0] @@ -126,13 +124,12 @@ def convert(self): # and make the output of this node the same as output of original conv node. output_scale_mul_op = node.name + "_output_scale_mul" self.quantizer.new_nodes.append( - onnx.helper.make_node( - "Mul", [cast_op_output, scales_mul_op_output], [node.output[0]], output_scale_mul_op - ) + onnx.helper.make_node("Mul", [cast_op_output, scales_mul_op_output], [node.output[0]], output_scale_mul_op) ) self.quantizer.remove_nodes.extend(parents[1:]) self.quantizer.remove_nodes.append(node) + @base_op.op_registry(op_types="Conv, FusedConv", mode=[constants.STATIC_QUANT]) class StaticConvOperator(ConvOperator): """Conv Operator.""" @@ -174,9 +171,7 @@ def convert(self): """Convert to QOperator format.""" node = self.node - if len(self.quantizer.model.get_children(node)) == 0 or not node.name.endswith( - "_quant" - ): # pragma: no cover + if len(self.quantizer.model.get_children(node)) == 0 or not node.name.endswith("_quant"): # pragma: no cover return parents = self.quantizer.model.get_parents(node) child = self.quantizer.model.get_children(node)[0] diff --git a/onnx_neural_compressor/algorithms/post_training_quant/operators/direct_q8.py b/onnx_neural_compressor/algorithms/post_training_quant/operators/direct_q8.py index 79639186a..77d09793b 100644 --- a/onnx_neural_compressor/algorithms/post_training_quant/operators/direct_q8.py +++ b/onnx_neural_compressor/algorithms/post_training_quant/operators/direct_q8.py @@ -13,16 +13,15 @@ # limitations under the License. 
"""Direct8Bit Operator.""" -from onnx_neural_compressor.algorithms.post_training_quant.operators import base_op +from onnx_neural_compressor import constants, utility from onnx_neural_compressor.algorithms import utility as quant_utils -from onnx_neural_compressor import constants -from onnx_neural_compressor import utility +from onnx_neural_compressor.algorithms.post_training_quant.operators import base_op @base_op.op_registry( op_types="Reshape, Transpose, Squeeze, Unsqueeze, Flatten, Expand, Slice, " "SpaceToDepth, DepthToSpace, Upsample, Tile, CenterCropPad", - mode=[constants.STATIC_QUANT] + mode=[constants.STATIC_QUANT], ) class Direct8BitOperator(base_op.Operator): """Direct8Bit Operator.""" @@ -66,8 +65,9 @@ def convert(self): for parent in parents: if parent.op_type == "DequantizeLinear": # make sure parent DequantizeLinear of input 0 is not used by other ops - if len(self.quantizer.model.get_children(parent)) == 1 and \ - not self.quantizer.model.is_graph_output(parents[0].output[0]): + if len(self.quantizer.model.get_children(parent)) == 1 and not self.quantizer.model.is_graph_output( + parents[0].output[0] + ): self.quantizer.remove_nodes.append(parent) self.node.input[0] = parent.input[0] break diff --git a/onnx_neural_compressor/algorithms/post_training_quant/operators/embed_layernorm.py b/onnx_neural_compressor/algorithms/post_training_quant/operators/embed_layernorm.py index a4e35796c..0b9967f3d 100644 --- a/onnx_neural_compressor/algorithms/post_training_quant/operators/embed_layernorm.py +++ b/onnx_neural_compressor/algorithms/post_training_quant/operators/embed_layernorm.py @@ -15,10 +15,9 @@ import onnx -from onnx_neural_compressor.algorithms.post_training_quant.operators import base_op +from onnx_neural_compressor import constants, utility from onnx_neural_compressor.algorithms import utility as quant_utils -from onnx_neural_compressor import constants -from onnx_neural_compressor import utility +from onnx_neural_compressor.algorithms.post_training_quant.operators import base_op @base_op.op_registry(op_types="EmbedLayerNormalization", mode=[constants.DYNAMIC_QUANT, constants.STATIC_QUANT]) diff --git a/onnx_neural_compressor/algorithms/post_training_quant/operators/gather.py b/onnx_neural_compressor/algorithms/post_training_quant/operators/gather.py index 4a573d08b..fd851885f 100644 --- a/onnx_neural_compressor/algorithms/post_training_quant/operators/gather.py +++ b/onnx_neural_compressor/algorithms/post_training_quant/operators/gather.py @@ -15,13 +15,14 @@ import onnx -from onnx_neural_compressor.algorithms.post_training_quant.operators import base_op +from onnx_neural_compressor import constants, utility from onnx_neural_compressor.algorithms import utility as quant_utils -from onnx_neural_compressor import constants -from onnx_neural_compressor import utility +from onnx_neural_compressor.algorithms.post_training_quant.operators import base_op -@base_op.op_registry(op_types="Gather, GatherElements, GatherND", mode=[constants.DYNAMIC_QUANT, constants.STATIC_QUANT]) +@base_op.op_registry( + op_types="Gather, GatherElements, GatherND", mode=[constants.DYNAMIC_QUANT, constants.STATIC_QUANT] +) class GatherOperator(base_op.Operator): """Gather Operator.""" diff --git a/onnx_neural_compressor/algorithms/post_training_quant/operators/gavgpool.py b/onnx_neural_compressor/algorithms/post_training_quant/operators/gavgpool.py index 6d3dfb460..a91c1e531 100644 --- a/onnx_neural_compressor/algorithms/post_training_quant/operators/gavgpool.py +++ 
b/onnx_neural_compressor/algorithms/post_training_quant/operators/gavgpool.py @@ -15,10 +15,9 @@ import onnx -from onnx_neural_compressor.algorithms.post_training_quant.operators import base_op +from onnx_neural_compressor import constants, utility from onnx_neural_compressor.algorithms import utility as quant_utils -from onnx_neural_compressor import constants -from onnx_neural_compressor import utility +from onnx_neural_compressor.algorithms.post_training_quant.operators import base_op @base_op.op_registry(op_types="GlobalAveragePool", mode=[constants.STATIC_QUANT]) diff --git a/onnx_neural_compressor/algorithms/post_training_quant/operators/gemm.py b/onnx_neural_compressor/algorithms/post_training_quant/operators/gemm.py index 8e05ea63b..8d0b61c73 100644 --- a/onnx_neural_compressor/algorithms/post_training_quant/operators/gemm.py +++ b/onnx_neural_compressor/algorithms/post_training_quant/operators/gemm.py @@ -15,10 +15,9 @@ import onnx -from onnx_neural_compressor.algorithms.post_training_quant.operators import base_op +from onnx_neural_compressor import constants, logger from onnx_neural_compressor.algorithms import utility as quant_utils -from onnx_neural_compressor import constants -from onnx_neural_compressor import logger +from onnx_neural_compressor.algorithms.post_training_quant.operators import base_op @base_op.op_registry(op_types="Gemm", mode=[constants.STATIC_QUANT]) diff --git a/onnx_neural_compressor/algorithms/post_training_quant/operators/lstm.py b/onnx_neural_compressor/algorithms/post_training_quant/operators/lstm.py index bfc48ff79..8499f2441 100644 --- a/onnx_neural_compressor/algorithms/post_training_quant/operators/lstm.py +++ b/onnx_neural_compressor/algorithms/post_training_quant/operators/lstm.py @@ -16,10 +16,9 @@ import numpy import onnx -from onnx_neural_compressor.algorithms.post_training_quant.operators import base_op +from onnx_neural_compressor import constants, utility from onnx_neural_compressor.algorithms import utility as quant_utils -from onnx_neural_compressor import constants -from onnx_neural_compressor import utility +from onnx_neural_compressor.algorithms.post_training_quant.operators import base_op @base_op.op_registry(op_types="LSTM", mode=[constants.DYNAMIC_QUANT]) diff --git a/onnx_neural_compressor/algorithms/post_training_quant/operators/matmul.py b/onnx_neural_compressor/algorithms/post_training_quant/operators/matmul.py index 18b9841a9..eff98f533 100644 --- a/onnx_neural_compressor/algorithms/post_training_quant/operators/matmul.py +++ b/onnx_neural_compressor/algorithms/post_training_quant/operators/matmul.py @@ -16,9 +16,9 @@ import onnx from onnx import onnx_pb as onnx_proto -from onnx_neural_compressor.algorithms.post_training_quant.operators import base_op -from onnx_neural_compressor.algorithms import utility as quant_utils from onnx_neural_compressor import constants +from onnx_neural_compressor.algorithms import utility as quant_utils +from onnx_neural_compressor.algorithms.post_training_quant.operators import base_op @base_op.op_registry(op_types="MatMul", mode=[constants.DYNAMIC_QUANT]) @@ -93,9 +93,7 @@ def convert(self): scales_mul_node = quant_utils.find_by_name(scales_mul_op, self.quantizer.new_nodes) if scales_mul_node is None: - scales_mul_node = onnx.helper.make_node( - "Mul", [scale[0], scale[1]], [scales_mul_op + ":0"], scales_mul_op - ) + scales_mul_node = onnx.helper.make_node("Mul", [scale[0], scale[1]], [scales_mul_op + ":0"], scales_mul_op) self.quantizer.new_nodes.append(scales_mul_node) scales_mul_op_output = 
scales_mul_node.output[0] @@ -104,9 +102,7 @@ def convert(self): # and make the output of this node the same as output of original matmul node. output_scale_mul_op = node.name + "_output_scale_mul" self.quantizer.new_nodes.append( - onnx.helper.make_node( - "Mul", [cast_op_output, scales_mul_op_output], [node.output[0]], output_scale_mul_op - ) + onnx.helper.make_node("Mul", [cast_op_output, scales_mul_op_output], [node.output[0]], output_scale_mul_op) ) if parents[1].op_type == "DequantizeLinear": self.quantizer.remove_nodes.append(parents[1]) @@ -138,9 +134,7 @@ def convert(self): """Convert to QOperator format.""" node = self.node parents = self.quantizer.model.get_parents(node) - if len(self.quantizer.model.get_children(node)) == 0 or not node.name.endswith( - "_quant" - ): # pragma: no cover + if len(self.quantizer.model.get_children(node)) == 0 or not node.name.endswith("_quant"): # pragma: no cover return qlinear_matmul_inputs = [] @@ -166,8 +160,9 @@ def convert(self): self.quantizer.remove_nodes.append(node) # make sure parent DequantizeLinear of input 0 is not used by other ops - if len(self.quantizer.model.get_children(parents[0])) == 1 and \ - not self.quantizer.model.is_graph_output(parents[0].output[0]): + if len(self.quantizer.model.get_children(parents[0])) == 1 and not self.quantizer.model.is_graph_output( + parents[0].output[0] + ): self.quantizer.remove_nodes.extend(parents) else: self.quantizer.remove_nodes.append(parents[1]) diff --git a/onnx_neural_compressor/algorithms/post_training_quant/operators/maxpool.py b/onnx_neural_compressor/algorithms/post_training_quant/operators/maxpool.py index 1e86984cb..cd5119c13 100644 --- a/onnx_neural_compressor/algorithms/post_training_quant/operators/maxpool.py +++ b/onnx_neural_compressor/algorithms/post_training_quant/operators/maxpool.py @@ -13,10 +13,9 @@ # limitations under the License. 
"""MaxPool Operator.""" -from onnx_neural_compressor.algorithms.post_training_quant.operators import base_op +from onnx_neural_compressor import constants, utility from onnx_neural_compressor.algorithms import utility as quant_utils -from onnx_neural_compressor import constants -from onnx_neural_compressor import utility +from onnx_neural_compressor.algorithms.post_training_quant.operators import base_op @base_op.op_registry(op_types="MaxPool", mode=[constants.STATIC_QUANT]) diff --git a/onnx_neural_compressor/algorithms/post_training_quant/operators/pad.py b/onnx_neural_compressor/algorithms/post_training_quant/operators/pad.py index f5abde24b..61f7efd9e 100644 --- a/onnx_neural_compressor/algorithms/post_training_quant/operators/pad.py +++ b/onnx_neural_compressor/algorithms/post_training_quant/operators/pad.py @@ -15,10 +15,9 @@ import onnx -from onnx_neural_compressor.algorithms.post_training_quant.operators import base_op +from onnx_neural_compressor import constants, utility from onnx_neural_compressor.algorithms import utility as quant_utils -from onnx_neural_compressor import constants -from onnx_neural_compressor import utility +from onnx_neural_compressor.algorithms.post_training_quant.operators import base_op @base_op.op_registry(op_types="Pad", mode=[constants.STATIC_QUANT]) diff --git a/onnx_neural_compressor/algorithms/post_training_quant/operators/pooling.py b/onnx_neural_compressor/algorithms/post_training_quant/operators/pooling.py index 1a5b43fd8..fb97ce630 100644 --- a/onnx_neural_compressor/algorithms/post_training_quant/operators/pooling.py +++ b/onnx_neural_compressor/algorithms/post_training_quant/operators/pooling.py @@ -15,10 +15,9 @@ import onnx -from onnx_neural_compressor.algorithms.post_training_quant.operators import base_op +from onnx_neural_compressor import constants, utility from onnx_neural_compressor.algorithms import utility as quant_utils -from onnx_neural_compressor import constants -from onnx_neural_compressor import utility +from onnx_neural_compressor.algorithms.post_training_quant.operators import base_op @base_op.op_registry(op_types="AveragePool", mode=[constants.STATIC_QUANT]) diff --git a/onnx_neural_compressor/algorithms/post_training_quant/operators/reduce.py b/onnx_neural_compressor/algorithms/post_training_quant/operators/reduce.py index 9a089b08e..f89000e2e 100644 --- a/onnx_neural_compressor/algorithms/post_training_quant/operators/reduce.py +++ b/onnx_neural_compressor/algorithms/post_training_quant/operators/reduce.py @@ -13,14 +13,14 @@ # limitations under the License. 
"""Reduce Operator.""" -from onnx_neural_compressor.algorithms.post_training_quant.operators import base_op +from onnx_neural_compressor import constants, utility from onnx_neural_compressor.algorithms import utility as quant_utils -from onnx_neural_compressor import constants -from onnx_neural_compressor import utility +from onnx_neural_compressor.algorithms.post_training_quant.operators import base_op @base_op.op_registry( - op_types="ReduceMean, ReduceLogSum, ReduceLogSumExp, " "ReduceL1, ReduceL2, ReduceProd, ReduceSum, ReduceSumSquare" , mode=[constants.STATIC_QUANT] + op_types="ReduceMean, ReduceLogSum, ReduceLogSumExp, " "ReduceL1, ReduceL2, ReduceProd, ReduceSum, ReduceSumSquare", + mode=[constants.STATIC_QUANT], ) class ReduceOperator(base_op.Operator): """Reduce Operator.""" diff --git a/onnx_neural_compressor/algorithms/post_training_quant/operators/resize.py b/onnx_neural_compressor/algorithms/post_training_quant/operators/resize.py index 177dd6ec9..0cba83441 100644 --- a/onnx_neural_compressor/algorithms/post_training_quant/operators/resize.py +++ b/onnx_neural_compressor/algorithms/post_training_quant/operators/resize.py @@ -13,10 +13,9 @@ # limitations under the License. """Resize Operator.""" -from onnx_neural_compressor.algorithms.post_training_quant.operators import base_op +from onnx_neural_compressor import constants, utility from onnx_neural_compressor.algorithms import utility as quant_utils -from onnx_neural_compressor import constants -from onnx_neural_compressor import utility +from onnx_neural_compressor.algorithms.post_training_quant.operators import base_op @base_op.op_registry(op_types="Resize", mode=[constants.STATIC_QUANT]) diff --git a/onnx_neural_compressor/algorithms/post_training_quant/operators/split.py b/onnx_neural_compressor/algorithms/post_training_quant/operators/split.py index 551c97acc..97bded14f 100644 --- a/onnx_neural_compressor/algorithms/post_training_quant/operators/split.py +++ b/onnx_neural_compressor/algorithms/post_training_quant/operators/split.py @@ -15,10 +15,9 @@ import onnx -from onnx_neural_compressor.algorithms.post_training_quant.operators import base_op +from onnx_neural_compressor import constants, utility from onnx_neural_compressor.algorithms import utility as quant_utils -from onnx_neural_compressor import constants -from onnx_neural_compressor import utility +from onnx_neural_compressor.algorithms.post_training_quant.operators import base_op @base_op.op_registry(op_types="Split", mode=[constants.STATIC_QUANT]) diff --git a/onnx_neural_compressor/algorithms/post_training_quant/operators/unary_op.py b/onnx_neural_compressor/algorithms/post_training_quant/operators/unary_op.py index 9d081d9d4..87c402b99 100644 --- a/onnx_neural_compressor/algorithms/post_training_quant/operators/unary_op.py +++ b/onnx_neural_compressor/algorithms/post_training_quant/operators/unary_op.py @@ -13,10 +13,9 @@ # limitations under the License. 
"""Unary operator.""" -from onnx_neural_compressor.algorithms.post_training_quant.operators import base_op +from onnx_neural_compressor import constants, utility from onnx_neural_compressor.algorithms import utility as quant_utils -from onnx_neural_compressor import constants -from onnx_neural_compressor import utility +from onnx_neural_compressor.algorithms.post_training_quant.operators import base_op @base_op.op_registry(op_types="Exp, Log, Round, Sqrt", mode=[constants.STATIC_QUANT]) diff --git a/onnx_neural_compressor/algorithms/post_training_quant/quantizer.py b/onnx_neural_compressor/algorithms/post_training_quant/quantizer.py index c3e46730d..4e8b815e5 100644 --- a/onnx_neural_compressor/algorithms/post_training_quant/quantizer.py +++ b/onnx_neural_compressor/algorithms/post_training_quant/quantizer.py @@ -16,14 +16,14 @@ import copy import logging import os -import onnxruntime as ort + import numpy as np import onnx +import onnxruntime as ort -from onnx_neural_compressor.algorithms.post_training_quant.operators import base_op +from onnx_neural_compressor import logger, onnx_model from onnx_neural_compressor.algorithms import utility as quant_utils -from onnx_neural_compressor import logger -from onnx_neural_compressor import onnx_model +from onnx_neural_compressor.algorithms.post_training_quant.operators import base_op class Quantizer: @@ -143,10 +143,7 @@ def should_quantize(self, node): def should_convert(self, node): """Check if node should be converted.""" name = quant_utils.get_node_original_name(node) - if ( - name in self.config - and self.config[name] not in self.fallback_list - ): + if name in self.config and self.config[name] not in self.fallback_list: return True else: return False @@ -230,8 +227,12 @@ def merge_dedicated_qdq_pair(self): for n in dq_nodes: datas.append( [ - onnx.numpy_helper.to_array(quant_utils.find_by_name(n.input[1], self.model.initializer())), - onnx.numpy_helper.to_array(quant_utils.find_by_name(n.input[2], self.model.initializer())), + onnx.numpy_helper.to_array( + quant_utils.find_by_name(n.input[1], self.model.initializer()) + ), + onnx.numpy_helper.to_array( + quant_utils.find_by_name(n.input[2], self.model.initializer()) + ), ] ) for idx, data in enumerate(datas): @@ -280,15 +281,16 @@ def remove_duplicate_qdq_paris(self): for node in self.model.nodes(): if node.op_type == "DequantizeLinear": matched_parents = self.model.match_parent_path( - node, - ["QuantizeLinear", "DequantizeLinear", "QuantizeLinear"], - [None, None, None], - ) + node, + ["QuantizeLinear", "DequantizeLinear", "QuantizeLinear"], + [None, None, None], + ) if matched_parents is not None: # (node) DQ - (matched_parents) Q-DQ-Q - if all([i.op_type == "QuantizeLinear" for i in self.model.get_children(matched_parents[1])]) and \ - not self.model.is_graph_output(matched_parents[1].output[0]): + if all( + [i.op_type == "QuantizeLinear" for i in self.model.get_children(matched_parents[1])] + ) and not self.model.is_graph_output(matched_parents[1].output[0]): self.remove_nodes.append(matched_parents[1]) if all([i.op_type == "DequantizeLinear" for i in self.model.get_children(matched_parents[0])]): self.remove_nodes.append(matched_parents[0]) @@ -337,7 +339,8 @@ def quantize_bias_tensor(self, node): or input_name not in self.quantized_value_map or ( input_name in self.quantized_value_map - and quant_utils.find_by_name(self.quantized_value_map[input_name].scale_name, self.model.initializer()) is None + and quant_utils.find_by_name(self.quantized_value_map[input_name].scale_name, 
self.model.initializer()) + is None ) ): self._dynamic_quantize_bias(input_name, weight_name + "_scale", bias_name, bias_name + "_quantized") @@ -505,7 +508,6 @@ def quantize_weight_per_channel(self, weight_name, weight_qType, sym, channel_ax return (weight.name + "_quantized", weight.name + "_zero_point", weight.name + "_scale") - def dequantize_tensor(self, node, value_name): """Dequantize tensor.""" if value_name in self.quantized_value_map: @@ -749,7 +751,9 @@ def quantize_outputs(self, node, initializer_use_weight_qType=True, direct_int8= for child in self.model.get_children(node): self.replace_input.append([child, tensor_name, dequant_node.output[0]]) if tensor_name not in self.quantized_value_map: - quantized_value = quant_utils.QuantizedValue(tensor_name, dq_output, scale_name, zp_name, quant_utils.QuantizedValueType.Input) + quantized_value = quant_utils.QuantizedValue( + tensor_name, dq_output, scale_name, zp_name, quant_utils.QuantizedValueType.Input + ) self.quantized_value_map[tensor_name] = quantized_value def quantize_inputs(self, node, indices=None, initializer_use_weight_qType=True, direct_int8=False): @@ -799,7 +803,13 @@ def quantize_inputs(self, node, indices=None, initializer_use_weight_qType=True, self.replace_input.append([node, weight.name, dequant_node.output[0]]) if weight.name not in self.quantized_value_map: quantized_value = quant_utils.QuantizedValue( - weight.name, q_weight_name, scale_name, zp_name, quant_utils.QuantizedValueType.Initializer, None, dtype + weight.name, + q_weight_name, + scale_name, + zp_name, + quant_utils.QuantizedValueType.Initializer, + None, + dtype, ) self.quantized_value_map[weight.name] = quantized_value else: @@ -822,9 +832,7 @@ def quantize_weights_per_channel(self, node, indices, weight_qType, sym, axis): continue q_name, zp_name, scale_name = self.quantize_weight_per_channel(inp, weight_qType, sym, axis) - weight_name = ( - ("_").join([inp, str(weight_qType)]) if self.model.get_initializer_share_num(inp) > 1 else inp - ) + weight_name = ("_").join([inp, str(weight_qType)]) if self.model.get_initializer_share_num(inp) > 1 else inp dequant_node = onnx.helper.make_node( "DequantizeLinear", [q_name, scale_name, zp_name], @@ -848,6 +856,7 @@ def quantize_weights_per_channel(self, node, indices, weight_qType, sym, axis): ) self.new_nodes.append(qlinear_node) + class StaticQuantizer(Quantizer): """Static quantizer class.""" @@ -887,7 +896,7 @@ def __init__( static=True, quantization_params=quantization_params, op_types_to_quantize=op_types_to_quantize, - ) + ) self.fallback_list = fallback_list self.reduce_range = reduce_range self.add_qdq_pair_to_weight = add_qdq_pair_to_weight @@ -919,9 +928,9 @@ def _revert_conv_add_fusion(self): for node in self.model.nodes(): if node.op_type == "Conv" and len(node.input) == 3: bias_tensor = self.model.get_initializer(node.input[2]) - bias_array = numpy_helper.to_array(bias_tensor).reshape((-1, 1, 1)) + bias_array = onnx.numpy_helper.to_array(bias_tensor).reshape((-1, 1, 1)) self.model.remove_initializer(bias_tensor) - self.model.add_initializer(numpy_helper.from_array(bias_array, bias_tensor.name)) + self.model.add_initializer(onnx.numpy_helper.from_array(bias_array, bias_tensor.name)) kwargs = {} activation_params = None for attr in node.attribute: @@ -994,6 +1003,7 @@ def _quantize_activation(self, node, tensor_name, direct_int8=False): ) self.quantized_value_map[tensor_name] = quantized_value + class DynamicQuantizer(Quantizer): """Dynamic quantizer class.""" @@ -1027,13 +1037,11 @@ def 
__init__( static=False, quantization_params=quantization_params, op_types_to_quantize=op_types_to_quantize, - ) + ) def _quantize_activation(self, node, tensor_name, direct_int8=False): """Quantize node activation.""" - qlinear_node = self.model.find_node_by_name( - tensor_name + "_QuantizeLinear", self.new_nodes, self.model.graph() - ) + qlinear_node = self.model.find_node_by_name(tensor_name + "_QuantizeLinear", self.new_nodes, self.model.graph()) if qlinear_node is None: if ( self.fuse_dynamic_quant diff --git a/onnx_neural_compressor/algorithms/smoother/core.py b/onnx_neural_compressor/algorithms/smoother/core.py index f4bf049c2..ab902de07 100644 --- a/onnx_neural_compressor/algorithms/smoother/core.py +++ b/onnx_neural_compressor/algorithms/smoother/core.py @@ -20,6 +20,7 @@ import numpy as np import onnx import onnxruntime as ort + from onnx_neural_compressor import data_reader, logger, onnx_model, utility from onnx_neural_compressor.algorithms import utility as quant_utils from onnx_neural_compressor.algorithms.smoother import calibrator diff --git a/onnx_neural_compressor/algorithms/utility.py b/onnx_neural_compressor/algorithms/utility.py index 06e270c33..a45ee2683 100644 --- a/onnx_neural_compressor/algorithms/utility.py +++ b/onnx_neural_compressor/algorithms/utility.py @@ -17,17 +17,17 @@ import enum import os +import pathlib import re import struct import sys from importlib import util import numpy as np +from onnxruntime.quantization import onnx_model from packaging import version -from onnx_neural_compressor import constants -from onnx_neural_compressor import utility -from onnxruntime.quantization import onnx_model +from onnx_neural_compressor import constants, utility, logger if sys.version_info < (3, 11) and util.find_spec("onnxruntime_extensions"): # pragma: no cover import onnxruntime_extensions @@ -80,6 +80,7 @@ onnx.TensorProto.INT8: (-64, 64), } + def check_model_with_infer_shapes(model): """Check if the model has been shape inferred.""" if isinstance(model, (pathlib.Path, str)): @@ -90,6 +91,7 @@ def check_model_with_infer_shapes(model): return True return False + def find_by_name(name, item_list): """Helper function to find item by name in a list.""" items = [] @@ -102,9 +104,11 @@ def find_by_name(name, item_list): else: return None + def is_quantizable_type(data_type): return data_type in [onnx.TensorProto.FLOAT, onnx.TensorProto.FLOAT16, onnx.TensorProto.BFLOAT16] + def get_qmin_qmax_for_qType(qType, reduce_range=False, sym=False): # noqa: N802 """Get qmin, qmax for qType.""" if qType == onnx.TensorProto.FLOAT8E4M3FN: @@ -124,6 +128,7 @@ def get_qmin_qmax_for_qType(qType, reduce_range=False, sym=False): # noqa: N802 return qrange + def dtype_to_name(dtype_mapping, dtype): """Map data type and its string representation.""" return list(dtype_mapping.keys())[list(dtype_mapping.values()).index(dtype)] @@ -439,22 +444,25 @@ def calculate_scale_zp(rmin, rmax, quantize_range, qType, sym): if isinstance(rmax, np.ndarray): if sym: max_range = np.maximum(abs(rmin), abs(rmax)) - rmin = - max_range + rmin = -max_range rmax = max_range scale = (rmax - rmin) / (qmax - qmin) scale[scale < np.finfo(rmax.dtype).tiny] = 1 - zero_point = np.multiply(np.ones(rmax.shape), np.round((qmax + qmin) / 2.0)).astype(dtype) if sym else \ - np.round(qmin - rmin / scale).astype(dtype) + zero_point = ( + np.multiply(np.ones(rmax.shape), np.round((qmax + qmin) / 2.0)).astype(dtype) + if sym + else np.round(qmin - rmin / scale).astype(dtype) + ) else: if sym: max_range = max(abs(rmin), 
abs(rmax)) scale = (float(max_range) * 2) / (qmax - qmin) if max_range > 0 else 1 else: scale = (float(rmax) - float(rmin)) / (qmax - qmin) if rmin != rmax else 1 - zero_point = np.round((qmax + qmin) / 2.0).astype(dtype) if sym else \ - np.round(qmin - rmin / scale).astype(dtype) + zero_point = np.round((qmax + qmin) / 2.0).astype(dtype) if sym else np.round(qmin - rmin / scale).astype(dtype) return np.float32(scale), zero_point + def quantize_data(data, quantize_range, qType, sym): """Quantize data. @@ -493,6 +501,7 @@ def get_node_original_name(node) -> str: # For unquantized nodes return node_name + class QuantType(enum.Enum): # pragma: no cover """Represent QuantType value.""" @@ -758,6 +767,7 @@ def _get_value(self, node, idx): raise Exception("Incomplete symbolic shape inference") return symbolic_shape_inference.out_mp_ + def dump_model_op_stats(model, quantize_config, fp32_op_list): qdq_ops = ["QuantizeLinear", "DequantizeLinear", "DynamicQuantizeLinear"] res = {} @@ -800,6 +810,7 @@ def dump_model_op_stats(model, quantize_config, fp32_op_list): utility.Statistics(output_data, header="Quantization Statistics", field_names=field_names).print_stat() + def dump_woq_stats(model, quantize_config, fp32_op_list): res = {} for optype in fp32_op_list: diff --git a/onnx_neural_compressor/algorithms/weight_only/awq.py b/onnx_neural_compressor/algorithms/weight_only/awq.py index bb8783528..b2db33dcb 100644 --- a/onnx_neural_compressor/algorithms/weight_only/awq.py +++ b/onnx_neural_compressor/algorithms/weight_only/awq.py @@ -25,8 +25,8 @@ from packaging import version from onnx_neural_compressor import config, constants, data_reader, logger, onnx_model -from onnx_neural_compressor.algorithms.weight_only import rtn from onnx_neural_compressor.algorithms import utility as quant_utils +from onnx_neural_compressor.algorithms.weight_only import rtn from typing import List, Union # isort: skip @@ -107,7 +107,7 @@ def _apply_awq_scale(model, weight_config, absorb_pairs, output_dicts, num_bits, else: q_weight = quant_utils.qdq_tensor(weight, num_bits, group_size, sym, "int") - q_weight = q_weight[:org_w_shape[0], :] / np.expand_dims(scales, axis=-1) + q_weight = q_weight[: org_w_shape[0], :] / np.expand_dims(scales, axis=-1) out = np.matmul(inp, q_weight) loss += np.mean(np.power((org_out - out), 2)) @@ -258,7 +258,7 @@ def _apply_awq_clip(model, weight_config, absorb_pairs, output_dicts, num_bits, else: weight = quant_utils.qdq_tensor(weight, num_bits, group_size, sym, "int", ratio) - cur_out = np.matmul(inp, weight[:, :org_w_shape[0]].T) + cur_out = np.matmul(inp, weight[:, : org_w_shape[0]].T) loss = np.mean(np.power((org_out - cur_out), 2)) is_best = loss < best_error if is_best: diff --git a/onnx_neural_compressor/algorithms/weight_only/gptq.py b/onnx_neural_compressor/algorithms/weight_only/gptq.py index f0a1b9038..c95c346f8 100644 --- a/onnx_neural_compressor/algorithms/weight_only/gptq.py +++ b/onnx_neural_compressor/algorithms/weight_only/gptq.py @@ -25,8 +25,8 @@ from packaging.version import Version from onnx_neural_compressor import config, constants, data_reader, onnx_model, utility -from onnx_neural_compressor.algorithms.layer_wise import core from onnx_neural_compressor.algorithms import utility as quant_utils +from onnx_neural_compressor.algorithms.layer_wise import core from typing import List, Union # isort: skip diff --git a/onnx_neural_compressor/algorithms/weight_only/rtn.py b/onnx_neural_compressor/algorithms/weight_only/rtn.py index f570b5271..6856f378d 100644 --- 
a/onnx_neural_compressor/algorithms/weight_only/rtn.py +++ b/onnx_neural_compressor/algorithms/weight_only/rtn.py @@ -24,8 +24,8 @@ from packaging import version from onnx_neural_compressor import config, constants, onnx_model, utility -from onnx_neural_compressor.algorithms.layer_wise import core from onnx_neural_compressor.algorithms import utility as quant_utils +from onnx_neural_compressor.algorithms.layer_wise import core from typing import List, Union # isort: skip diff --git a/onnx_neural_compressor/config.py b/onnx_neural_compressor/config.py index 7b32b5b79..59d0ceb65 100644 --- a/onnx_neural_compressor/config.py +++ b/onnx_neural_compressor/config.py @@ -23,19 +23,16 @@ import os import pathlib import re -from abc import ABC -from abc import abstractmethod +from abc import ABC, abstractmethod import numpy as np import onnx import pydantic -from onnx_neural_compressor import constants -from onnx_neural_compressor import data_reader -from onnx_neural_compressor import logger -from onnx_neural_compressor import utility from onnxruntime import quantization from typing_extensions import Self +from onnx_neural_compressor import constants, data_reader, logger, utility + from collections import OrderedDict # isort: skip from typing import Any, Callable, Dict, List, NamedTuple, Optional, Tuple, Type, Union, _GenericAlias # isort: skip @@ -302,7 +299,7 @@ def __getitem__(self, key): return getattr(self, key) else: raise KeyError(f"No such attribute: {key}") - + def __setitem__(self, key, value): setattr(self, key, value) @@ -421,7 +418,6 @@ def build_tuning_param(config: BaseConfig, param: str): raise ValueError(f"Unsupported param type: {param}") return tuning_param - def expand(self) -> List[BaseConfig]: """Expand the config. @@ -483,7 +479,9 @@ def expand(self) -> List[BaseConfig]: local_op_level_config_lst = model_level_config_lst else: tuning_param_name_lst = [tuning_param.name for tuning_param in op_tuning_param_list] - tuning_param_val_lst = list(itertools.product(*[tuning_param.options for tuning_param in op_tuning_param_list])) + tuning_param_val_lst = list( + itertools.product(*[tuning_param.options for tuning_param in op_tuning_param_list]) + ) tuning_param_pair_lst = [dict(zip(tuning_param_name_lst[::-1], val[::-1])) for val in tuning_param_val_lst] for model_level_config in model_level_config_lst: @@ -544,6 +542,7 @@ def __eq__(self, other: BaseConfig) -> bool: return False return self.get_init_args() == other.get_init_args() + class ComposableConfig(BaseConfig): name = constants.COMPOSABLE_CONFIG @@ -666,10 +665,18 @@ def to_dict(self): result = {} for key, val in self.__dict__.items(): if not isinstance(val, list): - result[key] = getattr(val, "tensor_type", val) if isinstance(val, quantization.QuantType) else getattr(val, "value", val) + result[key] = ( + getattr(val, "tensor_type", val) + if isinstance(val, quantization.QuantType) + else getattr(val, "value", val) + ) else: result[key] = [ - getattr(item, "tensor_type", item) if isinstance(item, quantization.QuantType) else getattr(item, "value", item) + ( + getattr(item, "tensor_type", item) + if isinstance(item, quantization.QuantType) + else getattr(item, "value", item) + ) for item in val ] return result @@ -680,6 +687,7 @@ def __eq__(self, other): else: return self.to_dict() == other + class _OperatorConfig(NamedTuple): config: OperatorConfig operators: List[Union[str, Callable]] @@ -1218,7 +1226,6 @@ class StaticQuantConfig(BaseConfig, quantization.StaticQuantConfig): ] name: str = constants.STATIC_QUANT - def 
__init__( self, calibration_data_reader: data_reader.CalibrationDataReader = None, @@ -1268,7 +1275,11 @@ def __init__( if execution_provider is None: execution_provider = utility.auto_detect_ep() if op_types_to_quantize is None: - op_types_to_quantize = constants.STATIC_QOPERATOR_OP_LIST_MAP.get(execution_provider, []) if quant_format == quantization.QuantFormat.QOperator else constants.STATIC_QDQ_OP_LIST_MAP.get(execution_provider, []) + op_types_to_quantize = ( + constants.STATIC_QOPERATOR_OP_LIST_MAP.get(execution_provider, []) + if quant_format == quantization.QuantFormat.QOperator + else constants.STATIC_QDQ_OP_LIST_MAP.get(execution_provider, []) + ) if not reduce_range and not utility.CpuInfo().vnni: logger.warning( "VNNI is not supported and reduce_range=False, reduce_range=True is recommended to avoid potential accuracy issue." @@ -1292,14 +1303,16 @@ def __init__( if "TensorrtExecutionProvider" in execution_provider: logger.info("Update some parameters for TensorrtExecutionProvider") os.environ["ORT_TENSORRT_INT8_ENABLE"] = "0" - self.extra_options.update({ - "add_qdq_pair_to_weight": True, - "dedicated_qdq_pair": True, - "optypes_to_exclude_output_quant": ["Conv", "Gemm", "Add", "MatMul"], - }) + self.extra_options.update( + { + "add_qdq_pair_to_weight": True, + "dedicated_qdq_pair": True, + "optypes_to_exclude_output_quant": ["Conv", "Gemm", "Add", "MatMul"], + } + ) else: os.environ["ORT_TENSORRT_UNAVAILABLE"] = "1" - + BaseConfig.__init__(self, white_list=self.op_types_to_quantize) self.execution_provider = execution_provider self.quant_last_matmul = quant_last_matmul @@ -1314,7 +1327,7 @@ def __init__( def get_model_info(model, white_list=constants.STATIC_QOPERATOR_CPU_OP_LIST) -> list: if not isinstance(model, onnx.ModelProto): model = onnx.load(model, load_external_data=False) - + filter_result = [] for node in model.graph.node: if node.op_type in white_list: @@ -1355,11 +1368,23 @@ def to_config_mapping(self, config_list: list = None, model_info: list = None) - op_type_config_dict, op_name_config_dict = config._get_op_name_op_type_config() last_matmul = None for op_name, op_type in model_info: - if isinstance(self.op_types_to_quantize, list) and len(self.op_types_to_quantize) > 0 and op_type not in self.op_types_to_quantize: + if ( + isinstance(self.op_types_to_quantize, list) + and len(self.op_types_to_quantize) > 0 + and op_type not in self.op_types_to_quantize + ): continue - if isinstance(self.nodes_to_quantize, list) and len(self.nodes_to_quantize) > 0 and op_name not in self.nodes_to_quantize: + if ( + isinstance(self.nodes_to_quantize, list) + and len(self.nodes_to_quantize) > 0 + and op_name not in self.nodes_to_quantize + ): continue - if isinstance(self.nodes_to_exclude, list) and len(self.nodes_to_exclude) > 0 and op_name in self.nodes_to_exclude: + if ( + isinstance(self.nodes_to_exclude, list) + and len(self.nodes_to_exclude) > 0 + and op_name in self.nodes_to_exclude + ): continue if op_type in op_type_config_dict: self._config_mapping[op_name] = op_type_config_dict[op_type] @@ -1390,13 +1415,21 @@ def get_config_set_for_tuning( execution_provider = utility.auto_detect_ep() StaticQuantConfig.register_supported_configs() if op_types_to_quantize is None: - op_types_to_quantize = constants.STATIC_QOPERATOR_OP_LIST_MAP.get(execution_provider, []) if quant_format == quantization.QuantFormat.QOperator else constants.STATIC_QDQ_OP_LIST_MAP.get(execution_provider, []) + op_types_to_quantize = ( + constants.STATIC_QOPERATOR_OP_LIST_MAP.get(execution_provider, []) + 
if quant_format == quantization.QuantFormat.QOperator + else constants.STATIC_QDQ_OP_LIST_MAP.get(execution_provider, []) + ) op_type_candidate = [ op_types_to_quantize, list(set(op_types_to_quantize).difference({"Add", "Mul"})), list(set(op_types_to_quantize).difference({"Add", "Mul", "Gather", "GatherElements", "GatherND"})), - list(set(op_types_to_quantize).difference({"Add", "Mul", "Gather", "GatherElements", "GatherND", "Attention"})), + list( + set(op_types_to_quantize).difference( + {"Add", "Mul", "Gather", "GatherElements", "GatherND", "Attention"} + ) + ), ] cfg_lst = [] @@ -1426,58 +1459,103 @@ def register_supported_configs(cls) -> None: weight_type=onnx.TensorProto.UINT8, weight_sym=False, per_channel=[True, False], - calibrate_method=[quantization.CalibrationMethod.MinMax, quantization.CalibrationMethod.Entropy, quantization.CalibrationMethod.Percentile], + calibrate_method=[ + quantization.CalibrationMethod.MinMax, + quantization.CalibrationMethod.Entropy, + quantization.CalibrationMethod.Percentile, + ], activation_type=onnx.TensorProto.UINT8, activation_sym=False, ), operators=["GatherND", "GatherElements", "Gather"], valid_func_list=utility.STATIC_CHECK_FUNC_LIST, - )) + ) + ) supported_configs.append( _OperatorConfig( config=OperatorConfig( weight_type=onnx.TensorProto.UINT8, weight_sym=False, per_channel=False, - calibrate_method=[quantization.CalibrationMethod.MinMax, quantization.CalibrationMethod.Entropy, quantization.CalibrationMethod.Percentile], + calibrate_method=[ + quantization.CalibrationMethod.MinMax, + quantization.CalibrationMethod.Entropy, + quantization.CalibrationMethod.Percentile, + ], activation_type=onnx.TensorProto.UINT8, activation_sym=False, ), operators=["EmbedLayerNormalization"], valid_func_list=utility.STATIC_CHECK_FUNC_LIST, - )) + ) + ) supported_configs.append( _OperatorConfig( config=OperatorConfig( weight_type=onnx.TensorProto.INT8, weight_sym=True, per_channel=[True, False], - calibrate_method=[quantization.CalibrationMethod.MinMax, quantization.CalibrationMethod.Entropy, quantization.CalibrationMethod.Percentile], + calibrate_method=[ + quantization.CalibrationMethod.MinMax, + quantization.CalibrationMethod.Entropy, + quantization.CalibrationMethod.Percentile, + ], activation_type=onnx.TensorProto.UINT8, activation_sym=False, ), operators=["Conv", "MatMul", "Gemm", "FusedConv"], valid_func_list=utility.STATIC_CHECK_FUNC_LIST, - )) + ) + ) supported_configs.append( _OperatorConfig( config=OperatorConfig( weight_type=onnx.TensorProto.INT8, weight_sym=True, per_channel=False, - calibrate_method=[quantization.CalibrationMethod.MinMax, quantization.CalibrationMethod.Entropy, quantization.CalibrationMethod.Percentile], + calibrate_method=[ + quantization.CalibrationMethod.MinMax, + quantization.CalibrationMethod.Entropy, + quantization.CalibrationMethod.Percentile, + ], activation_type=onnx.TensorProto.UINT8, activation_sym=False, ), operators=[ - "Relu", "Clip", "LeakyRelu", "Sigmoid", "MaxPool", "GlobalAveragePool", - "Pad", "Split", "Squeeze", "Reshape", "Concat", "AveragePool", "Tile", - "Unsqueeze", "Transpose", "Resize", "Abs", "Shrink", "Sign", "Attention", - "Flatten", "Expand", "Slice", "Mod", "ReduceMax", "ReduceMin", - "CenterCropPad", "Add", "Mul", "ArgMax", + "Relu", + "Clip", + "LeakyRelu", + "Sigmoid", + "MaxPool", + "GlobalAveragePool", + "Pad", + "Split", + "Squeeze", + "Reshape", + "Concat", + "AveragePool", + "Tile", + "Unsqueeze", + "Transpose", + "Resize", + "Abs", + "Shrink", + "Sign", + "Attention", + "Flatten", + 
"Expand", + "Slice", + "Mod", + "ReduceMax", + "ReduceMin", + "CenterCropPad", + "Add", + "Mul", + "ArgMax", ], valid_func_list=utility.STATIC_CHECK_FUNC_LIST, - )) + ) + ) cls.supported_configs = supported_configs def to_dict(self): @@ -1492,10 +1570,18 @@ def to_dict(self): result[key] = local_result continue if not isinstance(val, list): - result[key] = getattr(val, "tensor_type", val) if isinstance(val, quantization.QuantType) else getattr(val, "value", val) + result[key] = ( + getattr(val, "tensor_type", val) + if isinstance(val, quantization.QuantType) + else getattr(val, "value", val) + ) else: result[key] = [ - getattr(item, "tensor_type", item) if isinstance(item, quantization.QuantType) else getattr(item, "value", item) + ( + getattr(item, "tensor_type", item) + if isinstance(item, quantization.QuantType) + else getattr(item, "value", item) + ) for item in val ] return result @@ -1590,6 +1676,7 @@ def get_config_set_for_tuning( ) -> Union[None, "SmoothQuantConfig", List["SmoothQuantConfig"]]: # pragma: no cover return SmoothQuantConfig(alpha=np.arange(0.3, 0.7, 0.05)) + def get_default_sq_config() -> SmoothQuantConfig: """Generate the default smooth quant config. @@ -1670,7 +1757,7 @@ def __init__( def get_model_info(model, white_list=constants.DYNAMIC_CPU_OP_LIST) -> list: if not isinstance(model, onnx.ModelProto): model = onnx.load(model, load_external_data=False) - + filter_result = [] for node in model.graph.node: if node.op_type in white_list: @@ -1710,11 +1797,23 @@ def to_config_mapping(self, config_list: list = None, model_info: list = None) - op_type_config_dict, op_name_config_dict = config._get_op_name_op_type_config() last_matmul = None for op_name, op_type in model_info: - if isinstance(self.op_types_to_quantize, list) and len(self.op_types_to_quantize) > 0 and op_type not in self.op_types_to_quantize: + if ( + isinstance(self.op_types_to_quantize, list) + and len(self.op_types_to_quantize) > 0 + and op_type not in self.op_types_to_quantize + ): continue - if isinstance(self.nodes_to_quantize, list) and len(self.nodes_to_quantize) > 0 and op_name not in self.nodes_to_quantize: + if ( + isinstance(self.nodes_to_quantize, list) + and len(self.nodes_to_quantize) > 0 + and op_name not in self.nodes_to_quantize + ): continue - if isinstance(self.nodes_to_exclude, list) and len(self.nodes_to_exclude) > 0 and op_name in self.nodes_to_exclude: + if ( + isinstance(self.nodes_to_exclude, list) + and len(self.nodes_to_exclude) > 0 + and op_name in self.nodes_to_exclude + ): continue if op_type in op_type_config_dict: self._config_mapping[op_name] = op_type_config_dict[op_type] @@ -1746,9 +1845,19 @@ def get_config_set_for_tuning( op_type_candidate = [ op_types_to_quantize, list(set(op_types_to_quantize).difference({"EmbedLayerNormalization", "Gather", "LSTM"})), - list(set(op_types_to_quantize).difference({"EmbedLayerNormalization", "Gather", "LSTM", "Conv", "FusedConv"})), - list(set(op_types_to_quantize).difference({"EmbedLayerNormalization", "Gather", "LSTM", "Conv", "FusedConv", "Attention"})), - list(set(op_types_to_quantize).difference({"EmbedLayerNormalization", "Gather", "LSTM", "Conv", "FusedConv", "MatMul"})), + list( + set(op_types_to_quantize).difference({"EmbedLayerNormalization", "Gather", "LSTM", "Conv", "FusedConv"}) + ), + list( + set(op_types_to_quantize).difference( + {"EmbedLayerNormalization", "Gather", "LSTM", "Conv", "FusedConv", "Attention"} + ) + ), + list( + set(op_types_to_quantize).difference( + {"EmbedLayerNormalization", "Gather", "LSTM", "Conv", 
"FusedConv", "MatMul"} + ) + ), ] cfg_lst = [] @@ -1780,7 +1889,8 @@ def register_supported_configs(cls) -> None: ), operators=["FusedConv", "Conv", "EmbedLayerNormalization"], valid_func_list=utility.DYNAMIC_CHECK_FUNC_LIST, - )) + ) + ) supported_configs.append( _OperatorConfig( config=OperatorConfig( @@ -1792,7 +1902,8 @@ def register_supported_configs(cls) -> None: ), operators=["MatMul"], valid_func_list=utility.DYNAMIC_CHECK_FUNC_LIST, - )) + ) + ) supported_configs.append( _OperatorConfig( config=OperatorConfig( @@ -1804,7 +1915,8 @@ def register_supported_configs(cls) -> None: ), operators=["Gather", "Attention", "LSTM"], valid_func_list=utility.DYNAMIC_CHECK_FUNC_LIST, - )) + ) + ) cls.supported_configs = supported_configs def to_dict(self): @@ -1819,10 +1931,18 @@ def to_dict(self): result[key] = local_result continue if not isinstance(val, list): - result[key] = getattr(val, "tensor_type", val) if isinstance(val, quantization.QuantType) else getattr(val, "value", val) + result[key] = ( + getattr(val, "tensor_type", val) + if isinstance(val, quantization.QuantType) + else getattr(val, "value", val) + ) else: result[key] = [ - getattr(item, "tensor_type", item) if isinstance(item, quantization.QuantType) else getattr(item, "value", item) + ( + getattr(item, "tensor_type", item) + if isinstance(item, quantization.QuantType) + else getattr(item, "value", item) + ) for item in val ] return result diff --git a/onnx_neural_compressor/constants.py b/onnx_neural_compressor/constants.py index 39e6429ed..71caf2a49 100644 --- a/onnx_neural_compressor/constants.py +++ b/onnx_neural_compressor/constants.py @@ -56,76 +56,247 @@ GPTQ_OP_LIST = ["MatMul"] -DYNAMIC_CPU_OP_LIST = [ - "FusedConv", "Conv", "EmbedLayerNormalization", "MatMul", "Gather", "Attention", "LSTM" -] -DYNAMIC_CUDA_OP_LIST = [ - "FusedConv", "Conv", "EmbedLayerNormalization", "MatMul", "Gather", "Attention", "LSTM" -] +DYNAMIC_CPU_OP_LIST = ["FusedConv", "Conv", "EmbedLayerNormalization", "MatMul", "Gather", "Attention", "LSTM"] +DYNAMIC_CUDA_OP_LIST = ["FusedConv", "Conv", "EmbedLayerNormalization", "MatMul", "Gather", "Attention", "LSTM"] DYNAMIC_DML_OP_LIST = [] -DYNAMIC_DNNL_OP_LIST = [ - "FusedConv", "Conv", "EmbedLayerNormalization", "MatMul", "Gather", "Attention", "LSTM" -] +DYNAMIC_DNNL_OP_LIST = ["FusedConv", "Conv", "EmbedLayerNormalization", "MatMul", "Gather", "Attention", "LSTM"] DYNAMIC_TRT_OP_LIST = [] STATIC_QDQ_CPU_OP_LIST = [ - "FusedConv", "Conv", "Gather", "GatherElements", "GatherND", "Tile", - "MatMul", "Gemm", "EmbedLayerNormalization", "Attention", - "Relu", "Clip", "LeakyRelu", "Sigmoid", "MaxPool", "GlobalAveragePool", - "Pad", "Split", "Squeeze", "Reshape", "Concat", "AveragePool", - "Unsqueeze", "Transpose", "Resize", "Abs", "Shrink", "Sign", - "Flatten", "Expand", "Slice", "Mod", "ReduceMax", "ReduceMin", "CenterCropPad" + "FusedConv", + "Conv", + "Gather", + "GatherElements", + "GatherND", + "Tile", + "MatMul", + "Gemm", + "EmbedLayerNormalization", + "Attention", + "Relu", + "Clip", + "LeakyRelu", + "Sigmoid", + "MaxPool", + "GlobalAveragePool", + "Pad", + "Split", + "Squeeze", + "Reshape", + "Concat", + "AveragePool", + "Unsqueeze", + "Transpose", + "Resize", + "Abs", + "Shrink", + "Sign", + "Flatten", + "Expand", + "Slice", + "Mod", + "ReduceMax", + "ReduceMin", + "CenterCropPad", ] STATIC_QDQ_CUDA_OP_LIST = [ - "FusedConv", "Conv", "Gather", - "MatMul", "Gemm", "EmbedLayerNormalization", "Attention", - "Relu", "Clip", "LeakyRelu", "Sigmoid", "MaxPool", "GlobalAveragePool", - "Pad", "Split", 
"Squeeze", "Reshape", "Concat", "AveragePool", - "Unsqueeze", "Transpose", "Resize", "Abs", "Shrink", "Sign", - "Flatten", "Expand", "Slice", "Mod", "ReduceMax", "ReduceMin", + "FusedConv", + "Conv", + "Gather", + "MatMul", + "Gemm", + "EmbedLayerNormalization", + "Attention", + "Relu", + "Clip", + "LeakyRelu", + "Sigmoid", + "MaxPool", + "GlobalAveragePool", + "Pad", + "Split", + "Squeeze", + "Reshape", + "Concat", + "AveragePool", + "Unsqueeze", + "Transpose", + "Resize", + "Abs", + "Shrink", + "Sign", + "Flatten", + "Expand", + "Slice", + "Mod", + "ReduceMax", + "ReduceMin", ] STATIC_QDQ_DML_OP_LIST = [ - "Conv", "MatMul", "Relu", "Clip", "MaxPool", + "Conv", + "MatMul", + "Relu", + "Clip", + "MaxPool", ] STATIC_QDQ_DNNL_OP_LIST = [ - "FusedConv", "Conv", "Gather", - "MatMul", "Gemm", "EmbedLayerNormalization", "Attention", - "Relu", "Clip", "LeakyRelu", "Sigmoid", "MaxPool", "GlobalAveragePool", - "Pad", "Split", "Squeeze", "Reshape", "Concat", "AveragePool", - "Unsqueeze", "Transpose", "Resize", + "FusedConv", + "Conv", + "Gather", + "MatMul", + "Gemm", + "EmbedLayerNormalization", + "Attention", + "Relu", + "Clip", + "LeakyRelu", + "Sigmoid", + "MaxPool", + "GlobalAveragePool", + "Pad", + "Split", + "Squeeze", + "Reshape", + "Concat", + "AveragePool", + "Unsqueeze", + "Transpose", + "Resize", ] STATIC_QDQ_TRT_OP_LIST = [ - "Conv", "MatMul", "Attention", "LeakyRelu", "Gather", "Sigmoid", - "MaxPool", "EmbedLayerNormalization", "GlobalAveragePool", "Pad", - "Split", "Squeeze", "Reshape", "Concat", "AveragePool", "Unsqueeze", - "Transpose", "Resize", "Gemm", "Add", + "Conv", + "MatMul", + "Attention", + "LeakyRelu", + "Gather", + "Sigmoid", + "MaxPool", + "EmbedLayerNormalization", + "GlobalAveragePool", + "Pad", + "Split", + "Squeeze", + "Reshape", + "Concat", + "AveragePool", + "Unsqueeze", + "Transpose", + "Resize", + "Gemm", + "Add", ] STATIC_QOPERATOR_CPU_OP_LIST = [ - "FusedConv", "Conv", "Gather", "GatherElements", "GatherND", "Tile", - "MatMul", "Gemm", "EmbedLayerNormalization", "Attention", "Mul", - "Relu", "Clip", "LeakyRelu", "Sigmoid", "MaxPool", "GlobalAveragePool", - "Pad", "Split", "Add", "Squeeze", "Reshape", "Concat", "AveragePool", - "Unsqueeze", "Transpose", "ArgMax", "Resize", "Abs", "Shrink", "Sign", - "Flatten", "Expand", "Slice", "Mod", "ReduceMax", "ReduceMin", "CenterCropPad", + "FusedConv", + "Conv", + "Gather", + "GatherElements", + "GatherND", + "Tile", + "MatMul", + "Gemm", + "EmbedLayerNormalization", + "Attention", + "Mul", + "Relu", + "Clip", + "LeakyRelu", + "Sigmoid", + "MaxPool", + "GlobalAveragePool", + "Pad", + "Split", + "Add", + "Squeeze", + "Reshape", + "Concat", + "AveragePool", + "Unsqueeze", + "Transpose", + "ArgMax", + "Resize", + "Abs", + "Shrink", + "Sign", + "Flatten", + "Expand", + "Slice", + "Mod", + "ReduceMax", + "ReduceMin", + "CenterCropPad", ] STATIC_QOPERATOR_CUDA_OP_LIST = [ - "FusedConv", "Conv", "Gather", - "MatMul", "Gemm", "EmbedLayerNormalization", "Attention", "Mul", - "Relu", "Clip", "LeakyRelu", "Sigmoid", "MaxPool", "GlobalAveragePool", - "Pad", "Split", "Add", "Squeeze", "Reshape", "Concat", "AveragePool", - "Unsqueeze", "Transpose", "ArgMax", "Resize", "Abs", "Shrink", "Sign", - "Flatten", "Expand", "Slice", "Mod", "ReduceMax", "ReduceMin", + "FusedConv", + "Conv", + "Gather", + "MatMul", + "Gemm", + "EmbedLayerNormalization", + "Attention", + "Mul", + "Relu", + "Clip", + "LeakyRelu", + "Sigmoid", + "MaxPool", + "GlobalAveragePool", + "Pad", + "Split", + "Add", + "Squeeze", + "Reshape", + "Concat", + "AveragePool", + 
"Unsqueeze", + "Transpose", + "ArgMax", + "Resize", + "Abs", + "Shrink", + "Sign", + "Flatten", + "Expand", + "Slice", + "Mod", + "ReduceMax", + "ReduceMin", ] STATIC_QOPERATOR_DML_OP_LIST = [ - "Conv", "MatMul", "Mul", "Relu", "Clip", "MaxPool", "Add", + "Conv", + "MatMul", + "Mul", + "Relu", + "Clip", + "MaxPool", + "Add", ] STATIC_QOPERATOR_DNNL_OP_LIST = [ - "FusedConv", "Conv", "Gather", - "MatMul", "Gemm", "EmbedLayerNormalization", "Attention", "Mul", - "Relu", "Clip", "LeakyRelu", "Sigmoid", "MaxPool", "GlobalAveragePool", - "Pad", "Split", "Add", "Squeeze", "Reshape", "Concat", "AveragePool", - "Unsqueeze", "Transpose", "ArgMax", "Resize", + "FusedConv", + "Conv", + "Gather", + "MatMul", + "Gemm", + "EmbedLayerNormalization", + "Attention", + "Mul", + "Relu", + "Clip", + "LeakyRelu", + "Sigmoid", + "MaxPool", + "GlobalAveragePool", + "Pad", + "Split", + "Add", + "Squeeze", + "Reshape", + "Concat", + "AveragePool", + "Unsqueeze", + "Transpose", + "ArgMax", + "Resize", ] STATIC_QOPERATOR_TRT_OP_LIST = [] diff --git a/onnx_neural_compressor/quantization/__init__.py b/onnx_neural_compressor/quantization/__init__.py index ee6c44379..1dcd5e428 100644 --- a/onnx_neural_compressor/quantization/__init__.py +++ b/onnx_neural_compressor/quantization/__init__.py @@ -13,7 +13,7 @@ # limitations under the License. +from onnxruntime.quantization import CalibrationMethod from onnxruntime.quantization.quant_utils import QuantFormat, QuantType -from onnxruntime.quantization import CalibrationMethod from onnx_neural_compressor.quantization.quantize import quantize diff --git a/onnx_neural_compressor/quantization/algorithm_entry.py b/onnx_neural_compressor/quantization/algorithm_entry.py index 4428ff7ad..1e42810e4 100644 --- a/onnx_neural_compressor/quantization/algorithm_entry.py +++ b/onnx_neural_compressor/quantization/algorithm_entry.py @@ -18,12 +18,11 @@ import onnx import onnxruntime as ort -from onnx_neural_compressor.algorithms import utility as quant_utils -from onnx_neural_compressor.algorithms.post_training_quant import calibrate -from onnx_neural_compressor.algorithms.post_training_quant import quantizer from onnxruntime import quantization from onnx_neural_compressor import config, constants, data_reader, logger, utility +from onnx_neural_compressor.algorithms import utility as quant_utils +from onnx_neural_compressor.algorithms.post_training_quant import calibrate, quantizer from onnx_neural_compressor.algorithms.smoother import core from onnx_neural_compressor.algorithms.weight_only import awq, gptq, rtn @@ -53,7 +52,7 @@ def gptq_quantize_entry( quant_config: config.GPTQConfig, calibration_data_reader: data_reader.CalibrationDataReader, *args, - **kwargs + **kwargs, ) -> onnx.ModelProto: """The main entry to apply gptq quantization.""" assert calibration_data_reader is not None, "Please provide calibration_data_reader" @@ -83,7 +82,7 @@ def awq_quantize_entry( quant_config: config.AWQConfig, calibration_data_reader: data_reader.CalibrationDataReader, *args, - **kwargs + **kwargs, ) -> onnx.ModelProto: """The main entry to apply awq quantization.""" assert calibration_data_reader is not None, "Please provide calibration_data_reader" @@ -105,6 +104,7 @@ def awq_quantize_entry( quant_utils.dump_woq_stats(model, config_mapping, quant_config.white_list) return model + ###################### Static quant Entry ################################## @utility.register_algo(name=constants.STATIC_QUANT) def static_quantize_entry( @@ -166,7 +166,7 @@ def smooth_quant_entry( calibration_data_reader: 
data_reader.CalibrationDataReader, model_output: Union[pathlib.Path, str] = None, *args, - **kwargs + **kwargs, ) -> Union[pathlib.Path, str, onnx.ModelProto]: """Apply smooth quant.""" assert calibration_data_reader is not None, "Please provide calibration_data_reader" @@ -179,7 +179,7 @@ def smooth_quant_entry( smoother = core.Smoother( model, calibration_data_reader, - execution_provider=getattr(quant_config, "execution_provider", "CPUExecutionProvider") + execution_provider=getattr(quant_config, "execution_provider", "CPUExecutionProvider"), ) smoothed_model = smoother.transform(**quant_config.to_dict()) with tempfile.TemporaryDirectory(prefix="ort.quant.") as tmp_dir: @@ -235,7 +235,7 @@ def dynamic_quantize_entry( model, config_mapping, op_types_to_quantize=quant_config.op_types_to_quantize, - ) + ) _quantizer.quantize_model() if model_output is not None: _quantizer.model.save(model_output) diff --git a/onnx_neural_compressor/quantization/matmul_nbits_quantizer.py b/onnx_neural_compressor/quantization/matmul_nbits_quantizer.py index c338454f1..b41c56270 100644 --- a/onnx_neural_compressor/quantization/matmul_nbits_quantizer.py +++ b/onnx_neural_compressor/quantization/matmul_nbits_quantizer.py @@ -14,10 +14,11 @@ from typing import List, Union # isort: skip -import onnx -import onnxruntime as ort import pathlib import tempfile + +import onnx +import onnxruntime as ort from onnxruntime.quantization import matmul_4bits_quantizer from onnx_neural_compressor import config, data_reader, logger, onnx_model, utility @@ -156,7 +157,10 @@ def int4_quant_algo(self): opt_tmp_file = tempfile.TemporaryDirectory() # do graph optimization if not layer_wise_quant - if not getattr(self.algo_config, "layer_wise_quant", False) and self.optimization_level != ort.GraphOptimizationLevel.ORT_DISABLE_ALL: + if ( + not getattr(self.algo_config, "layer_wise_quant", False) + and self.optimization_level != ort.GraphOptimizationLevel.ORT_DISABLE_ALL + ): if not isinstance(model, str): onnx.save(model, pathlib.Path(opt_tmp_file.name).joinpath("tmp.onnx").as_posix()) model = pathlib.Path(opt_tmp_file.name).joinpath("tmp.onnx").as_posix() @@ -179,4 +183,3 @@ def int4_quant_algo(self): def process(self): self.int4_quant_algo() - diff --git a/onnx_neural_compressor/quantization/quantize.py b/onnx_neural_compressor/quantization/quantize.py index dab9c7b5d..c90e16d38 100644 --- a/onnx_neural_compressor/quantization/quantize.py +++ b/onnx_neural_compressor/quantization/quantize.py @@ -13,13 +13,13 @@ # limitations under the License. 
import pathlib +import tempfile from typing import Union import onnx +import onnxruntime as ort from onnxruntime.quantization.quantize import QuantConfig -import onnxruntime as ort -import tempfile from onnx_neural_compressor import config from onnx_neural_compressor.quantization import algorithm_entry as algos @@ -35,7 +35,7 @@ def quantize( if optimization_level != ort.GraphOptimizationLevel.ORT_DISABLE_ALL: sess_options = ort.SessionOptions() sess_options.graph_optimization_level = optimization_level - sess_options.optimized_model_filepath = pathlib.Path(tmp_dir).joinpath("opt.onnx").as_posix() + sess_options.optimized_model_filepath = pathlib.Path(tmp_dir).joinpath("opt.onnx").as_posix() session = ort.InferenceSession(model_input, sess_options) del session model_input = sess_options.optimized_model_filepath @@ -50,8 +50,8 @@ def quantize( model_input, quant_config, quant_config.calibration_data_reader, model_output=model_output ) elif isinstance(quant_config, config.DynamicQuantConfig): - algos.dynamic_quantize_entry( - model_input, quant_config, model_output=model_output - ) + algos.dynamic_quantize_entry(model_input, quant_config, model_output=model_output) else: - raise TypeError("Invalid quantization config type, it must be either StaticQuantConfig or DynamicQuantConfig.") + raise TypeError( + "Invalid quantization config type, it must be either StaticQuantConfig or DynamicQuantConfig." + ) diff --git a/onnx_neural_compressor/quantization/tuning.py b/onnx_neural_compressor/quantization/tuning.py index b0b96ed59..a5caa4c35 100644 --- a/onnx_neural_compressor/quantization/tuning.py +++ b/onnx_neural_compressor/quantization/tuning.py @@ -21,11 +21,11 @@ import uuid import onnx - -from onnx_neural_compressor import config, data_reader, logger, utility import onnxruntime as ort from onnx import external_data_helper +from onnx_neural_compressor import config, data_reader, logger, utility + from typing import Any, Callable, Dict, Generator, Iterator, List, Optional, Sized, Tuple, Union # isort: skip @@ -103,7 +103,9 @@ def _set_eval_fn_registry(self, user_eval_fns: List[Dict]) -> None: { self.EVAL_FN: user_eval_fn_pair[self.EVAL_FN], self.WEIGHT: user_eval_fn_pair.get(self.WEIGHT, 1.0), - self.FN_NAME: user_eval_fn_pair.get(self.FN_NAME, getattr(user_eval_fn_pair[self.EVAL_FN], "__name__", "custom_func")), + self.FN_NAME: user_eval_fn_pair.get( + self.FN_NAME, getattr(user_eval_fn_pair[self.EVAL_FN], "__name__", "custom_func") + ), } for user_eval_fn_pair in user_eval_fns ] @@ -252,6 +254,7 @@ def __iter__(self) -> Generator[config.BaseConfig, Any, None]: self.verify_config_list.append(new_config) yield new_config + class TuningConfig: """Config for auto tuning pipeline. @@ -368,6 +371,7 @@ def print_config_diff(self, config): else: logger.info("quant config difference: {}".format(config.get_diff_dict(self.tuning_history[0].quant_config))) + class TuningLogger: """A unified logger for the tuning/quantization process. 
@@ -540,9 +544,7 @@ def autotune( pathlib.Path(model_input).parent.joinpath("config.json").as_posix(), pathlib.Path(tmp_folder.name).joinpath("config.json").as_posix(), ) - eval_result: float = eval_func_wrapper.evaluate( - pathlib.Path(tmp_folder.name).joinpath("eval.onnx").as_posix() - ) + eval_result: float = eval_func_wrapper.evaluate(pathlib.Path(tmp_folder.name).joinpath("eval.onnx").as_posix()) tuning_logger.evaluation_end() logger.info("Evaluation result: %.4f", eval_result) tuning_monitor.add_trial_result(trial_index, eval_result, quant_config) @@ -554,8 +556,10 @@ def autotune( tuning_logger.tuning_end() if best_quant_model is None: - logger.info("Don't find the quantized model which meets accuracy requirement. " - "Please try other configs or adjust tolerable_loss.") + logger.info( + "Don't find the quantized model which meets accuracy requirement. " + "Please try other configs or adjust tolerable_loss." + ) exit(0) tmp_folder.cleanup() diff --git a/onnx_neural_compressor/utility.py b/onnx_neural_compressor/utility.py index f0e4ac093..f1cf126d2 100644 --- a/onnx_neural_compressor/utility.py +++ b/onnx_neural_compressor/utility.py @@ -103,8 +103,10 @@ def random_seed(self, random_seed): if check_value("random_seed", random_seed, int): self._random_seed = random_seed + options = Options() + def singleton(cls): """Singleton decorator.""" @@ -311,49 +313,122 @@ def auto_detect_ep(): else: return "CPUExecutionProvider" + def static_basic_check(config, optype, execution_provider, quant_format): if quant_format == quantization.QuantFormat.QOperator: if execution_provider not in constants.STATIC_QOPERATOR_OP_LIST_MAP: - raise ValueError("Unsupported execution_provider {}, only support {}.".format(execution_provider, list(constants.STATIC_QOPERATOR_OP_LIST_MAP.keys()))) + raise ValueError( + "Unsupported execution_provider {}, only support {}.".format( + execution_provider, list(constants.STATIC_QOPERATOR_OP_LIST_MAP.keys()) + ) + ) supported_optype = constants.STATIC_QOPERATOR_OP_LIST_MAP[execution_provider] if optype not in supported_optype: - raise ValueError("Unsupported optype {} for {}, only support {}.".format(optype, execution_provider, supported_optype)) + raise ValueError( + "Unsupported optype {} for {}, only support {}.".format(optype, execution_provider, supported_optype) + ) elif quant_format == quantization.QuantFormat.QDQ: if execution_provider not in constants.STATIC_QDQ_OP_LIST_MAP: - raise ValueError("Unsupported execution_provider {}, only support {}.".format(execution_provider, list(constants.STATIC_QDQ_OP_LIST_MAP.keys()))) + raise ValueError( + "Unsupported execution_provider {}, only support {}.".format( + execution_provider, list(constants.STATIC_QDQ_OP_LIST_MAP.keys()) + ) + ) supported_optype = constants.STATIC_QDQ_OP_LIST_MAP[execution_provider] if optype not in supported_optype: - raise ValueError("Unsupported optype {} for {}, only support {}.".format(optype, execution_provider, supported_optype)) + raise ValueError( + "Unsupported optype {} for {}, only support {}.".format(optype, execution_provider, supported_optype) + ) else: - raise ValueError("Unsupported quant_format {}, only support QuantFormat.QOperator and QuantFormat.QDQ.".format(quant_format)) + raise ValueError( + "Unsupported quant_format {}, only support QuantFormat.QOperator and QuantFormat.QDQ.".format(quant_format) + ) return config + def static_cpu_check(config, optype, execution_provider, quant_format): if execution_provider != "CPUExecutionProvider": return config # only support 
per-tensor - if optype in ["EmbedLayerNormalization", "Relu", "Clip", "LeakyRelu", "Sigmoid", "MaxPool", "GlobalAveragePool", - "Pad", "Split", "Squeeze", "Reshape", "Concat", "AveragePool", "Tile", - "Unsqueeze", "Transpose", "Resize", "Abs", "Shrink", "Sign", "Attention", - "Flatten", "Expand", "Slice", "Mod", "ReduceMax", "ReduceMin", - "CenterCropPad", "Add", "Mul", "ArgMax"]: + if optype in [ + "EmbedLayerNormalization", + "Relu", + "Clip", + "LeakyRelu", + "Sigmoid", + "MaxPool", + "GlobalAveragePool", + "Pad", + "Split", + "Squeeze", + "Reshape", + "Concat", + "AveragePool", + "Tile", + "Unsqueeze", + "Transpose", + "Resize", + "Abs", + "Shrink", + "Sign", + "Attention", + "Flatten", + "Expand", + "Slice", + "Mod", + "ReduceMax", + "ReduceMin", + "CenterCropPad", + "Add", + "Mul", + "ArgMax", + ]: setattr(config, "per_channel", False) if optype in ["Attention"]: setattr(config, "activation_type", onnx.TensorProto.UINT8) return config + def static_cuda_check(config, optype, execution_provider, quant_format): if execution_provider != "CUDAExecutionProvider": return config # only support per-tensor - if optype in ["EmbedLayerNormalization", "Relu", "Clip", "LeakyRelu", "Sigmoid", "MaxPool", "GlobalAveragePool", - "Pad", "Split", "Squeeze", "Reshape", "Concat", "AveragePool", "Tile", - "Unsqueeze", "Transpose", "Resize", "Abs", "Shrink", "Sign", "Attention", - "Flatten", "Expand", "Slice", "Mod", "ReduceMax", "ReduceMin", - "CenterCropPad", "Add", "Mul", "ArgMax"]: + if optype in [ + "EmbedLayerNormalization", + "Relu", + "Clip", + "LeakyRelu", + "Sigmoid", + "MaxPool", + "GlobalAveragePool", + "Pad", + "Split", + "Squeeze", + "Reshape", + "Concat", + "AveragePool", + "Tile", + "Unsqueeze", + "Transpose", + "Resize", + "Abs", + "Shrink", + "Sign", + "Attention", + "Flatten", + "Expand", + "Slice", + "Mod", + "ReduceMax", + "ReduceMin", + "CenterCropPad", + "Add", + "Mul", + "ArgMax", + ]: setattr(config, "per_channel", False) if optype in ["Attention"]: @@ -361,6 +436,7 @@ def static_cuda_check(config, optype, execution_provider, quant_format): setattr(config, "weight_type", onnx.TensorProto.INT8) return config + def static_dml_check(config, optype, execution_provider, quant_format): if execution_provider != "DmlExecutionProvider": return config @@ -370,6 +446,7 @@ def static_dml_check(config, optype, execution_provider, quant_format): setattr(config, "per_channel", False) return config + def static_dnnl_check(config, optype, execution_provider, quant_format): if execution_provider != "DnnlExecutionProvider": return config @@ -377,6 +454,7 @@ def static_dnnl_check(config, optype, execution_provider, quant_format): # current configurations are same as CPU EP return static_cpu_check(config, optype, execution_provider, quant_format) + def static_trt_check(config, optype, execution_provider, quant_format): if execution_provider != "TensorrtExecutionProvider": return config @@ -395,6 +473,7 @@ def static_trt_check(config, optype, execution_provider, quant_format): setattr(config, "activation_sym", True) return config + STATIC_CHECK_FUNC_LIST = [ static_basic_check, static_cpu_check, @@ -407,13 +486,20 @@ def static_trt_check(config, optype, execution_provider, quant_format): def dynamic_basic_check(config, optype, execution_provider, quant_format=None): if execution_provider not in constants.DYNAMIC_OP_LIST_MAP: - raise ValueError("Unsupported execution_provider {}, only support {}.".format(execution_provider, list(constants.DYNAMIC_OP_LIST_MAP.keys()))) + raise ValueError( + "Unsupported 
execution_provider {}, only support {}.".format( + execution_provider, list(constants.DYNAMIC_OP_LIST_MAP.keys()) + ) + ) supported_optype = constants.DYNAMIC_OP_LIST_MAP[execution_provider] if optype not in supported_optype: - raise ValueError("Unsupported optype {} for {}, only support {}.".format(optype, execution_provider, supported_optype)) + raise ValueError( + "Unsupported optype {} for {}, only support {}.".format(optype, execution_provider, supported_optype) + ) return config + def dynamic_cpu_check(config, optype, execution_provider, quant_format=None): if execution_provider != "CPUExecutionProvider": return config @@ -422,12 +508,14 @@ def dynamic_cpu_check(config, optype, execution_provider, quant_format=None): setattr(config, "per_channel", False) return config + def dynamic_cuda_check(config, optype, execution_provider, quant_format=None): if execution_provider != "CUDAExecutionProvider": return config # current configurations are same as CPU EP return dynamic_cpu_check(config, optype, execution_provider, quant_format) + def dynamic_dml_check(config, optype, execution_provider, quant_format=None): if execution_provider != "DmlExecutionProvider": return config @@ -435,12 +523,14 @@ def dynamic_dml_check(config, optype, execution_provider, quant_format=None): # don't support dynamic quantization return None + def dynamic_dnnl_check(config, optype, execution_provider, quant_format=None): if execution_provider != "DnnlExecutionProvider": return config # current configurations are same as CPU EP return dynamic_cpu_check(config, optype, execution_provider, quant_format) + def dynamic_trt_check(config, optype, execution_provider, quant_format=None): if execution_provider != "TensorrtExecutionProvider": return config @@ -448,6 +538,7 @@ def dynamic_trt_check(config, optype, execution_provider, quant_format=None): # don't support dynamic quantization return None + DYNAMIC_CHECK_FUNC_LIST = [ dynamic_basic_check, dynamic_cpu_check, diff --git a/test/quantization/layer_wise/test_layer_wise.py b/test/quantization/layer_wise/test_layer_wise.py index 20fe6c547..af0bca3e4 100644 --- a/test/quantization/layer_wise/test_layer_wise.py +++ b/test/quantization/layer_wise/test_layer_wise.py @@ -122,9 +122,9 @@ def test_rtn_layer_wise(self): qmodel = self._apply_quantize(rtn_config, algos.rtn_quantize_entry) self.assertTrue(self._check_model_is_quantized(qmodel)) - lwq_quantized_weight = self._get_quantized_matmul_weight(qmodel_lwq, "/lm_head/MatMul_Q4G32") + lwq_quantized_weight = self._get_quantized_matmul_weight(qmodel_lwq, "/lm_head/MatMul_Q4") self.assertIsNotNone(lwq_quantized_weight) - quantized_weight = self._get_quantized_matmul_weight(qmodel, "/lm_head/MatMul_Q4G32") + quantized_weight = self._get_quantized_matmul_weight(qmodel, "/lm_head/MatMul_Q4") self.assertIsNotNone(quantized_weight) self.assertTrue((lwq_quantized_weight == quantized_weight).all()) @@ -152,9 +152,9 @@ def test_rtn_layer_wise_with_ort_like_api(self): self.assertTrue(self._check_model_is_quantized(qmodel_lwq)) # compare qmodel - lwq_quantized_weight = self._get_quantized_matmul_weight(qmodel_lwq, "/lm_head/MatMul_Q4G128") + lwq_quantized_weight = self._get_quantized_matmul_weight(qmodel_lwq, "/lm_head/MatMul_Q4") self.assertIsNotNone(lwq_quantized_weight) - quantized_weight = self._get_quantized_matmul_weight(qmodel, "/lm_head/MatMul_Q4G128") + quantized_weight = self._get_quantized_matmul_weight(qmodel, "/lm_head/MatMul_Q4") self.assertIsNotNone(quantized_weight) self.assertTrue((lwq_quantized_weight == 
quantized_weight).all()) @@ -169,9 +169,9 @@ def test_gptq_layer_wise(self): qmodel = self._apply_quantize(gptq_config, algos.gptq_quantize_entry, self.calibration_data_reader) self.assertTrue(self._check_model_is_quantized(qmodel)) - lwq_quantized_weight = self._get_quantized_matmul_weight(qmodel_lwq, "/lm_head/MatMul_Q4G32") + lwq_quantized_weight = self._get_quantized_matmul_weight(qmodel_lwq, "/lm_head/MatMul_Q4") self.assertIsNotNone(lwq_quantized_weight) - quantized_weight = self._get_quantized_matmul_weight(qmodel, "/lm_head/MatMul_Q4G32") + quantized_weight = self._get_quantized_matmul_weight(qmodel, "/lm_head/MatMul_Q4") self.assertIsNotNone(quantized_weight) self.assertTrue((lwq_quantized_weight == quantized_weight).all()) @@ -203,9 +203,9 @@ def test_gptq_layer_wise_with_ort_like_api(self): self.assertTrue(self._check_model_is_quantized(qmodel_lwq)) # compare qmodel - lwq_quantized_weight = self._get_quantized_matmul_weight(qmodel_lwq, "/lm_head/MatMul_Q4G128") + lwq_quantized_weight = self._get_quantized_matmul_weight(qmodel_lwq, "/lm_head/MatMul_Q4") self.assertIsNotNone(lwq_quantized_weight) - quantized_weight = self._get_quantized_matmul_weight(qmodel, "/lm_head/MatMul_Q4G128") + quantized_weight = self._get_quantized_matmul_weight(qmodel, "/lm_head/MatMul_Q4") self.assertIsNotNone(quantized_weight) self.assertTrue((lwq_quantized_weight == quantized_weight).all()) diff --git a/test/quantization/post_training_quant/test_calibrate.py b/test/quantization/post_training_quant/test_calibrate.py index 7f176b9f4..a02880d4a 100644 --- a/test/quantization/post_training_quant/test_calibrate.py +++ b/test/quantization/post_training_quant/test_calibrate.py @@ -5,9 +5,9 @@ import numpy as np import onnx + from onnx_neural_compressor import data_reader -from onnx_neural_compressor.algorithms.post_training_quant import calibrate -from onnx_neural_compressor.algorithms.post_training_quant import calibrator +from onnx_neural_compressor.algorithms.post_training_quant import calibrate, calibrator def generate_input_initializer(tensor_shape, tensor_dtype, input_name): @@ -16,18 +16,31 @@ def generate_input_initializer(tensor_shape, tensor_dtype, input_name): init = onnx.numpy_helper.from_array(tensor, input_name) return init + class DataReader(data_reader.CalibrationDataReader): def __init__(self): self.data_list = [] self.data_list.append( - {"input0": np.array([[[[0.45, 0.60, 0.75]], [[0.25, 0.50, 0.75]], [[0.90, 0.70, 0.50]]]]).astype(np.float32)} + { + "input0": np.array([[[[0.45, 0.60, 0.75]], [[0.25, 0.50, 0.75]], [[0.90, 0.70, 0.50]]]]).astype( + np.float32 + ) + } ) self.data_list.append( - {"input0": np.array([[[[0.62, 0.94, 0.38]], [[0.70, 0.13, 0.07]], [[0.89, 0.75, 0.84]]]]).astype(np.float32)} + { + "input0": np.array([[[[0.62, 0.94, 0.38]], [[0.70, 0.13, 0.07]], [[0.89, 0.75, 0.84]]]]).astype( + np.float32 + ) + } ) self.data_list.append( - {"input0": np.array([[[[0.64, 0.24, 0.97]], [[0.82, 0.58, 0.27]], [[0.019, 0.34, 0.02]]]]).astype(np.float32)} + { + "input0": np.array([[[[0.64, 0.24, 0.97]], [[0.82, 0.58, 0.27]], [[0.019, 0.34, 0.02]]]]).astype( + np.float32 + ) + } ) self.enum_data = None diff --git a/test/quantization/post_training_quant/test_operators.py b/test/quantization/post_training_quant/test_operators.py index 910ed6060..9345305e8 100644 --- a/test/quantization/post_training_quant/test_operators.py +++ b/test/quantization/post_training_quant/test_operators.py @@ -1,8 +1,8 @@ +import collections import copy import os import shutil import unittest -import collections 
import numpy as np import onnx @@ -30,10 +30,14 @@ def build_model(): conv2_node = onnx.helper.make_node("Conv", ["add_out", "conv2_weight"], ["conv2_output"], name="conv2") # 1, 8, 13, 13 - concat_node = onnx.helper.make_node("Concat", ["conv1_output", "conv2_output"], ["concat_output"], name="Concat", axis=1) + concat_node = onnx.helper.make_node( + "Concat", ["conv1_output", "conv2_output"], ["concat_output"], name="Concat", axis=1 + ) # 1, 8, 11, 11 avg_args = {"kernel_shape": [3, 3]} - avgpool_node = onnx.helper.make_node("AveragePool", ["concat_output"], ["avg_output"], name="AveragePool", **avg_args) + avgpool_node = onnx.helper.make_node( + "AveragePool", ["concat_output"], ["avg_output"], name="AveragePool", **avg_args + ) reshape_node = onnx.helper.make_node("Reshape", ["avg_output", "shape"], ["reshape_output"], name="Reshape") add_node_2 = onnx.helper.make_node("Add", ["reshape_output", "add_init_2"], ["add_out_2"], name="add_2") @@ -128,7 +132,9 @@ def test_resize(self): resize_node = onnx.helper.make_node("Resize", resize_inputs, ["output"], name="resize_node", **resize_attrs) resize_roi = [0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0] resize_roi_name = "resize_roi" - resize_roi_initializer = onnx.helper.make_tensor(resize_roi_name, onnx.TensorProto.FLOAT, [len(resize_roi)], resize_roi) + resize_roi_initializer = onnx.helper.make_tensor( + resize_roi_name, onnx.TensorProto.FLOAT, [len(resize_roi)], resize_roi + ) initializers.extend([resize_roi_initializer]) resize_node.input.extend([resize_roi_name]) @@ -159,11 +165,15 @@ def test_resize(self): } q_model = self.qlinear_test(model, q_config, quantize_params, ["Resize", "Conv"]) - self.assertEqual(collections.Counter([node.op_type for node in q_model.model.graph.node])["DequantizeLinear"], 1) + self.assertEqual( + collections.Counter([node.op_type for node in q_model.model.graph.node])["DequantizeLinear"], 1 + ) self.assertEqual(collections.Counter([node.op_type for node in q_model.model.graph.node])["QuantizeLinear"], 1) q_model = self.qdq_test(model, q_config, quantize_params, ["Resize", "Conv"]) - self.assertEqual(collections.Counter([node.op_type for node in q_model.model.graph.node])["DequantizeLinear"], 4) + self.assertEqual( + collections.Counter([node.op_type for node in q_model.model.graph.node])["DequantizeLinear"], 4 + ) self.assertEqual(collections.Counter([node.op_type for node in q_model.model.graph.node])["QuantizeLinear"], 3) # test opset version 10 @@ -171,11 +181,15 @@ def test_resize(self): model.ir_version = 7 # use stable onnx ir version q_model = self.qlinear_test(model, q_config, quantize_params, ["Resize", "Conv"]) - self.assertEqual(collections.Counter([node.op_type for node in q_model.model.graph.node])["DequantizeLinear"], 1) + self.assertEqual( + collections.Counter([node.op_type for node in q_model.model.graph.node])["DequantizeLinear"], 1 + ) self.assertEqual(collections.Counter([node.op_type for node in q_model.model.graph.node])["QuantizeLinear"], 1) q_model = self.qdq_test(model, q_config, quantize_params, ["Resize", "Conv"]) - self.assertEqual(collections.Counter([node.op_type for node in q_model.model.graph.node])["DequantizeLinear"], 3) + self.assertEqual( + collections.Counter([node.op_type for node in q_model.model.graph.node])["DequantizeLinear"], 3 + ) self.assertEqual(collections.Counter([node.op_type for node in q_model.model.graph.node])["QuantizeLinear"], 2) def test_argmax(self): @@ -240,7 +254,9 @@ def test_argmax(self): "output": [np.uint8(0), np.float32(10.0)], } q_model = 
self.qlinear_test(model, q_config, quantize_params, ["Conv", "ArgMax"]) - self.assertEqual(collections.Counter([node.op_type for node in q_model.model.graph.node])["DequantizeLinear"], 1) + self.assertEqual( + collections.Counter([node.op_type for node in q_model.model.graph.node])["DequantizeLinear"], 1 + ) self.assertEqual(collections.Counter([node.op_type for node in q_model.model.graph.node])["QuantizeLinear"], 1) def test_gemm(self): @@ -284,10 +300,14 @@ def test_gemm(self): "output": [np.uint8(0), np.float32(10.0)], } q_model = self.qlinear_test(model, q_config, quantize_params, ["Gemm"]) - self.assertEqual(collections.Counter([node.op_type for node in q_model.model.graph.node])["DequantizeLinear"], 1) + self.assertEqual( + collections.Counter([node.op_type for node in q_model.model.graph.node])["DequantizeLinear"], 1 + ) self.assertEqual(collections.Counter([node.op_type for node in q_model.model.graph.node])["QuantizeLinear"], 1) q_model = self.qdq_test(model, q_config, quantize_params, ["Gemm"]) - self.assertEqual(collections.Counter([node.op_type for node in q_model.model.graph.node])["DequantizeLinear"], 4) + self.assertEqual( + collections.Counter([node.op_type for node in q_model.model.graph.node])["DequantizeLinear"], 4 + ) self.assertEqual(collections.Counter([node.op_type for node in q_model.model.graph.node])["QuantizeLinear"], 2) # test gemm with non-constant bias @@ -308,10 +328,14 @@ def test_gemm(self): model = onnx.helper.make_model(graph, opset_imports=[onnx.helper.make_opsetid("", 13)]) model.ir_version = 7 q_model = self.qlinear_test(model, q_config, quantize_params, ["Gemm"]) - self.assertEqual(collections.Counter([node.op_type for node in q_model.model.graph.node])["DequantizeLinear"], 0) + self.assertEqual( + collections.Counter([node.op_type for node in q_model.model.graph.node])["DequantizeLinear"], 0 + ) self.assertEqual(collections.Counter([node.op_type for node in q_model.model.graph.node])["QuantizeLinear"], 0) q_model = self.qdq_test(model, q_config, quantize_params, ["Gemm"]) - self.assertEqual(collections.Counter([node.op_type for node in q_model.model.graph.node])["DequantizeLinear"], 3) + self.assertEqual( + collections.Counter([node.op_type for node in q_model.model.graph.node])["DequantizeLinear"], 3 + ) self.assertEqual(collections.Counter([node.op_type for node in q_model.model.graph.node])["QuantizeLinear"], 2) def test_embed(self): @@ -319,7 +343,9 @@ def test_embed(self): input_ids_tensor = onnx.helper.make_tensor_value_info("input_ids", onnx.TensorProto.INT32, input_ids_shape) segment_ids_shape = [1, 4] - segment_ids_tensor = onnx.helper.make_tensor_value_info("segment_ids", onnx.TensorProto.INT32, segment_ids_shape) + segment_ids_tensor = onnx.helper.make_tensor_value_info( + "segment_ids", onnx.TensorProto.INT32, segment_ids_shape + ) # EmbedLayerNormalization Node Constants and Weights: word_embed_shape = [32, 4] @@ -344,10 +370,14 @@ def test_embed(self): # EmbedLayerNormalization Outputs: layernorm_out_shape = [1, 4, 4] - layernorm_out_tensor = onnx.helper.make_tensor_value_info("layernorm_out", onnx.TensorProto.FLOAT, layernorm_out_shape) + layernorm_out_tensor = onnx.helper.make_tensor_value_info( + "layernorm_out", onnx.TensorProto.FLOAT, layernorm_out_shape + ) mask_index_out_shape = [1] - mask_index_out_tensor = onnx.helper.make_tensor_value_info("mask_index_out", onnx.TensorProto.INT32, mask_index_out_shape) + mask_index_out_tensor = onnx.helper.make_tensor_value_info( + "mask_index_out", onnx.TensorProto.INT32, 
mask_index_out_shape + ) # EmbedLayerNormalization Node: embed_layer_norm_inputs = ["input_ids", "segment_ids", "word_embed", "pos_embed", "seg_embed", "gamma", "beta"] @@ -375,7 +405,8 @@ def test_embed(self): graph = onnx.helper.make_graph(nodes, graph_name, inputs, outputs, initializer=initializers) model = onnx.helper.make_model( - graph, opset_imports=[onnx.helper.make_opsetid("com.microsoft", 14), onnx.helper.make_opsetid("ai.onnx", 14)] + graph, + opset_imports=[onnx.helper.make_opsetid("com.microsoft", 14), onnx.helper.make_opsetid("ai.onnx", 14)], ) model.ir_version = 7 # use stable onnx ir version @@ -391,11 +422,17 @@ def test_embed(self): "input_ids": [np.uint8(10.0), np.float32(0)], } q_model = self.qlinear_test(model, q_config, quantize_params, ["EmbedLayerNormalization"]) - self.assertEqual(collections.Counter([node.op_type for node in q_model.model.graph.node])["QEmbedLayerNormalization"], 1) + self.assertEqual( + collections.Counter([node.op_type for node in q_model.model.graph.node])["QEmbedLayerNormalization"], 1 + ) q_model = self.qdq_test(model, q_config, quantize_params, ["EmbedLayerNormalization"]) - self.assertEqual(collections.Counter([node.op_type for node in q_model.model.graph.node])["DequantizeLinear"], 5) - self.assertEqual(collections.Counter([node.op_type for node in q_model.model.graph.node])["EmbedLayerNormalization"], 1) + self.assertEqual( + collections.Counter([node.op_type for node in q_model.model.graph.node])["DequantizeLinear"], 5 + ) + self.assertEqual( + collections.Counter([node.op_type for node in q_model.model.graph.node])["EmbedLayerNormalization"], 1 + ) def test_LSTM(self): input_shape = [1, 1, 200] @@ -431,7 +468,9 @@ def test_LSTM(self): q_config = {"lstm": self.q_config} q_model = self.dynamic_test(model, q_config, None, ["LSTM"]) - self.assertEqual(collections.Counter([node.op_type for node in q_model.model.graph.node])["DynamicQuantizeLSTM"], 1) + self.assertEqual( + collections.Counter([node.op_type for node in q_model.model.graph.node])["DynamicQuantizeLSTM"], 1 + ) def test_concat_reshape_pooling(self): model = build_model() @@ -464,12 +503,16 @@ def test_concat_reshape_pooling(self): model, q_config, quantize_params, quantizable_op_types, **{"dedicated_qdq_pair": True} ) self.assertEqual(collections.Counter([node.op_type for node in q_model.model.graph.node])["QuantizeLinear"], 1) - self.assertEqual(collections.Counter([node.op_type for node in q_model.model.graph.node])["DequantizeLinear"], 1) + self.assertEqual( + collections.Counter([node.op_type for node in q_model.model.graph.node])["DequantizeLinear"], 1 + ) q_model = self.qdq_test(model, q_config, quantize_params, quantizable_op_types, **{"dedicated_qdq_pair": True}) - q_model.save('test.onnx') + q_model.save("test.onnx") self.assertEqual(collections.Counter([node.op_type for node in q_model.model.graph.node])["QuantizeLinear"], 7) - self.assertEqual(collections.Counter([node.op_type for node in q_model.model.graph.node])["DequantizeLinear"], 9) + self.assertEqual( + collections.Counter([node.op_type for node in q_model.model.graph.node])["DequantizeLinear"], 9 + ) q_config = { "Reshape": self.q_config, @@ -480,11 +523,15 @@ def test_concat_reshape_pooling(self): } q_model = self.qlinear_test(model, q_config, quantize_params, quantizable_op_types) self.assertEqual(collections.Counter([node.op_type for node in q_model.model.graph.node])["QuantizeLinear"], 1) - self.assertEqual(collections.Counter([node.op_type for node in q_model.model.graph.node])["DequantizeLinear"], 1) + 
self.assertEqual( + collections.Counter([node.op_type for node in q_model.model.graph.node])["DequantizeLinear"], 1 + ) q_model = self.qdq_test(model, q_config, quantize_params, quantizable_op_types) self.assertEqual(collections.Counter([node.op_type for node in q_model.model.graph.node])["QuantizeLinear"], 2) - self.assertEqual(collections.Counter([node.op_type for node in q_model.model.graph.node])["DequantizeLinear"], 3) + self.assertEqual( + collections.Counter([node.op_type for node in q_model.model.graph.node])["DequantizeLinear"], 3 + ) q_config = { "Reshape": self.q_config, @@ -495,11 +542,15 @@ def test_concat_reshape_pooling(self): } q_model = self.qlinear_test(model, q_config, quantize_params, quantizable_op_types) self.assertEqual(collections.Counter([node.op_type for node in q_model.model.graph.node])["QuantizeLinear"], 0) - self.assertEqual(collections.Counter([node.op_type for node in q_model.model.graph.node])["DequantizeLinear"], 0) + self.assertEqual( + collections.Counter([node.op_type for node in q_model.model.graph.node])["DequantizeLinear"], 0 + ) q_model = self.qdq_test(model, q_config, quantize_params, quantizable_op_types) self.assertEqual(collections.Counter([node.op_type for node in q_model.model.graph.node])["QuantizeLinear"], 0) - self.assertEqual(collections.Counter([node.op_type for node in q_model.model.graph.node])["DequantizeLinear"], 0) + self.assertEqual( + collections.Counter([node.op_type for node in q_model.model.graph.node])["DequantizeLinear"], 0 + ) q_config = { "Reshape": self.q_config, @@ -510,12 +561,16 @@ def test_concat_reshape_pooling(self): } q_model = self.qlinear_test(model, q_config, quantize_params, quantizable_op_types) self.assertEqual(collections.Counter([node.op_type for node in q_model.model.graph.node])["QuantizeLinear"], 1) - self.assertEqual(collections.Counter([node.op_type for node in q_model.model.graph.node])["DequantizeLinear"], 1) + self.assertEqual( + collections.Counter([node.op_type for node in q_model.model.graph.node])["DequantizeLinear"], 1 + ) self.assertEqual(collections.Counter([node.op_type for node in q_model.model.graph.node])["AveragePool"], 1) q_model = self.qdq_test(model, q_config, quantize_params, quantizable_op_types) self.assertEqual(collections.Counter([node.op_type for node in q_model.model.graph.node])["QuantizeLinear"], 4) - self.assertEqual(collections.Counter([node.op_type for node in q_model.model.graph.node])["DequantizeLinear"], 6) + self.assertEqual( + collections.Counter([node.op_type for node in q_model.model.graph.node])["DequantizeLinear"], 6 + ) quantize_params = { "input": [np.uint8(10.0), np.float32(0)], @@ -542,7 +597,9 @@ def test_concat_reshape_pooling(self): q_model = self.qdq_test(model, q_config, quantize_params, quantizable_op_types) self.assertEqual(collections.Counter([node.op_type for node in q_model.model.graph.node])["QuantizeLinear"], 6) - self.assertEqual(collections.Counter([node.op_type for node in q_model.model.graph.node])["DequantizeLinear"], 8) + self.assertEqual( + collections.Counter([node.op_type for node in q_model.model.graph.node])["DequantizeLinear"], 8 + ) def test_conv(self): for op in ["Conv", "FusedConv"]: @@ -567,11 +624,19 @@ def test_conv(self): } quantizable_op_types = [op] q_model = self.qlinear_test(model, q_config, quantize_params, quantizable_op_types) - self.assertEqual(collections.Counter([node.op_type for node in q_model.model.graph.node])["DequantizeLinear"], 1) - self.assertEqual(collections.Counter([node.op_type for node in 
q_model.model.graph.node])["QuantizeLinear"], 2) + self.assertEqual( + collections.Counter([node.op_type for node in q_model.model.graph.node])["DequantizeLinear"], 1 + ) + self.assertEqual( + collections.Counter([node.op_type for node in q_model.model.graph.node])["QuantizeLinear"], 2 + ) q_model = self.qdq_test(model, q_config, quantize_params, quantizable_op_types) - self.assertEqual(collections.Counter([node.op_type for node in q_model.model.graph.node])["DequantizeLinear"], 4) - self.assertEqual(collections.Counter([node.op_type for node in q_model.model.graph.node])["QuantizeLinear"], 3) + self.assertEqual( + collections.Counter([node.op_type for node in q_model.model.graph.node])["DequantizeLinear"], 4 + ) + self.assertEqual( + collections.Counter([node.op_type for node in q_model.model.graph.node])["QuantizeLinear"], 3 + ) def test_matmul(self): A = onnx.helper.make_tensor_value_info("A", onnx.TensorProto.FLOAT, [1, 1, 5, 5]) @@ -590,15 +655,21 @@ def test_matmul(self): } quantizable_op_types = ["Matmul"] q_model = self.qlinear_test(model, q_config, quantize_params, quantizable_op_types) - self.assertEqual(collections.Counter([node.op_type for node in q_model.model.graph.node])["DequantizeLinear"], 1) + self.assertEqual( + collections.Counter([node.op_type for node in q_model.model.graph.node])["DequantizeLinear"], 1 + ) self.assertEqual(collections.Counter([node.op_type for node in q_model.model.graph.node])["QuantizeLinear"], 1) q_model = self.qdq_test(model, q_config, quantize_params, quantizable_op_types) - self.assertEqual(collections.Counter([node.op_type for node in q_model.model.graph.node])["DequantizeLinear"], 3) + self.assertEqual( + collections.Counter([node.op_type for node in q_model.model.graph.node])["DequantizeLinear"], 3 + ) self.assertEqual(collections.Counter([node.op_type for node in q_model.model.graph.node])["QuantizeLinear"], 2) q_config = {"Matmul": self.q_config} q_model = self.dynamic_test(model, q_config, None, quantizable_op_types) - self.assertEqual(collections.Counter([node.op_type for node in q_model.model.graph.node])["DynamicQuantizeLinear"], 1) + self.assertEqual( + collections.Counter([node.op_type for node in q_model.model.graph.node])["DynamicQuantizeLinear"], 1 + ) self.assertEqual(collections.Counter([node.op_type for node in q_model.model.graph.node])["MatMulInteger"], 1) quantize_params = {"A": [np.float32(10.0)], "B": [np.float32(10.0)], "C": [np.float32(10.0)]} @@ -609,7 +680,9 @@ def test_matmul(self): quantize_params = {} q_model = self.dynamic_test(model, q_config, quantize_params, quantizable_op_types) - self.assertEqual(collections.Counter([node.op_type for node in q_model.model.graph.node])["DynamicQuantizeLinear"], 1) + self.assertEqual( + collections.Counter([node.op_type for node in q_model.model.graph.node])["DynamicQuantizeLinear"], 1 + ) self.assertEqual(collections.Counter([node.op_type for node in q_model.model.graph.node])["MatMulInteger"], 1) def test_attention(self): @@ -632,12 +705,16 @@ def test_attention(self): q_model = self.qlinear_test(model, q_config, quantize_params, quantizable_op_types) self.assertEqual(collections.Counter([node.op_type for node in q_model.model.graph.node])["QAttention"], 1) self.assertEqual(collections.Counter([node.op_type for node in q_model.model.graph.node])["QuantizeLinear"], 2) - self.assertEqual(collections.Counter([node.op_type for node in q_model.model.graph.node])["DequantizeLinear"], 0) + self.assertEqual( + collections.Counter([node.op_type for node in 
q_model.model.graph.node])["DequantizeLinear"], 0
+        )
         self.qdq_test(model, q_config, quantize_params, quantizable_op_types)
         q_config = {"Attention": self.q_config}
         q_model = self.dynamic_test(model, q_config, quantize_params, quantizable_op_types)
-        self.assertEqual(collections.Counter([node.op_type for node in q_model.model.graph.node])["DynamicQuantizeLinear"], 2)
+        self.assertEqual(
+            collections.Counter([node.op_type for node in q_model.model.graph.node])["DynamicQuantizeLinear"], 2
+        )
         E = onnx.helper.make_tensor_value_info("E", onnx.TensorProto.INT32, [1, 1, 5, 5])
         F = onnx.helper.make_tensor_value_info("F", onnx.TensorProto.FLOAT, [1, 1, 5, 5])
@@ -655,15 +732,21 @@ def test_attention(self):
         q_model = self.qlinear_test(model, q_config, quantize_params, quantizable_op_types)
         self.assertEqual(collections.Counter([node.op_type for node in q_model.model.graph.node])["QuantizeLinear"], 2)
-        self.assertEqual(collections.Counter([node.op_type for node in q_model.model.graph.node])["DequantizeLinear"], 0)
+        self.assertEqual(
+            collections.Counter([node.op_type for node in q_model.model.graph.node])["DequantizeLinear"], 0
+        )
         q_model = self.qdq_test(model, q_config, quantize_params, quantizable_op_types)
         self.assertEqual(collections.Counter([node.op_type for node in q_model.model.graph.node])["QuantizeLinear"], 2)
-        self.assertEqual(collections.Counter([node.op_type for node in q_model.model.graph.node])["DequantizeLinear"], 2)
+        self.assertEqual(
+            collections.Counter([node.op_type for node in q_model.model.graph.node])["DequantizeLinear"], 2
+        )
         q_config = {"Attention": self.q_config}
         q_model = self.dynamic_test(model, q_config, quantize_params, quantizable_op_types)
-        self.assertEqual(collections.Counter([node.op_type for node in q_model.model.graph.node])["DynamicQuantizeLinear"], 2)
+        self.assertEqual(
+            collections.Counter([node.op_type for node in q_model.model.graph.node])["DynamicQuantizeLinear"], 2
+        )
     def test_gather(self):
         input_tensor = onnx.helper.make_tensor_value_info("input", onnx.TensorProto.FLOAT, [3, 2])
@@ -701,11 +784,15 @@ def test_gather(self):
         quantizable_op_types = ["Gather", "MatMul"]
         q_model = self.qlinear_test(model, q_config, quantize_params, quantizable_op_types)
         self.assertEqual(collections.Counter([node.op_type for node in q_model.model.graph.node])["QuantizeLinear"], 1)
-        self.assertEqual(collections.Counter([node.op_type for node in q_model.model.graph.node])["DequantizeLinear"], 1)
+        self.assertEqual(
+            collections.Counter([node.op_type for node in q_model.model.graph.node])["DequantizeLinear"], 1
+        )
         q_model = self.qdq_test(model, q_config, quantize_params, quantizable_op_types)
         self.assertEqual(collections.Counter([node.op_type for node in q_model.model.graph.node])["QuantizeLinear"], 3)
-        self.assertEqual(collections.Counter([node.op_type for node in q_model.model.graph.node])["DequantizeLinear"], 4)
+        self.assertEqual(
+            collections.Counter([node.op_type for node in q_model.model.graph.node])["DequantizeLinear"], 4
+        )
         q_config = {"Gather": self.q_config, "MatMul": self.q_config}
         q_model = self.dynamic_test(model, q_config, quantize_params, quantizable_op_types)
@@ -743,11 +830,15 @@ def test_split(self):
         }
         quantizable_op_types = ["Split", "MatMul"]
         q_model = self.qlinear_test(model, q_config, quantize_params, quantizable_op_types)
-        self.assertEqual(collections.Counter([node.op_type for node in q_model.model.graph.node])["DequantizeLinear"], 2)
+        self.assertEqual(
+            collections.Counter([node.op_type for node in q_model.model.graph.node])["DequantizeLinear"], 2
+        )
         self.assertEqual(collections.Counter([node.op_type for node in q_model.model.graph.node])["QuantizeLinear"], 1)
         q_model = self.qdq_test(model, q_config, quantize_params, quantizable_op_types)
-        self.assertEqual(collections.Counter([node.op_type for node in q_model.model.graph.node])["DequantizeLinear"], 5)
+        self.assertEqual(
+            collections.Counter([node.op_type for node in q_model.model.graph.node])["DequantizeLinear"], 5
+        )
         self.assertEqual(collections.Counter([node.op_type for node in q_model.model.graph.node])["QuantizeLinear"], 4)
     def test_pad(self):
@@ -777,7 +868,9 @@ def test_pad(self):
                 )
             else:
                 node = onnx.helper.make_node("Pad", ["A", "B"], ["C"], name="Pad", mode=mode)
-                graph = onnx.helper.make_graph([conv_node, node], "test_graph_1", [E, F, B], [C], [E_init, F_init, B_init])
+                graph = onnx.helper.make_graph(
+                    [conv_node, node], "test_graph_1", [E, F, B], [C], [E_init, F_init, B_init]
+                )
             model = onnx.helper.make_model(graph)
             conv_config = {
                 "weight_type": 3,
@@ -797,11 +890,15 @@ def test_pad(self):
             }
             quantizable_op_types = ["Conv", "Pad"]
             q_model = self.qlinear_test(model, q_config, quantize_params, quantizable_op_types)
-            self.assertEqual(collections.Counter([node.op_type for node in q_model.model.graph.node])["DequantizeLinear"], 1)
+            self.assertEqual(
+                collections.Counter([node.op_type for node in q_model.model.graph.node])["DequantizeLinear"], 1
+            )
             q_model = self.qdq_test(
                 model, q_config, quantize_params, quantizable_op_types, **{"dedicated_qdq_pair": True}
             )
-            self.assertEqual(collections.Counter([node.op_type for node in q_model.model.graph.node])["DequantizeLinear"], 4)
+            self.assertEqual(
+                collections.Counter([node.op_type for node in q_model.model.graph.node])["DequantizeLinear"], 4
+            )
         node = onnx.helper.make_node("Pad", ["E", "B", "D"], ["C"], name="Pad", mode="constant")
         graph = onnx.helper.make_graph([node], "test_graph_1", [E, B, D], [C], [E_init, B_init, D_init])
@@ -810,10 +907,14 @@ def test_pad(self):
         quantizable_op_types = ["Pad"]
         q_config = {"Pad": self.q_config}
         q_model = self.qlinear_test(model, q_config, quantize_params, quantizable_op_types)
-        self.assertEqual(collections.Counter([node.op_type for node in q_model.model.graph.node])["DequantizeLinear"], 1)
+        self.assertEqual(
+            collections.Counter([node.op_type for node in q_model.model.graph.node])["DequantizeLinear"], 1
+        )
         q_model = self.qdq_test(model, q_config, quantize_params, quantizable_op_types)
-        self.assertEqual(collections.Counter([node.op_type for node in q_model.model.graph.node])["DequantizeLinear"], 2)
+        self.assertEqual(
+            collections.Counter([node.op_type for node in q_model.model.graph.node])["DequantizeLinear"], 2
+        )
     def test_binary(self):
         for op in ["Mul", "Add"]:
@@ -831,16 +932,24 @@ def test_binary(self):
             }
             quantizable_op_types = [op]
             q_model = self.qlinear_test(model, q_config, quantize_params, quantizable_op_types)
-            self.assertEqual(collections.Counter([node.op_type for node in q_model.model.graph.node])["DequantizeLinear"], 0)
+            self.assertEqual(
+                collections.Counter([node.op_type for node in q_model.model.graph.node])["DequantizeLinear"], 0
+            )
             q_model = self.qlinear_test(model, q_config, {}, quantizable_op_types)
-            self.assertEqual(collections.Counter([node.op_type for node in q_model.model.graph.node])["DequantizeLinear"], 0)
+            self.assertEqual(
+                collections.Counter([node.op_type for node in q_model.model.graph.node])["DequantizeLinear"], 0
+            )
             q_model = self.qdq_test(model, q_config, quantize_params, quantizable_op_types)
-            self.assertEqual(collections.Counter([node.op_type for node in q_model.model.graph.node])["DequantizeLinear"], 0)
+            self.assertEqual(
+                collections.Counter([node.op_type for node in q_model.model.graph.node])["DequantizeLinear"], 0
+            )
             q_model = self.qdq_test(model, q_config, {}, quantizable_op_types)
-            self.assertEqual(collections.Counter([node.op_type for node in q_model.model.graph.node])["DequantizeLinear"], 0)
+            self.assertEqual(
+                collections.Counter([node.op_type for node in q_model.model.graph.node])["DequantizeLinear"], 0
+            )
     def test_relu(self):
         A = onnx.helper.make_tensor_value_info("A", onnx.TensorProto.FLOAT, [1, 1, 5, 5])
@@ -879,7 +988,7 @@ def test_relu(self):
         session = ort.InferenceSession(model.SerializeToString(), sess_options, providers=ort.get_available_providers())
         tmp_model = onnx.load(sess_options.optimized_model_filepath)
         q_model = self.qlinear_test(tmp_model, q_config, quantize_params, quantizable_op_types)
-        q_model.save('test.onnx')
+        q_model.save("test.onnx")
         self.assertEqual(len(q_model.model.graph.node), 5)
         q_model = self.qdq_test(tmp_model, q_config, quantize_params, quantizable_op_types)
         self.assertEqual(len(q_model.model.graph.node), 8)
@@ -920,10 +1029,14 @@ def test_clip(self):
         }
         quantizable_op_types = ["Conv", "Clip"]
         q_model = self.qlinear_test(model, q_config, quantize_params, quantizable_op_types)
-        self.assertEqual(collections.Counter([node.op_type for node in q_model.model.graph.node])["DequantizeLinear"], 1)
+        self.assertEqual(
+            collections.Counter([node.op_type for node in q_model.model.graph.node])["DequantizeLinear"], 1
+        )
         self.assertEqual(collections.Counter([node.op_type for node in q_model.model.graph.node])["QuantizeLinear"], 2)
         q_model = self.qdq_test(model, q_config, quantize_params, quantizable_op_types)
-        self.assertEqual(collections.Counter([node.op_type for node in q_model.model.graph.node])["DequantizeLinear"], 3)
+        self.assertEqual(
+            collections.Counter([node.op_type for node in q_model.model.graph.node])["DequantizeLinear"], 3
+        )
         self.assertEqual(collections.Counter([node.op_type for node in q_model.model.graph.node])["QuantizeLinear"], 3)
     def test_activation(self):
@@ -937,26 +1050,38 @@ def test_activation(self):
             quantize_params = {"A": [np.uint8(10.0), np.float32(0)], "B": [np.uint8(10.0), np.float32(0)]}
             quantizable_op_types = [op]
             q_model = self.qlinear_test(model, q_config, quantize_params, quantizable_op_types)
-            self.assertEqual(collections.Counter([node.op_type for node in q_model.model.graph.node])["DequantizeLinear"], 1)
+            self.assertEqual(
+                collections.Counter([node.op_type for node in q_model.model.graph.node])["DequantizeLinear"], 1
+            )
             q_model = self.qdq_test(model, q_config, quantize_params, quantizable_op_types)
-            self.assertEqual(collections.Counter([node.op_type for node in q_model.model.graph.node])["DequantizeLinear"], 2)
+            self.assertEqual(
+                collections.Counter([node.op_type for node in q_model.model.graph.node])["DequantizeLinear"], 2
+            )
             a_value = np.random.randn(1, 10).astype(np.float32)
             A_init = onnx.helper.make_tensor("A", onnx.TensorProto.FLOAT, [1, 10], a_value.reshape(10).tolist())
             graph = onnx.helper.make_graph([node], "test_graph_1", [A], [B], [A_init])
             model = onnx.helper.make_model(graph)
             q_model = self.qlinear_test(model, q_config, quantize_params, quantizable_op_types)
-            self.assertEqual(collections.Counter([node.op_type for node in q_model.model.graph.node])["DequantizeLinear"], 1)
+            self.assertEqual(
+                collections.Counter([node.op_type for node in q_model.model.graph.node])["DequantizeLinear"], 1
+            )
             q_model = self.qdq_test(model, q_config, quantize_params, quantizable_op_types)
-            self.assertEqual(collections.Counter([node.op_type for node in q_model.model.graph.node])["DequantizeLinear"], 2)
+            self.assertEqual(
+                collections.Counter([node.op_type for node in q_model.model.graph.node])["DequantizeLinear"], 2
+            )
             q_model = self.qlinear_test(model, q_config, {}, quantizable_op_types)
-            self.assertEqual(collections.Counter([node.op_type for node in q_model.model.graph.node])["DequantizeLinear"], 0)
+            self.assertEqual(
+                collections.Counter([node.op_type for node in q_model.model.graph.node])["DequantizeLinear"], 0
+            )
             q_model = self.qdq_test(model, q_config, {}, quantizable_op_types)
-            self.assertEqual(collections.Counter([node.op_type for node in q_model.model.graph.node])["DequantizeLinear"], 0)
+            self.assertEqual(
+                collections.Counter([node.op_type for node in q_model.model.graph.node])["DequantizeLinear"], 0
+            )
         for op in ["Relu"]:
             B = onnx.helper.make_tensor_value_info("B", onnx.TensorProto.FLOAT, [1, 10])
@@ -968,26 +1093,38 @@ def test_activation(self):
             quantize_params = {"A": [np.uint8(10.0), np.float32(0)], "B": [np.uint8(10.0), np.float32(0)]}
            quantizable_op_types = [op]
             q_model = self.qlinear_test(model, q_config, quantize_params, quantizable_op_types)
-            self.assertEqual(collections.Counter([node.op_type for node in q_model.model.graph.node])["DequantizeLinear"], 0)
+            self.assertEqual(
+                collections.Counter([node.op_type for node in q_model.model.graph.node])["DequantizeLinear"], 0
+            )
             q_model = self.qdq_test(model, q_config, quantize_params, quantizable_op_types)
-            self.assertEqual(collections.Counter([node.op_type for node in q_model.model.graph.node])["DequantizeLinear"], 0)
+            self.assertEqual(
+                collections.Counter([node.op_type for node in q_model.model.graph.node])["DequantizeLinear"], 0
+            )
             a_value = np.random.randn(1, 10).astype(np.float32)
             A_init = onnx.helper.make_tensor("A", onnx.TensorProto.FLOAT, [1, 10], a_value.reshape(10).tolist())
             graph = onnx.helper.make_graph([node], "test_graph_1", [A], [B], [A_init])
             model = onnx.helper.make_model(graph)
             q_model = self.qlinear_test(model, q_config, quantize_params, quantizable_op_types)
-            self.assertEqual(collections.Counter([node.op_type for node in q_model.model.graph.node])["DequantizeLinear"], 0)
+            self.assertEqual(
+                collections.Counter([node.op_type for node in q_model.model.graph.node])["DequantizeLinear"], 0
+            )
             q_model = self.qdq_test(model, q_config, quantize_params, quantizable_op_types)
-            self.assertEqual(collections.Counter([node.op_type for node in q_model.model.graph.node])["DequantizeLinear"], 0)
+            self.assertEqual(
+                collections.Counter([node.op_type for node in q_model.model.graph.node])["DequantizeLinear"], 0
+            )
             q_model = self.qlinear_test(model, q_config, {}, quantizable_op_types)
-            self.assertEqual(collections.Counter([node.op_type for node in q_model.model.graph.node])["DequantizeLinear"], 0)
+            self.assertEqual(
+                collections.Counter([node.op_type for node in q_model.model.graph.node])["DequantizeLinear"], 0
+            )
             q_model = self.qdq_test(model, q_config, {}, quantizable_op_types)
-            self.assertEqual(collections.Counter([node.op_type for node in q_model.model.graph.node])["DequantizeLinear"], 0)
+            self.assertEqual(
+                collections.Counter([node.op_type for node in q_model.model.graph.node])["DequantizeLinear"], 0
+            )
     def test_pooling(self):
         op = "MaxPool"
@@ -1024,10 +1161,14 @@ def test_pooling(self):
         }
         quantizable_op_types = ["Conv", op]
         q_model = self.qlinear_test(model, q_config, quantize_params, quantizable_op_types)
-        self.assertEqual(collections.Counter([node.op_type for node in q_model.model.graph.node])["DequantizeLinear"], 1)
+        self.assertEqual(
+            collections.Counter([node.op_type for node in q_model.model.graph.node])["DequantizeLinear"], 1
+        )
         self.assertEqual(collections.Counter([node.op_type for node in q_model.model.graph.node])["QuantizeLinear"], 2)
         q_model = self.qdq_test(model, q_config, quantize_params, quantizable_op_types)
-        self.assertEqual(collections.Counter([node.op_type for node in q_model.model.graph.node])["DequantizeLinear"], 4)
+        self.assertEqual(
+            collections.Counter([node.op_type for node in q_model.model.graph.node])["DequantizeLinear"], 4
+        )
         self.assertEqual(collections.Counter([node.op_type for node in q_model.model.graph.node])["QuantizeLinear"], 4)
         op = "GlobalAveragePool"
@@ -1043,11 +1184,19 @@ def test_pooling(self):
             opset.version = opset_version
             model = onnx.helper.make_model(graph, opset_imports=[opset])
             q_model = self.qlinear_test(model, q_config, quantize_params, quantizable_op_types)
-            self.assertEqual(collections.Counter([node.op_type for node in q_model.model.graph.node])["DequantizeLinear"], 1)
-            self.assertEqual(collections.Counter([node.op_type for node in q_model.model.graph.node])["QuantizeLinear"], 1)
+            self.assertEqual(
+                collections.Counter([node.op_type for node in q_model.model.graph.node])["DequantizeLinear"], 1
+            )
+            self.assertEqual(
+                collections.Counter([node.op_type for node in q_model.model.graph.node])["QuantizeLinear"], 1
+            )
             q_model = self.qdq_test(model, q_config, quantize_params, quantizable_op_types)
-            self.assertEqual(collections.Counter([node.op_type for node in q_model.model.graph.node])["DequantizeLinear"], 2)
-            self.assertEqual(collections.Counter([node.op_type for node in q_model.model.graph.node])["QuantizeLinear"], 2)
+            self.assertEqual(
+                collections.Counter([node.op_type for node in q_model.model.graph.node])["DequantizeLinear"], 2
+            )
+            self.assertEqual(
+                collections.Counter([node.op_type for node in q_model.model.graph.node])["QuantizeLinear"], 2
+            )
         A = onnx.helper.make_tensor_value_info("A", onnx.TensorProto.FLOAT, [1, 1, 5, 5])
         B = onnx.helper.make_tensor_value_info("B", onnx.TensorProto.FLOAT, [1, 1, 3, 3])
@@ -1068,11 +1217,15 @@ def test_pooling(self):
         }
         quantizable_op_types = ["Conv", op]
         q_model = self.qlinear_test(model, q_config, quantize_params, quantizable_op_types)
-        self.assertEqual(collections.Counter([node.op_type for node in q_model.model.graph.node])["DequantizeLinear"], 1)
+        self.assertEqual(
+            collections.Counter([node.op_type for node in q_model.model.graph.node])["DequantizeLinear"], 1
+        )
         self.assertEqual(collections.Counter([node.op_type for node in q_model.model.graph.node])["QuantizeLinear"], 2)
         q_model = self.qdq_test(model, q_config, quantize_params, quantizable_op_types)
-        self.assertEqual(collections.Counter([node.op_type for node in q_model.model.graph.node])["DequantizeLinear"], 4)
+        self.assertEqual(
+            collections.Counter([node.op_type for node in q_model.model.graph.node])["DequantizeLinear"], 4
+        )
         self.assertEqual(collections.Counter([node.op_type for node in q_model.model.graph.node])["QuantizeLinear"], 4)
     def test_exclude_node(self):
@@ -1167,14 +1320,18 @@ def test_more_direct8bit_nodes(self):
         }
         quantizable_op_types = ["MatMul", "Flatten", "Abs", "Sign", "Shrink"]
         q_model = self.qlinear_test(model, q_config, quantize_params, quantizable_op_types)
-        self.assertEqual(collections.Counter([node.op_type for node in q_model.model.graph.node])["DequantizeLinear"], 1)
+        self.assertEqual(
+            collections.Counter([node.op_type for node in q_model.model.graph.node])["DequantizeLinear"], 1
+        )
         self.assertEqual(collections.Counter([node.op_type for node in q_model.model.graph.node])["QuantizeLinear"], 1)
         session = ort.InferenceSession(q_model.model.SerializeToString(), providers=["CPUExecutionProvider"])
         self.assertIsNotNone(session)
         q_model = self.qdq_test(model, q_config, quantize_params, quantizable_op_types)
         q_model.save("qdq.onnx")
-        self.assertEqual(collections.Counter([node.op_type for node in q_model.model.graph.node])["DequantizeLinear"], 9)
+        self.assertEqual(
+            collections.Counter([node.op_type for node in q_model.model.graph.node])["DequantizeLinear"], 9
+        )
         self.assertEqual(collections.Counter([node.op_type for node in q_model.model.graph.node])["QuantizeLinear"], 7)
         session = ort.InferenceSession(q_model.model.SerializeToString(), providers=["CPUExecutionProvider"])
         self.assertIsNotNone(session)
@@ -1229,13 +1386,17 @@ def test_expand(self):
         }
         quantizable_op_types = ["MatMul", "Expand"]
         q_model = self.qlinear_test(model, q_config, quantize_params, quantizable_op_types)
-        self.assertEqual(collections.Counter([node.op_type for node in q_model.model.graph.node])["DequantizeLinear"], 1)
+        self.assertEqual(
+            collections.Counter([node.op_type for node in q_model.model.graph.node])["DequantizeLinear"], 1
+        )
         self.assertEqual(collections.Counter([node.op_type for node in q_model.model.graph.node])["QuantizeLinear"], 1)
         session = ort.InferenceSession(q_model.model.SerializeToString(), providers=["CPUExecutionProvider"])
         self.assertIsNotNone(session)
         q_model = self.qdq_test(model, q_config, quantize_params, quantizable_op_types)
-        self.assertEqual(collections.Counter([node.op_type for node in q_model.model.graph.node])["DequantizeLinear"], 6)
+        self.assertEqual(
+            collections.Counter([node.op_type for node in q_model.model.graph.node])["DequantizeLinear"], 6
+        )
         self.assertEqual(collections.Counter([node.op_type for node in q_model.model.graph.node])["QuantizeLinear"], 4)
         session = ort.InferenceSession(q_model.model.SerializeToString(), providers=["CPUExecutionProvider"])
         self.assertIsNotNone(session)
@@ -1292,13 +1453,17 @@ def test_slice(self):
         }
         quantizable_op_types = ["MatMul", "Slice"]
         q_model = self.qlinear_test(model, q_config, quantize_params, quantizable_op_types)
-        self.assertEqual(collections.Counter([node.op_type for node in q_model.model.graph.node])["DequantizeLinear"], 1)
+        self.assertEqual(
+            collections.Counter([node.op_type for node in q_model.model.graph.node])["DequantizeLinear"], 1
+        )
         self.assertEqual(collections.Counter([node.op_type for node in q_model.model.graph.node])["QuantizeLinear"], 1)
         session = ort.InferenceSession(q_model.model.SerializeToString(), providers=["CPUExecutionProvider"])
         self.assertIsNotNone(session)
         q_model = self.qdq_test(model, q_config, quantize_params, quantizable_op_types)
-        self.assertEqual(collections.Counter([node.op_type for node in q_model.model.graph.node])["DequantizeLinear"], 6)
+        self.assertEqual(
+            collections.Counter([node.op_type for node in q_model.model.graph.node])["DequantizeLinear"], 6
+        )
         self.assertEqual(collections.Counter([node.op_type for node in q_model.model.graph.node])["QuantizeLinear"], 4)
         session = ort.InferenceSession(q_model.model.SerializeToString(), providers=["CPUExecutionProvider"])
         self.assertIsNotNone(session)
@@ -1360,14 +1525,18 @@ def test_mod(self):
         }
         quantizable_op_types = ["MatMul", "Mod"]
         q_model = self.qlinear_test(model, q_config, quantize_params, quantizable_op_types)
-        q_model.save('test.onnx')
-        self.assertEqual(collections.Counter([node.op_type for node in q_model.model.graph.node])["DequantizeLinear"], 1)
+        q_model.save("test.onnx")
+        self.assertEqual(
+            collections.Counter([node.op_type for node in q_model.model.graph.node])["DequantizeLinear"], 1
+        )
         self.assertEqual(collections.Counter([node.op_type for node in q_model.model.graph.node])["QuantizeLinear"], 1)
         session = ort.InferenceSession(q_model.model.SerializeToString(), providers=["CPUExecutionProvider"])
         self.assertIsNotNone(session)
         q_model = self.qdq_test(model, q_config, quantize_params, quantizable_op_types)
-        self.assertEqual(collections.Counter([node.op_type for node in q_model.model.graph.node])["DequantizeLinear"], 8)
+        self.assertEqual(
+            collections.Counter([node.op_type for node in q_model.model.graph.node])["DequantizeLinear"], 8
+        )
         self.assertEqual(collections.Counter([node.op_type for node in q_model.model.graph.node])["QuantizeLinear"], 5)
         session = ort.InferenceSession(q_model.model.SerializeToString(), providers=["CPUExecutionProvider"])
         self.assertIsNotNone(session)
@@ -1426,13 +1595,17 @@ def test_reducemin_reducemax(self):
         }
         quantizable_op_types = ["MatMul", "ReduceMin"]
         q_model = self.qlinear_test(model, q_config, quantize_params, quantizable_op_types)
-        self.assertEqual(collections.Counter([node.op_type for node in q_model.model.graph.node])["DequantizeLinear"], 1)
+        self.assertEqual(
+            collections.Counter([node.op_type for node in q_model.model.graph.node])["DequantizeLinear"], 1
+        )
         self.assertEqual(collections.Counter([node.op_type for node in q_model.model.graph.node])["QuantizeLinear"], 1)
         session = ort.InferenceSession(q_model.model.SerializeToString(), providers=["CPUExecutionProvider"])
         self.assertIsNotNone(session)
         q_model = self.qdq_test(model, q_config, quantize_params, quantizable_op_types)
-        self.assertEqual(collections.Counter([node.op_type for node in q_model.model.graph.node])["DequantizeLinear"], 6)
+        self.assertEqual(
+            collections.Counter([node.op_type for node in q_model.model.graph.node])["DequantizeLinear"], 6
+        )
         self.assertEqual(collections.Counter([node.op_type for node in q_model.model.graph.node])["QuantizeLinear"], 4)
         session = ort.InferenceSession(q_model.model.SerializeToString(), providers=["CPUExecutionProvider"])
         self.assertIsNotNone(session)
@@ -1490,13 +1663,17 @@ def test_reducemin_reducemax(self):
         }
         quantizable_op_types = ["MatMul", "ReduceMax"]
         q_model = self.qlinear_test(model, q_config, quantize_params, quantizable_op_types)
-        self.assertEqual(collections.Counter([node.op_type for node in q_model.model.graph.node])["DequantizeLinear"], 1)
+        self.assertEqual(
+            collections.Counter([node.op_type for node in q_model.model.graph.node])["DequantizeLinear"], 1
+        )
         self.assertEqual(collections.Counter([node.op_type for node in q_model.model.graph.node])["QuantizeLinear"], 1)
         session = ort.InferenceSession(q_model.model.SerializeToString(), providers=["CPUExecutionProvider"])
         self.assertIsNotNone(session)
         q_model = self.qdq_test(model, q_config, quantize_params, quantizable_op_types)
-        self.assertEqual(collections.Counter([node.op_type for node in q_model.model.graph.node])["DequantizeLinear"], 6)
+        self.assertEqual(
+            collections.Counter([node.op_type for node in q_model.model.graph.node])["DequantizeLinear"], 6
+        )
         self.assertEqual(collections.Counter([node.op_type for node in q_model.model.graph.node])["QuantizeLinear"], 4)
         session = ort.InferenceSession(q_model.model.SerializeToString(), providers=["CPUExecutionProvider"])
         self.assertIsNotNone(session)
@@ -1550,13 +1727,17 @@ def test_tile(self):
         }
         quantizable_op_types = ["MatMul", "Tile"]
         q_model = self.qlinear_test(model, q_config, quantize_params, quantizable_op_types)
-        self.assertEqual(collections.Counter([node.op_type for node in q_model.model.graph.node])["DequantizeLinear"], 1)
+        self.assertEqual(
+            collections.Counter([node.op_type for node in q_model.model.graph.node])["DequantizeLinear"], 1
+        )
         self.assertEqual(collections.Counter([node.op_type for node in q_model.model.graph.node])["QuantizeLinear"], 1)
         session = ort.InferenceSession(q_model.model.SerializeToString(), providers=["CPUExecutionProvider"])
         self.assertIsNotNone(session)
         q_model = self.qdq_test(model, q_config, quantize_params, quantizable_op_types)
-        self.assertEqual(collections.Counter([node.op_type for node in q_model.model.graph.node])["DequantizeLinear"], 6)
+        self.assertEqual(
+            collections.Counter([node.op_type for node in q_model.model.graph.node])["DequantizeLinear"], 6
+        )
         self.assertEqual(collections.Counter([node.op_type for node in q_model.model.graph.node])["QuantizeLinear"], 4)
         session = ort.InferenceSession(q_model.model.SerializeToString(), providers=["CPUExecutionProvider"])
         self.assertIsNotNone(session)
@@ -1571,7 +1752,9 @@ def test_centercroppad(self):
         matmul1_output = onnx.helper.make_tensor_value_info("matmul1_output", onnx.TensorProto.FLOAT, [20, 10, 3])
         matmul1_node = onnx.helper.make_node("MatMul", ["input", "matmul1_weight"], ["matmul1_output"], name="Matmul_0")
-        centercroppad_output = onnx.helper.make_tensor_value_info("centercroppad_output", onnx.TensorProto.FLOAT, [10, 7, 3])
+        centercroppad_output = onnx.helper.make_tensor_value_info(
+            "centercroppad_output", onnx.TensorProto.FLOAT, [10, 7, 3]
+        )
         shape = onnx.helper.make_tensor("shape", onnx.TensorProto.INT64, [3], [10, 7, 3])
         centercroppad_node = onnx.helper.make_node(
             "CenterCropPad",
@@ -1614,13 +1797,17 @@ def test_centercroppad(self):
         }
         quantizable_op_types = ["MatMul", "CenterCropPad"]
         q_model = self.qlinear_test(model, q_config, quantize_params, quantizable_op_types)
-        self.assertEqual(collections.Counter([node.op_type for node in q_model.model.graph.node])["DequantizeLinear"], 1)
+        self.assertEqual(
+            collections.Counter([node.op_type for node in q_model.model.graph.node])["DequantizeLinear"], 1
+        )
         self.assertEqual(collections.Counter([node.op_type for node in q_model.model.graph.node])["QuantizeLinear"], 1)
         session = ort.InferenceSession(q_model.model.SerializeToString(), providers=["CPUExecutionProvider"])
         self.assertIsNotNone(session)
         q_model = self.qdq_test(model, q_config, quantize_params, quantizable_op_types)
-        self.assertEqual(collections.Counter([node.op_type for node in q_model.model.graph.node])["DequantizeLinear"], 6)
+        self.assertEqual(
+            collections.Counter([node.op_type for node in q_model.model.graph.node])["DequantizeLinear"], 6
+        )
         self.assertEqual(collections.Counter([node.op_type for node in q_model.model.graph.node])["QuantizeLinear"], 4)
         session = ort.InferenceSession(q_model.model.SerializeToString(), providers=["CPUExecutionProvider"])
         self.assertIsNotNone(session)
@@ -1679,13 +1866,17 @@ def test_gathernd(self):
         }
         quantizable_op_types = ["MatMul", "GatherND"]
         q_model = self.qlinear_test(model, q_config, quantize_params, quantizable_op_types)
-        self.assertEqual(collections.Counter([node.op_type for node in q_model.model.graph.node])["DequantizeLinear"], 1)
+        self.assertEqual(
+            collections.Counter([node.op_type for node in q_model.model.graph.node])["DequantizeLinear"], 1
+        )
         self.assertEqual(collections.Counter([node.op_type for node in q_model.model.graph.node])["QuantizeLinear"], 1)
         session = ort.InferenceSession(q_model.model.SerializeToString(), providers=["CPUExecutionProvider"])
         self.assertIsNotNone(session)
         q_model = self.qdq_test(model, q_config, quantize_params, quantizable_op_types)
-        self.assertEqual(collections.Counter([node.op_type for node in q_model.model.graph.node])["DequantizeLinear"], 6)
+        self.assertEqual(
+            collections.Counter([node.op_type for node in q_model.model.graph.node])["DequantizeLinear"], 6
+        )
         self.assertEqual(collections.Counter([node.op_type for node in q_model.model.graph.node])["QuantizeLinear"], 4)
         session = ort.InferenceSession(q_model.model.SerializeToString(), providers=["CPUExecutionProvider"])
         self.assertIsNotNone(session)
@@ -1700,7 +1891,9 @@ def test_gatherelements(self):
         matmul1_output = onnx.helper.make_tensor_value_info("matmul1_output", onnx.TensorProto.FLOAT, [3, 3])
         matmul1_node = onnx.helper.make_node("MatMul", ["input", "matmul1_weight"], ["matmul1_output"], name="Matmul_0")
-        gatherelements_output = onnx.helper.make_tensor_value_info("gatherelements_output", onnx.TensorProto.FLOAT, [2, 3])
+        gatherelements_output = onnx.helper.make_tensor_value_info(
+            "gatherelements_output", onnx.TensorProto.FLOAT, [2, 3]
+        )
         indices = onnx.helper.make_tensor("indices", onnx.TensorProto.INT64, [2, 3], [-1, -2, 0, -2, 0, 0])
         gathernd_node = onnx.helper.make_node(
             "GatherElements",
@@ -1744,13 +1937,17 @@ def test_gatherelements(self):
         }
         quantizable_op_types = ["MatMul", "GatherElements"]
         q_model = self.qlinear_test(model, q_config, quantize_params, quantizable_op_types)
-        self.assertEqual(collections.Counter([node.op_type for node in q_model.model.graph.node])["DequantizeLinear"], 1)
+        self.assertEqual(
+            collections.Counter([node.op_type for node in q_model.model.graph.node])["DequantizeLinear"], 1
+        )
         self.assertEqual(collections.Counter([node.op_type for node in q_model.model.graph.node])["QuantizeLinear"], 1)
         session = ort.InferenceSession(q_model.model.SerializeToString(), providers=["CPUExecutionProvider"])
         self.assertIsNotNone(session)
         q_model = self.qdq_test(model, q_config, quantize_params, quantizable_op_types)
-        self.assertEqual(collections.Counter([node.op_type for node in q_model.model.graph.node])["DequantizeLinear"], 6)
+        self.assertEqual(
+            collections.Counter([node.op_type for node in q_model.model.graph.node])["DequantizeLinear"], 6
+        )
         self.assertEqual(collections.Counter([node.op_type for node in q_model.model.graph.node])["QuantizeLinear"], 4)
         session = ort.InferenceSession(q_model.model.SerializeToString(), providers=["CPUExecutionProvider"])
         self.assertIsNotNone(session)
diff --git a/test/quantization/post_training_quant/test_quant_utils.py b/test/quantization/post_training_quant/test_quant_utils.py
index 19df570bc..6fce47d7c 100644
--- a/test/quantization/post_training_quant/test_quant_utils.py
+++ b/test/quantization/post_training_quant/test_quant_utils.py
@@ -2,6 +2,7 @@
 import numpy as np
 import onnx
+
 from onnx_neural_compressor.algorithms import utility as quant_utils
@@ -18,7 +19,6 @@ def test_4bit_quant_tensor(self):
         data = np.random.random((100, 32))
         q_data, scale, zp = quant_utils.quant_tensor(data)
-
     def test_quant_dequant_data(self):
         data = np.random.random((100, 32))
         qrange = quant_utils.get_qmin_qmax_for_qType(
@@ -65,4 +65,4 @@ def test_quant_dequant_data(self):
 if __name__ == "__main__":
-    unittest.main()
\ No newline at end of file
+    unittest.main()
diff --git a/test/quantization/test_autotune.py b/test/quantization/test_autotune.py
index 72cb735eb..051b6cd73 100644
--- a/test/quantization/test_autotune.py
+++ b/test/quantization/test_autotune.py
@@ -22,10 +22,9 @@
 import numpy as np
 import onnx
 import onnxruntime as ort
-from onnx_neural_compressor import quantization
 from optimum.exporters.onnx import main_export
-from onnx_neural_compressor import config, data_reader
+from onnx_neural_compressor import config, data_reader, quantization
 from onnx_neural_compressor.quantization import tuning
 from typing import Callable, Dict, List, Optional, Union  # isort: skip
@@ -458,9 +457,8 @@ def test_skip_verified_config_mapping(self, mock_warning):
         )
         call_args_list = mock_warning.call_args_list
         # There may be multiple calls to warning, so we need to check all of them
-        self.assertIn(
-            "Skip the verified config mapping.", [info[0][0] for info in call_args_list]
-        )
+        self.assertIn("Skip the verified config mapping.", [info[0][0] for info in call_args_list])
+
 if __name__ == "__main__":
     unittest.main()
diff --git a/test/quantization/test_config.py b/test/quantization/test_config.py
index b5d92258c..ec9411b45 100644
--- a/test/quantization/test_config.py
+++ b/test/quantization/test_config.py
@@ -5,12 +5,11 @@
 import numpy as np
 import onnx
-from onnx_neural_compressor import quantization
-from onnx_neural_compressor.quantization import tuning
 from optimum.exporters.onnx import main_export
-from onnx_neural_compressor import config, logger, utility
+from onnx_neural_compressor import config, logger, quantization, utility
 from onnx_neural_compressor.quantization import algorithm_entry as algos
+from onnx_neural_compressor.quantization import tuning
 def find_onnx_file(folder_path):
@@ -68,7 +67,7 @@ def setUp(self):
         # print the test name
         logger.info(f"Running TestQuantizationConfig test: {self.id()}")
-    def _check_node_is_quantized(self, model, node_name, bits):
+    def _check_node_is_quantized(self, model, node_name):
         for node in model.graph.node:
             if (node.name == node_name or node.name == node_name + "_Q4") and node.op_type in [
                 "MatMulNBits",
@@ -164,7 +163,6 @@ def test_dynamic_custom_quant_config(self):
         self.assertEqual(len(config_loader.config_set), 2)
-
     def test_static_quant_config(self):
         for execution_provider in ["CPUExecutionProvider", "CUDAExecutionProvider", "DnnlExecutionProvider"]:
             tuning_config = tuning.TuningConfig(
@@ -185,7 +183,9 @@ def test_static_quant_config(self):
                 else:
                     self.assertFalse("add" in configs_mapping)
                 if idx in [0, 1]:
-                    self.assertEqual(configs_mapping["Matmul"]["calibrate_method"], quantization.CalibrationMethod.MinMax)
+                    self.assertEqual(
+                        configs_mapping["Matmul"]["calibrate_method"], quantization.CalibrationMethod.MinMax
+                    )
             self.assertLess(idx, 16)
         for execution_provider in ["TensorrtExecutionProvider"]:
@@ -217,7 +217,9 @@ def test_static_quant_config(self):
                 configs_mapping = quant_config.to_config_mapping(model_info=model_info)
                 if "Matmul" in configs_mapping:
                     self.assertFalse(configs_mapping["Matmul"]["per_channel"])
-                    self.assertEqual(configs_mapping["Matmul"]["calibrate_method"], quantization.CalibrationMethod.MinMax)
+                    self.assertEqual(
+                        configs_mapping["Matmul"]["calibrate_method"], quantization.CalibrationMethod.MinMax
+                    )
                 if "add" in configs_mapping:
                     self.assertEqual(configs_mapping["add"]["calibrate_method"], quantization.CalibrationMethod.MinMax)
             self.assertLess(idx, 16)
diff --git a/test/quantization/test_smooth_quant.py b/test/quantization/test_smooth_quant.py
index 217013844..52f4bd8b3 100644
--- a/test/quantization/test_smooth_quant.py
+++ b/test/quantization/test_smooth_quant.py
@@ -19,10 +19,10 @@
 import numpy as np
 import onnx
+import onnxruntime as ort
 from optimum.exporters.onnx import main_export
 from onnx_neural_compressor import config, data_reader
-import onnxruntime as ort
 from onnx_neural_compressor.quantization import QuantType
 from onnx_neural_compressor.quantization import algorithm_entry as algos
 from onnx_neural_compressor.quantization import quantize
@@ -113,13 +113,17 @@ def test_sq_with_ort_like_api(self):
     def test_smooth_quant_args(self):
         self.data_reader.rewind()
-        sq_config = config.SmoothQuantConfig(weight_type=QuantType.QUInt8, activation_type=QuantType.QUInt8, alpha="auto")
+        sq_config = config.SmoothQuantConfig(
+            weight_type=QuantType.QUInt8, activation_type=QuantType.QUInt8, alpha="auto"
+        )
         model = algos.smooth_quant_entry(self.gptj, sq_config, self.data_reader)
         num_muls = len([i for i in model.graph.node if i.name.endswith("_smooth_mul") and i.op_type == "Mul"])
         self.assertEqual(num_muls, 30)
         self.data_reader.rewind()
-        sq_config = config.SmoothQuantConfig(weight_type=QuantType.QUInt8, activation_type=QuantType.QUInt8, scales_per_op=False)
+        sq_config = config.SmoothQuantConfig(
+            weight_type=QuantType.QUInt8, activation_type=QuantType.QUInt8, scales_per_op=False
+        )
         model = algos.smooth_quant_entry(self.gptj, sq_config, self.data_reader)
         num_muls = len([i for i in model.graph.node if i.name.endswith("_smooth_mul") and i.op_type == "Mul"])
         self.assertEqual(num_muls, 15)
@@ -127,20 +131,23 @@ def test_smooth_quant_args(self):
         sess_options = ort.SessionOptions()
         sess_options.graph_optimization_level = ort.GraphOptimizationLevel.ORT_ENABLE_EXTENDED
         sess_options.optimized_model_filepath = "Optimized_model.onnx"
-        sess = ort.InferenceSession(self.gptj,
-                                    sess_options,
-                                    providers=["CPUExecutionProvider"])
+        sess = ort.InferenceSession(self.gptj, sess_options, providers=["CPUExecutionProvider"])
         self.data_reader.rewind()
-        sq_config = config.SmoothQuantConfig(weight_type=QuantType.QUInt8, activation_type=QuantType.QUInt8, folding=True, scales_per_op=False)
+        sq_config = config.SmoothQuantConfig(
+            weight_type=QuantType.QUInt8, activation_type=QuantType.QUInt8, folding=True, scales_per_op=False
+        )
         model = algos.smooth_quant_entry("Optimized_model.onnx", sq_config, self.data_reader)
         num_muls = len([i for i in model.graph.node if i.name.endswith("_smooth_mul") and i.op_type == "Mul"])
         self.assertEqual(num_muls, 10)
         self.data_reader.rewind()
-        sq_config = config.SmoothQuantConfig(weight_type=QuantType.QUInt8, activation_type=QuantType.QUInt8, folding=False, scales_per_op=False)
+        sq_config = config.SmoothQuantConfig(
+            weight_type=QuantType.QUInt8, activation_type=QuantType.QUInt8, folding=False, scales_per_op=False
+        )
         model = algos.smooth_quant_entry("Optimized_model.onnx", sq_config, self.data_reader)
         num_muls = len([i for i in model.graph.node if i.name.endswith("_smooth_mul") and i.op_type == "Mul"])
         self.assertEqual(num_muls, 15)
+
 if __name__ == "__main__":
     unittest.main()
diff --git a/test/utils/test_general.py b/test/utils/test_general.py
index b4140e7f2..32cb80087 100644
--- a/test/utils/test_general.py
+++ b/test/utils/test_general.py
@@ -192,7 +192,10 @@ def test_api(self):
         self.assertEqual(fake_default_config.weight_dtype, "int")
         config_set = get_all_config_set()
         self.assertEqual(len(config_set), len(config.config_registry.get_all_config_cls()))
-        self.assertEqual([i for i in config_set if getattr(i, "name", "None") == FAKE_CONFIG_NAME][0].weight_bits, DEFAULT_WEIGHT_BITS)
+        self.assertEqual(
+            [i for i in config_set if getattr(i, "name", "None") == FAKE_CONFIG_NAME][0].weight_bits,
+            DEFAULT_WEIGHT_BITS,
+        )
     def test_config_expand_complex_tunable_type(self):
         target_op_type_list_options = [["Conv", "Gemm"], ["Conv", "Matmul"]]
@@ -251,9 +254,10 @@ def test_config_loader_skip_verified_config(self) -> None:
         config_set = [FakeAlgoConfig(weight_bits=[4, 8]), FakeAlgoConfig(weight_bits=8)]
         config_loader = tuning.ConfigLoader(config_set)
         config_count = 0
-        for i, config in enumerate(config_loader):
+        for i, _ in enumerate(config_loader):
             config_count += 1
         self.assertEqual(config_count, 2)
+
 if __name__ == "__main__":
     unittest.main()
diff --git a/test/utils/test_utility.py b/test/utils/test_utility.py
index fa7a4812f..50ce620b9 100644
--- a/test/utils/test_utility.py
+++ b/test/utils/test_utility.py
@@ -17,26 +17,6 @@ def test_set_random_seed(self):
         with self.assertRaises(AssertionError):
             utility.set_random_seed(seed)
-    def test_set_workspace(self):
-        workspace = "/path/to/workspace"
-        utility.set_workspace(workspace)
-        self.assertEqual(utility.options.workspace, workspace)
-
-        # non String type
-        workspace = 12345
-        with self.assertRaises(AssertionError):
-            utility.set_workspace(workspace)
-
-    def test_set_resume_from(self):
-        resume_from = "/path/to/resume"
-        utility.set_resume_from(resume_from)
-        self.assertEqual(utility.options.resume_from, resume_from)
-
-        # non String type
-        resume_from = 12345
-        with self.assertRaises(AssertionError):
-            utility.set_resume_from(resume_from)
-
 class TestCPUInfo(unittest.TestCase):