example update for 3.x ipex sq (#1902)
Signed-off-by: violetch24 <[email protected]>
violetch24 authored Aug 2, 2024
1 parent 000946f commit b35ff8f
Showing 7 changed files with 49 additions and 30 deletions.
examples/.config/model_params_pytorch_3x.json (3 additions, 3 deletions)
@@ -85,7 +85,7 @@
         "batch_size": 8
     },
     "gpt_j_ipex":{
-        "model_src_dir": "nlp/huggingface_models/language-modeling/quantization/static_quant",
+        "model_src_dir": "nlp/huggingface_models/language-modeling/quantization/static_quant/ipex",
         "dataset_location": "",
         "input_model": "",
         "main_script": "run_clm_no_trainer.py",
@@ -99,7 +99,7 @@
         "batch_size": 1
     },
     "llama2_7b_ipex":{
-        "model_src_dir": "nlp/huggingface_models/language-modeling/quantization/static_quant",
+        "model_src_dir": "nlp/huggingface_models/language-modeling/quantization/static_quant/ipex",
         "dataset_location": "",
         "input_model": "",
         "main_script": "run_clm_no_trainer.py",
@@ -113,7 +113,7 @@
         "batch_size": 1
     },
     "opt_125m_ipex":{
-        "model_src_dir": "nlp/huggingface_models/language-modeling/quantization/static_quant",
+        "model_src_dir": "nlp/huggingface_models/language-modeling/quantization/static_quant/ipex",
         "dataset_location": "",
         "input_model": "",
         "main_script": "run_clm_no_trainer.py",
@@ -11,3 +11,4 @@ neural-compressor
 intel-extension-for-transformers
 lm_eval==0.4.2
 peft
+optimum-intel
@@ -162,15 +162,6 @@ def get_user_model():
         collate_fn=calib_evaluator.collate_batch,
     )

-    from neural_compressor.torch.quantization import SmoothQuantConfig
-
-    args.alpha = eval(args.alpha)
-    excluded_precisions = [] if args.int8_bf16_mixed else ["bf16"]
-    quant_config = SmoothQuantConfig(alpha=args.alpha, folding=False, excluded_precisions=excluded_precisions)
-
-    if re.search("gpt", user_model.config.model_type):
-        quant_config.set_local(torch.add, SmoothQuantConfig(w_dtype="fp32", act_dtype="fp32"))
-
     from neural_compressor.torch.algorithms.smooth_quant import move_input_to_device
     from tqdm import tqdm
@@ -189,16 +180,39 @@ def run_fn(model):
             if calib_iter >= args.calib_iters:
                 break
         return

+
+    def eval_func(model):
+        config = AutoConfig.from_pretrained(args.model)
+        setattr(model, "config", config)
+
+        from intel_extension_for_transformers.transformers.llm.evaluation.lm_eval import evaluate, LMEvalParser
+        eval_args = LMEvalParser(
+            model="hf",
+            user_model=model,
+            tokenizer=tokenizer,
+            batch_size=args.batch_size,
+            tasks=args.tasks,
+            device="cpu",
+        )
+        results = evaluate(eval_args)
+        if args.tasks == "wikitext":
+            return results["results"][args.tasks]["word_perplexity,none"]
+        else:
+            return results["results"][args.tasks]["acc,none"]

     from utils import get_example_inputs

     example_inputs = get_example_inputs(user_model, calib_dataloader)

-    from neural_compressor.torch.quantization import prepare, convert
-
-    user_model = prepare(model=user_model, quant_config=quant_config, example_inputs=example_inputs)
-    run_fn(user_model)
-    user_model = convert(user_model)
+    from neural_compressor.torch.quantization import SmoothQuantConfig, autotune, TuningConfig
+    tune_config = TuningConfig(config_set=SmoothQuantConfig.get_config_set_for_tuning())
+    user_model = autotune(
+        user_model,
+        tune_config=tune_config,
+        eval_fn=eval_func,
+        run_fn=run_fn,
+        example_inputs=example_inputs,
+    )
     user_model.save(args.output_dir)
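For context on this hunk: the example previously built one SmoothQuantConfig from a user-supplied alpha and ran prepare/run_fn/convert once; it now hands a whole candidate set to autotune, which quantizes the model with each candidate config, scores it with eval_fn against the fp32 baseline, and returns the model that satisfies the tuning criteria. A minimal sketch of that call contract, with a toy model, calibration loop, and constant score standing in for the example's real pieces (TinyModel, run_fn, and eval_fn below are illustrative assumptions, not code from this commit):

import torch
from neural_compressor.torch.quantization import SmoothQuantConfig, TuningConfig, autotune

class TinyModel(torch.nn.Module):  # illustrative stand-in for user_model
    def __init__(self):
        super().__init__()
        self.fc = torch.nn.Linear(8, 8)

    def forward(self, x):
        return self.fc(x)

def run_fn(model):
    # Calibration: run a few batches so activation ranges can be observed.
    for _ in range(4):
        model(torch.randn(1, 8))

def eval_fn(model):
    # Return a scalar score; autotune compares each trial against the baseline.
    return 1.0

best_model = autotune(
    TinyModel(),
    tune_config=TuningConfig(config_set=SmoothQuantConfig.get_config_set_for_tuning()),
    eval_fn=eval_fn,
    run_fn=run_fn,
    example_inputs=torch.randn(1, 8),
)

SmoothQuant here goes through IPEX, so this sketch assumes intel-extension-for-pytorch is installed; whether a toy model gives SmoothQuant anything useful to fold is beside the point, which is the shape of the autotune call.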


@@ -231,11 +245,10 @@ def run_fn(model):
     results = evaluate(eval_args)
     for task_name in args.tasks.split(","):
         if task_name == "wikitext":
-            acc = results["results"][task_name]["word_perplexity,none"]
+            print("Accuracy for %s is: %s" % (task_name, results["results"][task_name]["word_perplexity,none"]))
         else:
-            acc = results["results"][task_name]["acc,none"]
-    print("Accuracy: %.5f" % acc)
-    print("Batch size = %d" % args.batch_size)
+            print("Accuracy for %s is: %s" % (task_name, results["results"][task_name]["acc,none"]))


 if args.performance:
     user_model.eval()
@@ -164,9 +164,9 @@ def get_user_model():
     )


-    from neural_compressor.torch.quantization import get_default_static_config, StaticQuantConfig
-    quant_config = get_default_static_config()
-    quant_config.excluded_precisions = [] if args.int8_bf16_mixed else ["bf16"]
+    from neural_compressor.torch.quantization import StaticQuantConfig
+    excluded_precisions = [] if args.int8_bf16_mixed else ["bf16"]
+    quant_config = StaticQuantConfig(excluded_precisions=excluded_precisions)
     if re.search("gpt", user_model.config.model_type):
         quant_config.set_local("add", StaticQuantConfig(w_dtype="fp32", act_dtype="fp32"))
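The change here is that excluded_precisions is now fixed when the config is constructed rather than patched onto a default config afterwards. A minimal sketch of the resulting static-quant flow, using an illustrative toy model and calibration pass in place of the example's (the prepare/convert calls mirror ones used elsewhere in this commit):

import torch
from neural_compressor.torch.quantization import StaticQuantConfig, prepare, convert

model = torch.nn.Sequential(torch.nn.Linear(8, 8), torch.nn.ReLU())  # illustrative
example_inputs = torch.randn(1, 8)

# ["bf16"] excludes bf16 mixed precision, keeping non-int8 ops in fp32.
quant_config = StaticQuantConfig(excluded_precisions=["bf16"])

prepared = prepare(model, quant_config=quant_config, example_inputs=example_inputs)
prepared(example_inputs)  # calibration pass over representative inputs
q_model = convert(prepared)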

neural_compressor/torch/quantization/autotune.py (1 addition, 1 deletion)
@@ -81,7 +81,7 @@ def autotune(
     best_quant_model = None
     eval_func_wrapper = EvaluationFuncWrapper(eval_fn, eval_args)
     config_loader, tuning_logger, tuning_monitor = init_tuning(tuning_config=tune_config)
-    baseline: float = eval_func_wrapper.evaluate(model)
+    baseline: float = eval_func_wrapper.evaluate(deepcopy(model))
     tuning_monitor.set_baseline(baseline)
     tuning_logger.tuning_start()
     for trial_index, quant_config in enumerate(config_loader, 1):
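This one-line fix scores the fp32 baseline on a deep copy so that an eval_fn with side effects cannot mutate the model that the tuning trials then start from. The eval_func added earlier in this same commit does exactly that kind of mutation via setattr(model, "config", config). A self-contained sketch of the failure mode this guards against (the toy model and eval_fn are illustrative):

import copy
import torch

model = torch.nn.Linear(2, 2)

def eval_fn(m):
    # Like the example's eval_func, evaluation mutates the model it is given.
    setattr(m, "config", {"patched": True})
    return 1.0

baseline = eval_fn(copy.deepcopy(model))  # score a throwaway copy...
assert not hasattr(model, "config")       # ...so the original stays untouched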
neural_compressor/torch/quantization/config.py (8 additions, 2 deletions)
@@ -1582,8 +1582,14 @@ def get_model_info(self, model: torch.nn.Module, example_inputs) -> List[Tuple[s

     @classmethod
     def get_config_set_for_tuning(cls) -> Union[None, "SmoothQuantConfig", List["SmoothQuantConfig"]]:
         """Get the default configuration set for tuning."""
-        return SmoothQuantConfig(alpha=[0.1, 0.5], folding=[True, False], scale_sharing=[True, False])
+        import numpy as np
+
+        return SmoothQuantConfig(
+            alpha=np.arange(0.1, 1.0, 0.1).tolist(),
+            folding=[True, False],
+            scale_sharing=[True, False],
+            excluded_precisions=[["bf16"]],
+        )


 def get_default_sq_config() -> SmoothQuantConfig:
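The tuning set thus grows from two alpha candidates ([0.1, 0.5]) to a nine-point grid from 0.1 to 0.9 in steps of 0.1, which is what gives the new autotune-based example something to search over. One detail worth knowing when reading tuning logs: arange on floats accumulates rounding error, so the candidates are not exact tenths. A quick check (plain numpy, nothing from this repo):

import numpy as np

alphas = np.arange(0.1, 1.0, 0.1).tolist()
print(len(alphas))  # 9
print(alphas)       # values such as 0.30000000000000004 appear in place of
                    # exact tenths; harmless for tuning, visible in logs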
test/3x/torch/quantization/test_static_quant.py (1 addition, 2 deletions)
@@ -216,7 +216,7 @@ def test_static_quant_with_quantize_API(self):
     def test_static_quant_mixed_precision(self):
         fp32_model = copy.deepcopy(self.fp32_model)
         example_inputs = self.input
-        quant_config = get_default_static_config()
+        quant_config = StaticQuantConfig(excluded_precisions=["bf16"])
         prepared_model = prepare(fp32_model, quant_config=quant_config, example_inputs=example_inputs)
         run_fn(prepared_model)
         q_model = convert(prepared_model)
@@ -229,7 +229,6 @@ def test_static_quant_mixed_precision(self):
         q_model = convert(prepared_model)
         assert q_model is not None, "Quantization failed!"

-        quant_config.excluded_precisions = ["bf16"]
         prepared_model = prepare(fp32_model, quant_config=quant_config, example_inputs=example_inputs)
         run_fn(prepared_model)
         q_model = convert(prepared_model)
