forked from OpenPPL/ppq
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathProgramEntrance.py
86 lines (73 loc) · 4.23 KB
/
ProgramEntrance.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
"""
This file will show you how to quantize your network with PPQ
You should prepare your model and calibration dataset as follows:
~/working/model.onnx <-- your model
~/working/data/*.npy <-- your dataset
if you are using caffe model:
~/working/model.caffemodel <-- your model weights
~/working/model.prototxt <-- your model definition
### MAKE SURE YOUR INPUT LAYOUT IS [N, C, H, W] or [C, H, W] ###
quantized model will be generated at: ~/working/quantized.onnx
"""
from ppq import *
from ppq.api import *
from Util import *
# -------------------------------------------------------------------
# User configuration: edit the values below before running the script.
# -------------------------------------------------------------------

# Working directory that is handed to the data loader, quantizer and exporter.
WORKING_DIRECTORY = 'working/'

# Target platform the network is quantized for.
TARGET_PLATFORM = TargetPlatform.PPL_CUDA_INT8

# Framework of the input model: NetworkFramework.ONNX or NetworkFramework.CAFFE.
MODEL_TYPE = NetworkFramework.ONNX

# Input shape of your network.
NETWORK_INPUTSHAPE = [1, 3, 224, 224]

# Batch size used when loading the calibration dataset.
CALIBRATION_BATCHSIZE = 16

# 'cuda' or 'cpu'; 'cpu' is untested.
EXECUTING_DEVICE = 'cuda'

# When True, the layer-wise error analysis at the end of the script is run
# in addition to the graph-wise analysis.
REQUIRE_ANALYSE = True
# -------------------------------------------------------------------
# The SETTING object controls PPQ's quantization logic.
# When the quantization error of your network is too high, modify the
# parameters of the SETTING object to apply targeted optimizations.
# -------------------------------------------------------------------
# Platforms that have a dedicated setting factory; any other platform
# keeps the default setting created below.
_PLATFORM_SETTING_FACTORIES = {
    TargetPlatform.PPL_CUDA_INT8: QuantizationSettingFactory.pplcuda_setting,
    TargetPlatform.DSP_INT8: QuantizationSettingFactory.dsp_setting,
    TargetPlatform.NXP_INT8: QuantizationSettingFactory.nxp_setting,
}

SETTING = QuantizationSettingFactory.default_setting()
if TARGET_PLATFORM in _PLATFORM_SETTING_FACTORIES:
    SETTING = _PLATFORM_SETTING_FACTORIES[TARGET_PLATFORM]()
# Echo the effective configuration so it can be checked before the
# (potentially long) quantization run starts.
print('正准备量化你的网络,检查下列设置:')
print(f'WORKING DIRECTORY : {WORKING_DIRECTORY}')
print(f'TARGET PLATFORM : {TARGET_PLATFORM.name}')
print(f'NETWORK INPUTSHAPE : {NETWORK_INPUTSHAPE}')
print(f'CALIBRATION BATCHSIZE: {CALIBRATION_BATCHSIZE}')

# Build the calibration data loader from the working directory.
dataloader = load_calibration_dataset(
    directory=WORKING_DIRECTORY,
    input_shape=NETWORK_INPUTSHAPE,
    batchsize=CALIBRATION_BATCHSIZE,
)
# Quantize the network using the configuration and calibration data above.
print('网络正量化中,根据你的量化配置,这将需要一段时间:')
quantized = quantize(
    working_directory=WORKING_DIRECTORY,
    setting=SETTING,
    model_type=MODEL_TYPE,
    executing_device=EXECUTING_DEVICE,
    input_shape=NETWORK_INPUTSHAPE,
    target_platform=TARGET_PLATFORM,
    dataloader=dataloader,
)
# Export the quantized graph. ONNX and Caffe models go through the same
# export call; any other model type is not exported.
print('网络量化结束,正在生成目标文件:')
if MODEL_TYPE in (NetworkFramework.ONNX, NetworkFramework.CAFFE):
    export(
        working_directory=WORKING_DIRECTORY,
        quantized=quantized,
        platform=TARGET_PLATFORM,
    )
# -------------------------------------------------------------------
# PPQ measures quantization error as the reciprocal of the signal-to-noise
# ratio, i.e. noise energy / signal energy.
# An error of 0.1 means quantization noise accounts for roughly 10% of the
# overall signal energy.
# Note that graphwise_error_analyse measures the ACCUMULATED error: the last
# layer of a network usually shows a large accumulated error, which is
# caused jointly by all the layers before it.
# Use layerwise_error_analyse to trace the source of the error layer by layer.
# -------------------------------------------------------------------
print('正计算网络量化误差(SNR),最后一层的误差应小于 0.1 以保证量化精度:')
reports = graphwise_error_analyse(
    graph=quantized,
    running_device=EXECUTING_DEVICE,
    dataloader=dataloader,
    # Move each calibration batch onto the executing device.
    collate_fn=lambda batch: batch.to(EXECUTING_DEVICE),
)

# Warn about every entry whose accumulated error exceeds the 0.1 threshold.
for op, snr in reports.items():
    if snr > 0.10:
        ppq_warning(f'层 {op} 的累计量化误差显著,请考虑进行优化')
# Optional per-layer analysis, enabled through REQUIRE_ANALYSE.
if REQUIRE_ANALYSE:
    print('正计算逐层量化误差(SNR),每一层的独立量化误差应小于 0.1 以保证量化精度:')
    layerwise_error_analyse(
        graph=quantized,
        running_device=EXECUTING_DEVICE,
        # Analyse against every output of the quantized graph.
        interested_outputs=list(quantized.outputs),
        dataloader=dataloader,
        # Move each calibration batch onto the executing device.
        collate_fn=lambda batch: batch.to(EXECUTING_DEVICE),
    )