-
Notifications
You must be signed in to change notification settings - Fork 1.1k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[examples] add paraformer finetune recipe (#2289)
* [examples] add paraformer finetune recipe * fix predictor type in yaml
- Loading branch information
Showing
8 changed files
with
439 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,40 @@ | ||
# Preliminary
1. Run the command below to convert the funasr-style checkpoint to a wenet-style checkpoint:
```sh
output_dir=exp/paraformer/large
mkdir -p ${output_dir}
. ./path.sh && python wenet/paraformer/convert_paraformer_to_wenet_config_and_ckpt.py \
  --output_dir ${output_dir}
```

# Performance Record

## Paraformer (original) Result

| decoding mode             | CER   |
|---------------------------|-------|
| paraformer greedy search  | 1.95  |

## Paraformer (full-parameter tuning) Result

* Training info: batch size 28, ctc_weight: 0.3, acc_grad 4, 8 * v100 gpu, 40 epochs
* Decoding info: ctc_weight 0.3, average_num 5
* Git hash: TBD

| decoding mode             | CER   |
|---------------------------|-------|
| ctc greedy search         | 4.00  |
| ctc prefix beam search    | 4.00  |
| paraformer greedy search  | 2.16  |

## Paraformer-dynamic training (full-parameter tuning) Result

* Training info: batch size 28, ctc_weight: 0.3, acc_grad 4, 8 * v100 gpu, 43 epochs
* Decoding info: ctc_weight 0.3, average_num 5
* Git hash: TBD

| decoding mode             | full   | 16   |
|---------------------------|--------|------|
| ctc greedy search         | 3.93   | 4.94 |
| ctc prefix beam search    | 3.93   | 4.94 |
| paraformer greedy search  | 2.08   | 2.41 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,113 @@ | ||
# Paraformer (large) full-parameter fine-tuning config.
# Model topology and exp/paraformer/large/* paths must match the checkpoint
# produced by convert_paraformer_to_wenet_config_and_ckpt.py.
encoder: sanm_encoder
encoder_conf:
  attention_dropout_rate: 0.1
  attention_heads: 4
  dropout_rate: 0.1
  input_layer: paraformer_dummy
  kernel_size: 11
  linear_units: 2048
  normalize_before: true
  num_blocks: 50
  output_size: 512
  pos_enc_layer_type: abs_pos_paraformer
  positional_dropout_rate: 0.1
  sanm_shfit: 0  # NOTE(review): spelling follows the upstream config key — do not "fix"

decoder: sanm_decoder
decoder_conf:
  att_layer_num: 16
  attention_heads: 4
  dropout_rate: 0.1
  kernel_size: 11
  linear_units: 2048
  num_blocks: 16
  positional_dropout_rate: 0.1
  sanm_shfit: 0
  self_attention_dropout_rate: 0.1
  src_attention_dropout_rate: 0.1

tokenizer: paraformer
tokenizer_conf:
  seg_dict_path: exp/paraformer/large/seg_dict
  special_tokens:
    <blank>: 0
    <eos>: 2
    <sos>: 1
    <unk>: 8403
  symbol_table_path: exp/paraformer/large/units.txt

ctc: ctc
ctc_conf:
  ctc_blank_id: 0

cmvn: global_cmvn
cmvn_conf:
  cmvn_file: exp/paraformer/large/global_cmvn
  is_json_cmvn: true

model: paraformer
model_conf:
  ctc_weight: 0.3
  length_normalized_loss: false
  lsm_weight: 0.1
  predictor_bias: 1
  predictor_weight: 1.0
  sampling_ratio: 0.75

predictor: paraformer_predictor
predictor_conf:
  cnn_groups: 1
  idim: 512
  l_order: 1
  noise_threshold2: 0.01
  r_order: 1
  residual: false
  smooth_factor2: 0.25
  tail_threshold: 0.45
  threshold: 1.0
  upsample_times: 3
  upsample_type: cnn_blstm
  use_cif1_cnn: false

dataset: asr
dataset_conf:
  filter_conf:
    max_length: 40960
    min_length: 0
    token_max_length: 200
    token_min_length: 1
  resample_conf:
    resample_rate: 16000
  speed_perturb: true
  fbank_conf:
    num_mel_bins: 80
    frame_shift: 10
    frame_length: 25
    dither: 0.1
  spec_aug: true
  spec_aug_conf:
    num_t_mask: 2
    num_f_mask: 2
    max_t: 50
    max_f: 10
  shuffle: true
  shuffle_conf:
    shuffle_size: 1500
  sort: true
  sort_conf:
    sort_size: 500  # sort_size should be less than shuffle_size
  batch_conf:
    batch_type: 'static'  # static or dynamic
    batch_size: 28

grad_clip: 5
accum_grad: 1
max_epoch: 45
log_interval: 100

optim: adam
optim_conf:
  lr: 0.0005
scheduler: warmuplr
scheduler_conf:
  warmup_steps: 25000
114 changes: 114 additions & 0 deletions
114
examples/aishell/paraformer/conf/train_paraformer_dynamic.yaml
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,114 @@ | ||
# Paraformer (large) full-parameter fine-tuning config with dynamic-chunk
# training enabled (use_dynamic_chunk: true); otherwise identical to
# train_paraformer.yaml. Paths under exp/paraformer/large/* must match the
# checkpoint produced by convert_paraformer_to_wenet_config_and_ckpt.py.
encoder: sanm_encoder
encoder_conf:
  attention_dropout_rate: 0.1
  attention_heads: 4
  dropout_rate: 0.1
  input_layer: paraformer_dummy
  kernel_size: 11
  linear_units: 2048
  normalize_before: true
  num_blocks: 50
  output_size: 512
  pos_enc_layer_type: abs_pos_paraformer
  positional_dropout_rate: 0.1
  sanm_shfit: 0  # NOTE(review): spelling follows the upstream config key — do not "fix"
  use_dynamic_chunk: true

decoder: sanm_decoder
decoder_conf:
  att_layer_num: 16
  attention_heads: 4
  dropout_rate: 0.1
  kernel_size: 11
  linear_units: 2048
  num_blocks: 16
  positional_dropout_rate: 0.1
  sanm_shfit: 0
  self_attention_dropout_rate: 0.1
  src_attention_dropout_rate: 0.1

tokenizer: paraformer
tokenizer_conf:
  seg_dict_path: exp/paraformer/large/seg_dict
  special_tokens:
    <blank>: 0
    <eos>: 2
    <sos>: 1
    <unk>: 8403
  symbol_table_path: exp/paraformer/large/units.txt

ctc: ctc
ctc_conf:
  ctc_blank_id: 0

cmvn: global_cmvn
cmvn_conf:
  cmvn_file: exp/paraformer/large/global_cmvn
  is_json_cmvn: true

model: paraformer
model_conf:
  ctc_weight: 0.3
  length_normalized_loss: false
  lsm_weight: 0.1
  predictor_bias: 1
  predictor_weight: 1.0
  sampling_ratio: 0.75

predictor: paraformer_predictor
predictor_conf:
  cnn_groups: 1
  idim: 512
  l_order: 1
  noise_threshold2: 0.01
  r_order: 1
  residual: false
  smooth_factor2: 0.25
  tail_threshold: 0.45
  threshold: 1.0
  upsample_times: 3
  upsample_type: cnn_blstm
  use_cif1_cnn: false

dataset: asr
dataset_conf:
  filter_conf:
    max_length: 40960
    min_length: 0
    token_max_length: 200
    token_min_length: 1
  resample_conf:
    resample_rate: 16000
  speed_perturb: true
  fbank_conf:
    num_mel_bins: 80
    frame_shift: 10
    frame_length: 25
    dither: 0.1
  spec_aug: true
  spec_aug_conf:
    num_t_mask: 2
    num_f_mask: 2
    max_t: 50
    max_f: 10
  shuffle: true
  shuffle_conf:
    shuffle_size: 1500
  sort: true
  sort_conf:
    sort_size: 500  # sort_size should be less than shuffle_size
  batch_conf:
    batch_type: 'static'  # static or dynamic
    batch_size: 28

grad_clip: 5
accum_grad: 1
max_epoch: 45
log_interval: 100

optim: adam
optim_conf:
  lr: 0.0005
scheduler: warmuplr
scheduler_conf:
  warmup_steps: 25000
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
../whisper/local
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,8 @@ | ||
# path.sh — sourced by recipe scripts to put the wenet runtime binaries,
# kaldi tools and openfst on PATH, and the repo root on PYTHONPATH.
# Scrape artifacts (trailing "|  ||" diff-table junk) removed: they made
# every export line a shell syntax error.
export WENET_DIR=$PWD/../../..
export BUILD_DIR=${WENET_DIR}/runtime/libtorch/build
export OPENFST_BIN=${BUILD_DIR}/../fc_base/openfst-build/src
export PATH=$PWD:${BUILD_DIR}/bin:${BUILD_DIR}/kaldi:${OPENFST_BIN}/bin:$PATH

# NOTE(kan-bayashi): Use UTF-8 in Python to avoid UnicodeDecodeError when LC_ALL=C
export PYTHONIOENCODING=UTF-8
export PYTHONPATH=../../../:$PYTHONPATH
Oops, something went wrong.