[examples] add paraformer finetune recipe (#2289)

* [examples] add paraformer finetune recipe

* fix predictor type in yaml
Mddct authored Jan 10, 2024
1 parent eaa3713 commit 50153d1
Showing 8 changed files with 439 additions and 0 deletions.
40 changes: 40 additions & 0 deletions examples/aishell/paraformer/README.md
@@ -0,0 +1,40 @@
# Preliminary
1. Run the command below to convert the funasr-style checkpoint to a wenet-style checkpoint:
```sh
output_dir=exp/paraformer/large
mkdir -p ${output_dir}
. ./path.sh && python wenet/paraformer/convert_paraformer_to_wenet_config_and_ckpt.py \
--output_dir ${output_dir}
```
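2. Optionally, sanity-check the conversion output. The training configs under `conf/` read the converted assets from `exp/paraformer/large`; the three files below are the ones the yamls reference, while the names of the exported config/checkpoint may differ by script version:
```sh
ls exp/paraformer/large
# expect at least: global_cmvn  seg_dict  units.txt
```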

# Performance Record

## Paraformer (original) Result

| decoding mode | CER |
|---------------------------|-------|
| paraformer greedy search | 1.95 |

## Paraformer (full-parameter tuning) Result

* Training info: batch size 28, ctc_weight 0.3, acc_grad 4, 8 × V100 GPUs, 40 epochs
* Decoding info: ctc_weight 0.3, average_num 5
* Git hash: TBD

| decoding mode | CER |
|---------------------------|-------|
| ctc greedy search | 4.00 |
| ctc prefix beam search | 4.00 |
| paraformer greedy search | 2.16 |

## Paraformer-dynamic training (full-parameter tuning) Result

* Training info: batch size 28, ctc_weight 0.3, acc_grad 4, 8 × V100 GPUs, 43 epochs
* Decoding info: ctc_weight 0.3, average_num 5
* Git hash: TBD

| decoding mode             | full | chunk 16 |
|---------------------------|------|----------|
| ctc greedy search         | 3.93 | 4.94     |
| ctc prefix beam search    | 3.93 | 4.94     |
| paraformer greedy search  | 2.08 | 2.41     |
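For reference, a decoding sketch along these lines should cover the modes in the tables above. This is a hedged example assuming the standard `wenet/bin/recognize.py` entry point; the data paths are placeholders and exact flag names may differ across wenet versions:

```sh
. ./path.sh
# avg_5.pt is a placeholder for a checkpoint averaged with average_num 5
python wenet/bin/recognize.py \
  --config exp/finetune_paraformer/train.yaml \
  --checkpoint exp/finetune_paraformer/avg_5.pt \
  --test_data data/test/data.list \
  --modes ctc_greedy_search ctc_prefix_beam_search paraformer_greedy_search \
  --ctc_weight 0.3 \
  --result_dir exp/finetune_paraformer/test
```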
113 changes: 113 additions & 0 deletions examples/aishell/paraformer/conf/train_paraformer.yaml
@@ -0,0 +1,113 @@
encoder: sanm_encoder
encoder_conf:
    attention_dropout_rate: 0.1
    attention_heads: 4
    dropout_rate: 0.1
    input_layer: paraformer_dummy
    kernel_size: 11
    linear_units: 2048
    normalize_before: true
    num_blocks: 50
    output_size: 512
    pos_enc_layer_type: abs_pos_paraformer
    positional_dropout_rate: 0.1
    sanm_shfit: 0

decoder: sanm_decoder
decoder_conf:
    att_layer_num: 16
    attention_heads: 4
    dropout_rate: 0.1
    kernel_size: 11
    linear_units: 2048
    num_blocks: 16
    positional_dropout_rate: 0.1
    sanm_shfit: 0
    self_attention_dropout_rate: 0.1
    src_attention_dropout_rate: 0.1

tokenizer: paraformer
tokenizer_conf:
    seg_dict_path: exp/paraformer/large/seg_dict
    special_tokens:
        <blank>: 0
        <eos>: 2
        <sos>: 1
        <unk>: 8403
    symbol_table_path: exp/paraformer/large/units.txt

ctc: ctc
ctc_conf:
    ctc_blank_id: 0

cmvn: global_cmvn
cmvn_conf:
    cmvn_file: exp/paraformer/large/global_cmvn
    is_json_cmvn: true

model: paraformer
model_conf:
    ctc_weight: 0.3
    length_normalized_loss: false
    lsm_weight: 0.1
    predictor_bias: 1
    predictor_weight: 1.0
    sampling_ratio: 0.75

predictor: paraformer_predictor
predictor_conf:
    cnn_groups: 1
    idim: 512
    l_order: 1
    noise_threshold2: 0.01
    r_order: 1
    residual: false
    smooth_factor2: 0.25
    tail_threshold: 0.45
    threshold: 1.0
    upsample_times: 3
    upsample_type: cnn_blstm
    use_cif1_cnn: false

dataset: asr
dataset_conf:
    filter_conf:
        max_length: 40960
        min_length: 0
        token_max_length: 200
        token_min_length: 1
    resample_conf:
        resample_rate: 16000
    speed_perturb: true
    fbank_conf:
        num_mel_bins: 80
        frame_shift: 10
        frame_length: 25
        dither: 0.1
    spec_aug: true
    spec_aug_conf:
        num_t_mask: 2
        num_f_mask: 2
        max_t: 50
        max_f: 10
    shuffle: true
    shuffle_conf:
        shuffle_size: 1500
    sort: true
    sort_conf:
        sort_size: 500  # sort_size should be less than shuffle_size
    batch_conf:
        batch_type: 'static' # static or dynamic
        batch_size: 28

grad_clip: 5
accum_grad: 1
max_epoch: 45
log_interval: 100

optim: adam
optim_conf:
    lr: 0.0005
scheduler: warmuplr
scheduler_conf:
    warmup_steps: 25000
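
A minimal launch sketch for this config, assuming the standard `wenet/bin/train.py` entry point; the data paths and the converted-checkpoint name are placeholders, and flag names may differ by wenet version:

```sh
. ./path.sh
# wenet_paraformer.pt is a hypothetical name for the converted checkpoint;
# substitute whatever the conversion script actually emitted.
torchrun --nproc_per_node=8 wenet/bin/train.py \
  --config conf/train_paraformer.yaml \
  --data_type raw \
  --train_data data/train/data.list \
  --cv_data data/dev/data.list \
  --checkpoint exp/paraformer/large/wenet_paraformer.pt \
  --model_dir exp/finetune_paraformer
```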
114 changes: 114 additions & 0 deletions examples/aishell/paraformer/conf/train_paraformer_dynamic.yaml
@@ -0,0 +1,114 @@
encoder: sanm_encoder
encoder_conf:
    attention_dropout_rate: 0.1
    attention_heads: 4
    dropout_rate: 0.1
    input_layer: paraformer_dummy
    kernel_size: 11
    linear_units: 2048
    normalize_before: true
    num_blocks: 50
    output_size: 512
    pos_enc_layer_type: abs_pos_paraformer
    positional_dropout_rate: 0.1
    sanm_shfit: 0
    use_dynamic_chunk: true

decoder: sanm_decoder
decoder_conf:
    att_layer_num: 16
    attention_heads: 4
    dropout_rate: 0.1
    kernel_size: 11
    linear_units: 2048
    num_blocks: 16
    positional_dropout_rate: 0.1
    sanm_shfit: 0
    self_attention_dropout_rate: 0.1
    src_attention_dropout_rate: 0.1

tokenizer: paraformer
tokenizer_conf:
    seg_dict_path: exp/paraformer/large/seg_dict
    special_tokens:
        <blank>: 0
        <eos>: 2
        <sos>: 1
        <unk>: 8403
    symbol_table_path: exp/paraformer/large/units.txt

ctc: ctc
ctc_conf:
    ctc_blank_id: 0

cmvn: global_cmvn
cmvn_conf:
    cmvn_file: exp/paraformer/large/global_cmvn
    is_json_cmvn: true

model: paraformer
model_conf:
    ctc_weight: 0.3
    length_normalized_loss: false
    lsm_weight: 0.1
    predictor_bias: 1
    predictor_weight: 1.0
    sampling_ratio: 0.75

predictor: paraformer_predictor
predictor_conf:
    cnn_groups: 1
    idim: 512
    l_order: 1
    noise_threshold2: 0.01
    r_order: 1
    residual: false
    smooth_factor2: 0.25
    tail_threshold: 0.45
    threshold: 1.0
    upsample_times: 3
    upsample_type: cnn_blstm
    use_cif1_cnn: false

dataset: asr
dataset_conf:
    filter_conf:
        max_length: 40960
        min_length: 0
        token_max_length: 200
        token_min_length: 1
    resample_conf:
        resample_rate: 16000
    speed_perturb: true
    fbank_conf:
        num_mel_bins: 80
        frame_shift: 10
        frame_length: 25
        dither: 0.1
    spec_aug: true
    spec_aug_conf:
        num_t_mask: 2
        num_f_mask: 2
        max_t: 50
        max_f: 10
    shuffle: true
    shuffle_conf:
        shuffle_size: 1500
    sort: true
    sort_conf:
        sort_size: 500  # sort_size should be less than shuffle_size
    batch_conf:
        batch_type: 'static' # static or dynamic
        batch_size: 28

grad_clip: 5
accum_grad: 1
max_epoch: 45
log_interval: 100

optim: adam
optim_conf:
    lr: 0.0005
scheduler: warmuplr
scheduler_conf:
    warmup_steps: 25000
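
The only functional difference from `train_paraformer.yaml` is `use_dynamic_chunk: true` in `encoder_conf`, which is what enables the chunk-16 column in the README table. A hedged decoding sketch for the chunked case follows; the paths are placeholders, and `--decoding_chunk_size` is assumed to behave as in other wenet recipes and may differ by version:

```sh
. ./path.sh
python wenet/bin/recognize.py \
  --config exp/finetune_paraformer_dynamic/train.yaml \
  --checkpoint exp/finetune_paraformer_dynamic/avg_5.pt \
  --test_data data/test/data.list \
  --modes paraformer_greedy_search \
  --decoding_chunk_size 16 \
  --result_dir exp/finetune_paraformer_dynamic/test_chunk16
```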
1 change: 1 addition & 0 deletions examples/aishell/paraformer/local
8 changes: 8 additions & 0 deletions examples/aishell/paraformer/path.sh
@@ -0,0 +1,8 @@
export WENET_DIR=$PWD/../../..
export BUILD_DIR=${WENET_DIR}/runtime/libtorch/build
export OPENFST_BIN=${BUILD_DIR}/../fc_base/openfst-build/src
export PATH=$PWD:${BUILD_DIR}/bin:${BUILD_DIR}/kaldi:${OPENFST_BIN}/bin:$PATH

# NOTE(kan-bayashi): Use UTF-8 in Python to avoid UnicodeDecodeError when LC_ALL=C
export PYTHONIOENCODING=UTF-8
export PYTHONPATH=../../../:$PYTHONPATH