-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtrain_sm2.sh
30 lines (28 loc) · 1.17 KB
/
train_sm2.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
export CUDA_VISIBLE_DEVICES=$1
export model_dir=../model
src_lang=$2
tgt_lang=$3
SETTING=SM2-unid
confidence_weight=0.1
batch_max_tokens=8192
max_epochs=20
langpair=$src_lang-$tgt_lang
model_path=$model_dir/$langpair-$SETTING
# The '--arch' should be 'transformer_with_sm2_unidirectional' for SM2 with unidirectional encoder settings.
# If the used device supports bf16, '--bf16' is suggested.
# If source and target language share embeddings, use '--share-all-embeddings'
python train.py ../dataset/$langpair/preprocess --max-epoch $max_epochs --source-lang $src_lang --target-lang $tgt_lang \
--arch transformer_with_sm2_unidirectional \
--share-all-embeddings \
--confidence-weight $confidence_weight \
--min-prefix-src-len 1 \
--max-tokens $batch_max_tokens \
--optimizer adam \
--bf16 \
--save-interval 1 \
--lr 5e-4 --lr-scheduler inverse_sqrt \
--warmup-init-lr 1e-07 --warmup-updates 4000 \
--save-dir $model_path \
--stop-min-lr 1e-09 \
--criterion label_smoothed_cross_entropy_stream_confidence \
--label-smoothing 0.1