# config.yaml
# random seed for batch sampling
seed: 0
# name for this experiment in the local run directory and on wandb
# (??? is OmegaConf's mandatory-value marker: it must be supplied on the command line)
exp_name: ???
# the batch size for training; for FSDP, the batch size per GPU is
# batch_size / (gradient_accumulation_steps * num_gpus)
batch_size: 4
# the batch size during evaluation and sampling, if enabled
eval_batch_size: 16
# debug mode (disables wandb, model checkpointing, etc.)
debug: false
# the port to use for FSDP
fsdp_port: null
# which dataset(s) to train on; can pass a list like datasets=[hh,shp]
datasets:
- hh
# wandb configuration
wandb:
  enabled: true
  entity: null
  project: "beta-DPO"
# to create the local run directory and cache models/datasets,
# we will try each of these directories in order; if none exist,
# we will create the last one and use it
local_dirs:
- /scr-ssd
- /scr
- .cache
# whether or not to generate samples during evaluation; disabling this for
# FSDP/TensorParallel is recommended, because sampling is slow under those trainers
sample_during_eval: true
# how many model samples to generate during evaluation
n_eval_model_samples: 16
# whether to eval at the very beginning of training
do_first_eval: true
# an OmegaConf resolver that returns the local run directory, calling a function in utils.py
local_run_dir: ${get_local_run_dir:${exp_name},${local_dirs}}
# the learning rate; written with an explicit mantissa dot (5.0e-7) so that strict
# YAML 1.1 loaders (e.g. plain PyYAML) parse it as a float rather than a string
lr: 5.0e-7
# number of steps to accumulate over for each batch
# (e.g. if batch_size=4 and gradient_accumulation_steps=2, then we will
# accumulate gradients over 2 microbatches of size 2)
gradient_accumulation_steps: 1
# the maximum gradient norm to clip to
max_grad_norm: 10.0
# the maximum allowed length for an input (prompt + response)
max_length: 512
# the maximum allowed length for a prompt
max_prompt_length: 256
# the number of epochs to train for; if null, must specify n_examples
n_epochs: 1
# the number of examples to train for; if null, must specify n_epochs
n_examples: null
# the number of examples to evaluate on (and sample from, if sample_during_eval is true)
n_eval_examples: 256
# the trainer class to use (e.g. BasicTrainer, FSDPTrainer, TensorParallelTrainer)
trainer: BasicTrainer
# The optimizer to use; we use RMSprop because it works about as well as Adam and is more memory-efficient
optimizer: RMSprop
# number of linear warmup steps for the learning rate
warmup_steps: 150
# whether or not to use activation/gradient checkpointing
activation_checkpointing: false
# evaluate and save model every eval_every steps; written as a plain integer because
# the "_" digit separator (20_000) is accepted only by YAML 1.1 loaders
eval_every: 20000
# prevent wandb from logging more than once per minimum_log_interval_secs
minimum_log_interval_secs: 1.0
defaults:
- _self_
- model: blank_model_fp32  # basic model configuration
- loss: sft  # which loss function, either sft or dpo (specify loss.beta if using dpo)