Skip to content

Commit

Permalink
Abstract out optimizer params and update foreach calling convention
Browse files Browse the repository at this point in the history
  • Loading branch information
drisspg committed Jun 7, 2024
1 parent baa678c commit 66f1725
Showing 1 changed file with 11 additions and 3 deletions.
14 changes: 11 additions & 3 deletions train.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,15 +95,23 @@ def build_optimizer(model, job_config: JobConfig):
name = job_config.optimizer.name
lr = job_config.optimizer.lr
fused = job_config.optimizer.fused
# when fused = False, foreach = True by default.

# Common parameters for both optimizers
optimizer_kwargs = {
'lr': lr,
'betas': (0.9, 0.95),
'weight_decay': 0.1,
'fused': fused,
'foreach': not fused,
}
if name == "Adam":
# TODO: make the optimizer options configurable by toml/cmd args
optimizer = torch.optim.Adam(
model.parameters(), lr=lr, betas=(0.9, 0.95), weight_decay=0.1, fused=fused
model.parameters(), **optimizer_kwargs
)
elif name == "AdamW":
optimizer = torch.optim.AdamW(
model.parameters(), lr=lr, betas=(0.9, 0.95), weight_decay=0.1, fused=fused
model.parameters(), **optimizer_kwargs
)
else:
raise NotImplementedError(f"Optimizer {name} not added.")
Expand Down

0 comments on commit 66f1725

Please sign in to comment.