Skip to content

Commit

Permalink
Add dora recipes (NVIDIA#11330)
Browse files Browse the repository at this point in the history
* add dora recipes

Signed-off-by: Chen Cui <[email protected]>

* Apply isort and black reformatting

Signed-off-by: cuichenx <[email protected]>

* pylint

Signed-off-by: Chen Cui <[email protected]>

* Apply isort and black reformatting

Signed-off-by: cuichenx <[email protected]>

---------

Signed-off-by: Chen Cui <[email protected]>
Signed-off-by: cuichenx <[email protected]>
Co-authored-by: cuichenx <[email protected]>
  • Loading branch information
cuichenx and cuichenx authored Nov 21, 2024
1 parent 3765580 commit ebc6a88
Show file tree
Hide file tree
Showing 35 changed files with 277 additions and 177 deletions.
2 changes: 1 addition & 1 deletion nemo/collections/llm/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import json
import os
from copy import deepcopy
from pathlib import Path
Expand Down
12 changes: 7 additions & 5 deletions nemo/collections/llm/recipes/baichuan2_7b.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@
from nemo.collections.llm import Baichuan2Config7B, Baichuan2Model
from nemo.collections.llm.api import finetune, pretrain
from nemo.collections.llm.gpt.data.mock import MockDataModule
from nemo.collections.llm.peft.lora import LoRA
from nemo.collections.llm.peft import PEFT_STR2CLS
from nemo.collections.llm.recipes.finetune_default import default_finetune_recipe
from nemo.collections.llm.recipes.log.default import default_log, default_resume, tensorboard_logger
from nemo.collections.llm.recipes.optim.adam import distributed_fused_adam_with_cosine_annealing
Expand Down Expand Up @@ -254,8 +254,10 @@ def finetune_recipe(
name (str): Name of the fine-tuning run.
num_nodes (int): Number of compute nodes to use.
num_gpus_per_node (int): Number of GPUs per node.
peft_scheme (Optional[str]): Name of the peft scheme to use for fine-tuning. Allowed values: 'lora', 'none'/None.
packed_sequence (Optional[bool]): Packing multiple training sequences into one long sequence for training efficiency. Default sequence length is 2048.
peft_scheme (Optional[str]): Name of the peft scheme to use for fine-tuning.
Allowed values: 'lora'/'dora'/'none'/None.
packed_sequence (Optional[bool]): Packing multiple training sequences into one long sequence for training
efficiency. Default sequence length is 2048.
Returns:
run.Partial: Partial configuration for fine-tuning.
Expand All @@ -279,8 +281,8 @@ def finetune_recipe(
if peft_scheme is None or peft_scheme.lower() == 'none':
recipe.trainer.strategy.tensor_model_parallel_size = 2
recipe.optim.config.lr = 5e-6
elif peft_scheme.lower() == 'lora':
recipe.peft = run.Config(LoRA)
elif peft_scheme.lower() in ['lora', 'dora']:
recipe.peft = run.Config(PEFT_STR2CLS[peft_scheme.lower()])
recipe.optim.config.lr = 1e-4
else:
raise ValueError(f"Unrecognized peft scheme: {peft_scheme}")
Expand Down
12 changes: 7 additions & 5 deletions nemo/collections/llm/recipes/chatglm3_6b.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@
from nemo.collections.llm import ChatGLM3Config6B, ChatGLMModel
from nemo.collections.llm.api import finetune, pretrain
from nemo.collections.llm.gpt.data.mock import MockDataModule
from nemo.collections.llm.peft.lora import LoRA
from nemo.collections.llm.peft import PEFT_STR2CLS
from nemo.collections.llm.recipes.finetune_default import default_finetune_recipe
from nemo.collections.llm.recipes.log.default import default_log, default_resume, tensorboard_logger
from nemo.collections.llm.recipes.optim.adam import distributed_fused_adam_with_cosine_annealing
Expand Down Expand Up @@ -254,8 +254,10 @@ def finetune_recipe(
name (str): Name of the fine-tuning run.
num_nodes (int): Number of compute nodes to use.
num_gpus_per_node (int): Number of GPUs per node.
peft_scheme (Optional[str]): Name of the peft scheme to use for fine-tuning. Allowed values: 'lora', 'none'/None.
packed_sequence (Optional[bool]): Packing multiple training sequences into one long sequence for training efficiency. Default sequence length is 2048.
peft_scheme (Optional[str]): Name of the peft scheme to use for fine-tuning.
Allowed values: 'lora'/'dora'/'none'/None.
packed_sequence (Optional[bool]): Packing multiple training sequences into one long sequence for training
efficiency. Default sequence length is 2048.
Returns:
run.Partial: Partial configuration for fine-tuning.
Expand All @@ -279,8 +281,8 @@ def finetune_recipe(
if peft_scheme is None or peft_scheme.lower() == 'none':
recipe.trainer.strategy.tensor_model_parallel_size = 2
recipe.optim.config.lr = 5e-6
elif peft_scheme.lower() == 'lora':
recipe.peft = run.Config(LoRA)
elif peft_scheme.lower() in ['lora', 'dora']:
recipe.peft = run.Config(PEFT_STR2CLS[peft_scheme.lower()])
recipe.optim.config.lr = 1e-4
else:
raise ValueError(f"Unrecognized peft scheme: {peft_scheme}")
Expand Down
40 changes: 40 additions & 0 deletions nemo/collections/llm/recipes/finetune_default.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,9 +21,11 @@
import nemo.lightning as nl
from nemo.collections import llm
from nemo.collections.llm.gpt.data.packed_sequence import PackedSequenceSpecs
from nemo.collections.llm.peft import DoRA, LoRA
from nemo.collections.llm.recipes.log.default import tensorboard_logger
from nemo.collections.llm.recipes.optim.adam import distributed_fused_adam_with_cosine_annealing
from nemo.collections.llm.recipes.precision.mixed_precision import bf16_mixed
from nemo.lightning.pytorch.callbacks import PEFT


def default_finetune_recipe(
Expand Down Expand Up @@ -158,3 +160,41 @@ def nemo_resume(model_id: str) -> run.Config[nl.AutoResume]:
nl.AutoResume,
restore_config=run.Config(nl.RestoreConfig, path=f"nemo://{model_id}"),
)


@run.cli.factory(name='lora')
def lora() -> run.Config[PEFT]:
    """
    Build the default LoRA PEFT configuration.

    The function is registered as a CLI factory under the name ``lora``.

    Returns:
        run.Config[PEFT]: Configuration wrapping the LoRA class, created
            without any argument overrides.

    Examples:
        CLI usage:
            $ nemo llm finetune -f llama3_8b peft=lora

        Python API usage:
            >>> lora_config = lora()
            >>> print(lora_config)
    """
    lora_config = run.Config(LoRA)
    return lora_config


@run.cli.factory(name='dora')
def dora() -> run.Config[PEFT]:
    """
    Build the default DoRA PEFT configuration.

    The function is registered as a CLI factory under the name ``dora``.

    Returns:
        run.Config[PEFT]: Configuration wrapping the DoRA class, created
            without any argument overrides.

    Examples:
        CLI usage:
            $ nemo llm finetune -f llama3_8b peft=dora

        Python API usage:
            >>> dora_config = dora()
            >>> print(dora_config)
    """
    dora_config = run.Config(DoRA)
    return dora_config
12 changes: 7 additions & 5 deletions nemo/collections/llm/recipes/gemma2_27b.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@

from nemo.collections.llm.api import finetune, pretrain
from nemo.collections.llm.gpt.data.mock import MockDataModule
from nemo.collections.llm.peft.lora import LoRA
from nemo.collections.llm.peft import PEFT_STR2CLS
from nemo.collections.llm.recipes.finetune_default import default_finetune_recipe
from nemo.collections.llm.recipes.gemma2 import gemma2_model, gemma2_trainer
from nemo.collections.llm.recipes.log.default import default_log, default_resume, tensorboard_logger
Expand Down Expand Up @@ -191,8 +191,10 @@ def finetune_recipe(
name (str): Name of the fine-tuning run.
num_nodes (int): Number of compute nodes to use.
num_gpus_per_node (int): Number of GPUs per node.
peft_scheme (Optional[str]): Name of the peft scheme to use for fine-tuning. Allowed values: 'lora', 'none'/None.
packed_sequence (Optional[bool]): Packing multiple training sequences into one long sequence for training efficiency. Default sequence length is 2048.
peft_scheme (Optional[str]): Name of the peft scheme to use for fine-tuning.
Allowed values: 'lora'/'dora'/'none'/None.
packed_sequence (Optional[bool]): Packing multiple training sequences into one long sequence for training
efficiency. Default sequence length is 2048.
Returns:
run.Partial: Partial configuration for fine-tuning.
Expand Down Expand Up @@ -220,8 +222,8 @@ def finetune_recipe(
recipe.optim.config.lr = 5e-6
recipe.trainer.strategy.tensor_model_parallel_size = 8
recipe.trainer.strategy.pipeline_model_parallel_size = 2
elif peft_scheme.lower() == 'lora':
recipe.peft = run.Config(LoRA)
elif peft_scheme.lower() in ['lora', 'dora']:
recipe.peft = run.Config(PEFT_STR2CLS[peft_scheme.lower()])
recipe.trainer.strategy.tensor_model_parallel_size = 4
recipe.optim.config.lr = 1e-4
else:
Expand Down
12 changes: 7 additions & 5 deletions nemo/collections/llm/recipes/gemma2_2b.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@

from nemo.collections.llm.api import finetune, pretrain
from nemo.collections.llm.gpt.data.mock import MockDataModule
from nemo.collections.llm.peft.lora import LoRA
from nemo.collections.llm.peft import PEFT_STR2CLS
from nemo.collections.llm.recipes.finetune_default import default_finetune_recipe
from nemo.collections.llm.recipes.gemma2 import gemma2_model, gemma2_trainer
from nemo.collections.llm.recipes.log.default import default_log, default_resume, tensorboard_logger
Expand Down Expand Up @@ -191,8 +191,10 @@ def finetune_recipe(
name (str): Name of the fine-tuning run.
num_nodes (int): Number of compute nodes to use.
num_gpus_per_node (int): Number of GPUs per node.
peft_scheme (Optional[str]): Name of the peft scheme to use for fine-tuning. Allowed values: 'lora', 'none'/None.
packed_sequence (Optional[bool]): Packing multiple training sequences into one long sequence for training efficiency. Default sequence length is 2048.
peft_scheme (Optional[str]): Name of the peft scheme to use for fine-tuning.
Allowed values: 'lora'/'dora'/'none'/None.
packed_sequence (Optional[bool]): Packing multiple training sequences into one long sequence for training
efficiency. Default sequence length is 2048.
Returns:
run.Partial: Partial configuration for fine-tuning.
Expand All @@ -218,8 +220,8 @@ def finetune_recipe(

if peft_scheme is None or peft_scheme.lower() == 'none':
recipe.optim.config.lr = 5e-6
elif peft_scheme.lower() == 'lora':
recipe.peft = run.Config(LoRA)
elif peft_scheme.lower() in ['lora', 'dora']:
recipe.peft = run.Config(PEFT_STR2CLS[peft_scheme.lower()])
recipe.optim.config.lr = 1e-4
else:
raise ValueError(f"Unrecognized peft scheme: {peft_scheme}")
Expand Down
12 changes: 7 additions & 5 deletions nemo/collections/llm/recipes/gemma2_9b.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@

from nemo.collections.llm.api import finetune, pretrain
from nemo.collections.llm.gpt.data.mock import MockDataModule
from nemo.collections.llm.peft.lora import LoRA
from nemo.collections.llm.peft import PEFT_STR2CLS
from nemo.collections.llm.recipes.finetune_default import default_finetune_recipe
from nemo.collections.llm.recipes.gemma2 import gemma2_model, gemma2_trainer
from nemo.collections.llm.recipes.log.default import default_log, default_resume, tensorboard_logger
Expand Down Expand Up @@ -191,8 +191,10 @@ def finetune_recipe(
name (str): Name of the fine-tuning run.
num_nodes (int): Number of compute nodes to use.
num_gpus_per_node (int): Number of GPUs per node.
peft_scheme (Optional[str]): Name of the peft scheme to use for fine-tuning. Allowed values: 'lora', 'none'/None.
packed_sequence (Optional[bool]): Packing multiple training sequences into one long sequence for training efficiency. Default sequence length is 2048.
peft_scheme (Optional[str]): Name of the peft scheme to use for fine-tuning.
Allowed values: 'lora'/'dora'/'none'/None.
packed_sequence (Optional[bool]): Packing multiple training sequences into one long sequence for training
efficiency. Default sequence length is 2048.
Returns:
run.Partial: Partial configuration for fine-tuning.
Expand All @@ -219,8 +221,8 @@ def finetune_recipe(
if peft_scheme is None or peft_scheme.lower() == 'none':
recipe.optim.config.lr = 5e-6
recipe.trainer.strategy.tensor_model_parallel_size = 4
elif peft_scheme.lower() == 'lora':
recipe.peft = run.Config(LoRA)
elif peft_scheme.lower() in ['lora', 'dora']:
recipe.peft = run.Config(PEFT_STR2CLS[peft_scheme.lower()])
recipe.optim.config.lr = 1e-4
else:
raise ValueError(f"Unrecognized peft scheme: {peft_scheme}")
Expand Down
12 changes: 7 additions & 5 deletions nemo/collections/llm/recipes/gemma_2b.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@
from nemo.collections.llm import GemmaConfig2B, GemmaModel
from nemo.collections.llm.api import finetune, pretrain
from nemo.collections.llm.gpt.data.mock import MockDataModule
from nemo.collections.llm.peft.lora import LoRA
from nemo.collections.llm.peft import PEFT_STR2CLS
from nemo.collections.llm.recipes.finetune_default import default_finetune_recipe
from nemo.collections.llm.recipes.log.default import default_log, default_resume, tensorboard_logger
from nemo.collections.llm.recipes.optim.adam import distributed_fused_adam_with_cosine_annealing
Expand Down Expand Up @@ -253,8 +253,10 @@ def finetune_recipe(
name (str): Name of the fine-tuning run.
num_nodes (int): Number of compute nodes to use.
num_gpus_per_node (int): Number of GPUs per node.
peft_scheme (Optional[str]): Name of the peft scheme to use for fine-tuning. Allowed values: 'lora', 'none'/None.
packed_sequence (Optional[bool]): If true, fine-tuning sequences will be packed into batches up to the given maximum seq_length for better efficiency.
peft_scheme (Optional[str]): Name of the peft scheme to use for fine-tuning.
Allowed values: 'lora'/'dora'/'none'/None.
packed_sequence (Optional[bool]): Packing multiple training sequences into one long sequence for training
efficiency. Default sequence length is 2048.
Returns:
run.Partial: Partial configuration for fine-tuning.
Expand Down Expand Up @@ -284,8 +286,8 @@ def finetune_recipe(
if peft_scheme is None or peft_scheme.lower() == 'none':
recipe.trainer.strategy.context_parallel_size = 2
recipe.optim.config.lr = 5e-6
elif peft_scheme.lower() == 'lora':
recipe.peft = run.Config(LoRA)
elif peft_scheme.lower() in ['lora', 'dora']:
recipe.peft = run.Config(PEFT_STR2CLS[peft_scheme.lower()])
recipe.optim.config.lr = 1e-4
else:
raise ValueError(f"Unrecognized peft scheme: {peft_scheme}")
Expand Down
12 changes: 7 additions & 5 deletions nemo/collections/llm/recipes/gemma_7b.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@
from nemo.collections.llm import GemmaConfig7B, GemmaModel
from nemo.collections.llm.api import finetune, pretrain
from nemo.collections.llm.gpt.data.mock import MockDataModule
from nemo.collections.llm.peft.lora import LoRA
from nemo.collections.llm.peft import PEFT_STR2CLS
from nemo.collections.llm.recipes.finetune_default import default_finetune_recipe
from nemo.collections.llm.recipes.log.default import default_log, default_resume, tensorboard_logger
from nemo.collections.llm.recipes.optim.adam import distributed_fused_adam_with_cosine_annealing
Expand Down Expand Up @@ -256,8 +256,10 @@ def finetune_recipe(
name (str): Name of the fine-tuning run.
num_nodes (int): Number of compute nodes to use.
num_gpus_per_node (int): Number of GPUs per node.
peft_scheme (Optional[str]): Name of the peft scheme to use for fine-tuning. Allowed values: 'lora', 'none'/None.
packed_sequence (Optional[bool]): Packing multiple training sequences into one long sequence for training efficiency. Default sequence length is 2048.
peft_scheme (Optional[str]): Name of the peft scheme to use for fine-tuning.
Allowed values: 'lora'/'dora'/'none'/None.
packed_sequence (Optional[bool]): Packing multiple training sequences into one long sequence for training
efficiency. Default sequence length is 2048.
Returns:
run.Partial: Partial configuration for fine-tuning.
Expand Down Expand Up @@ -287,8 +289,8 @@ def finetune_recipe(
if peft_scheme is None or peft_scheme.lower() == 'none':
recipe.trainer.strategy.tensor_model_parallel_size = 2
recipe.optim.config.lr = 5e-6
elif peft_scheme.lower() == 'lora':
recipe.peft = run.Config(LoRA)
elif peft_scheme.lower() in ['lora', 'dora']:
recipe.peft = run.Config(PEFT_STR2CLS[peft_scheme.lower()])
recipe.optim.config.lr = 1e-4
else:
raise ValueError(f"Unrecognized peft scheme: {peft_scheme}")
Expand Down
14 changes: 8 additions & 6 deletions nemo/collections/llm/recipes/llama31_405b.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@
from nemo.collections.llm.gpt.data.mock import MockDataModule
from nemo.collections.llm.gpt.data.packed_sequence import PackedSequenceSpecs
from nemo.collections.llm.gpt.model.llama import Llama31Config405B, LlamaModel
from nemo.collections.llm.peft.lora import LoRA
from nemo.collections.llm.peft import PEFT_STR2CLS
from nemo.collections.llm.recipes.finetune_default import default_finetune_recipe
from nemo.collections.llm.recipes.log.default import default_log, default_resume, tensorboard_logger
from nemo.collections.llm.recipes.optim.adam import distributed_fused_adam_with_cosine_annealing
Expand Down Expand Up @@ -266,7 +266,8 @@ def finetune_recipe(
name (str): Name of the fine-tuning run.
num_nodes (int): Number of compute nodes to use.
num_gpus_per_node (int): Number of GPUs per node.
peft_scheme (Optional[str]): Name of the peft scheme to use for finetuning. Allowed values: 'lora'/'none'/None.
peft_scheme (Optional[str]): Name of the peft scheme to use for fine-tuning.
Allowed values: 'lora'/'dora'/'none'/None.
seq_length (int): Maximum number of tokens per microbatch.
packed_sequence (Optional[bool]): If true, fine-tuning sequences will be packed into batches up to the given
maximum seq_length for better efficiency. By default, this value equals performance_mode.
Expand Down Expand Up @@ -296,7 +297,7 @@ def finetune_recipe(
if num_nodes is None:
if peft_scheme is None or peft_scheme.lower() == 'none':
num_nodes = 12
elif peft_scheme.lower() == 'lora':
elif peft_scheme.lower() in ['lora', 'dora']:
num_nodes = 3

recipe = default_finetune_recipe(
Expand All @@ -307,8 +308,8 @@ def finetune_recipe(
recipe.trainer.strategy.pipeline_model_parallel_size = 14
recipe.data.global_batch_size = 6
recipe.optim.config.lr = 5e-6
elif peft_scheme.lower() == 'lora':
recipe.peft = run.Config(LoRA)
elif peft_scheme.lower() in ['lora', 'dora']:
recipe.peft = run.Config(PEFT_STR2CLS[peft_scheme.lower()])
recipe.peft.dim = 16
recipe.peft.alpha = 32
recipe.optim.config.use_distributed_optimizer = False
Expand Down Expand Up @@ -348,7 +349,8 @@ def finetune_performance_optimizations(
Args:
recipe (run.Partial): Base fine-tuning recipe to which performance optimizations will be added
peft_scheme (str): Name of the peft scheme to use for fine-tuning. Allowed values: 'lora', 'none'/None.
peft_scheme (Optional[str]): Name of the peft scheme to use for fine-tuning.
Allowed values: 'lora'/'dora'/'none'/None.
Returns:
run.Partial: Partial configuration for performance-optimized fine-tuning.
Expand Down
Loading

0 comments on commit ebc6a88

Please sign in to comment.