From c37d2a7f4b0138a1c38d7ab33488e40e8170c7ab Mon Sep 17 00:00:00 2001
From: Selvaraj Anandaraj
Date: Tue, 9 Jan 2024 23:19:18 -0800
Subject: [PATCH 1/3] Added sample cpu_offloading switch to YAML

Signed-off-by: Selvaraj Anandaraj
---
 .../nlp/language_modeling/conf/megatron_gpt_config.yaml | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/examples/nlp/language_modeling/conf/megatron_gpt_config.yaml b/examples/nlp/language_modeling/conf/megatron_gpt_config.yaml
index 32cab48a68c8..0253bfc52f5f 100755
--- a/examples/nlp/language_modeling/conf/megatron_gpt_config.yaml
+++ b/examples/nlp/language_modeling/conf/megatron_gpt_config.yaml
@@ -194,6 +194,12 @@ model:
   ## Flash Attention
   use_flash_attention: False # Use flash attention in self-attention module, this config does nothing when transformer_engine=True
 
+  ## Offloading Activations/Weights to CPU
+  cpu_offloading: False
+  cpu_offloading_num_layers: 1
+  cpu_offloading_activations: True
+  cpu_offloading_weights: True
+
   ## Network
   sharp: False # Enable the use of SHARP for NCCL data-parallel communications. This is going to be ignored if the network doesn't support SHARP.
 

From 8ddfeeb349a7c8eb2912da67062f33a28b5b428d Mon Sep 17 00:00:00 2001
From: Selvaraj Anandaraj
Date: Tue, 9 Jan 2024 23:39:56 -0800
Subject: [PATCH 2/3] Added comments

Signed-off-by: Selvaraj Anandaraj
---
 examples/nlp/language_modeling/conf/megatron_gpt_config.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/examples/nlp/language_modeling/conf/megatron_gpt_config.yaml b/examples/nlp/language_modeling/conf/megatron_gpt_config.yaml
index 0253bfc52f5f..bbfe07539b83 100755
--- a/examples/nlp/language_modeling/conf/megatron_gpt_config.yaml
+++ b/examples/nlp/language_modeling/conf/megatron_gpt_config.yaml
@@ -196,7 +196,7 @@ model:
 
   ## Offloading Activations/Weights to CPU
   cpu_offloading: False
-  cpu_offloading_num_layers: 1
+  cpu_offloading_num_layers: ${subtract:${num_layers},1} # This value should be in [1, num_layers-1]; the final layer's activations are not offloaded, since that would expose the offloading latency
   cpu_offloading_activations: True
   cpu_offloading_weights: True
 

From 3065ddfe4e727e27f2c6d24b9f7a18d8b313f62c Mon Sep 17 00:00:00 2001
From: Selvaraj Anandaraj
Date: Tue, 23 Jan 2024 13:26:04 -0800
Subject: [PATCH 3/3] Removed arithmetic op

Signed-off-by: Selvaraj Anandaraj
---
 examples/nlp/language_modeling/conf/megatron_gpt_config.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/examples/nlp/language_modeling/conf/megatron_gpt_config.yaml b/examples/nlp/language_modeling/conf/megatron_gpt_config.yaml
index bbfe07539b83..f5ef0eaf27ff 100755
--- a/examples/nlp/language_modeling/conf/megatron_gpt_config.yaml
+++ b/examples/nlp/language_modeling/conf/megatron_gpt_config.yaml
@@ -196,7 +196,7 @@ model:
 
   ## Offloading Activations/Weights to CPU
   cpu_offloading: False
-  cpu_offloading_num_layers: ${subtract:${num_layers},1} # This value should be in [1, num_layers-1]; the final layer's activations are not offloaded, since that would expose the offloading latency
+  cpu_offloading_num_layers: 11 # This value should be in [1, num_layers-1]; the final layer's activations are not offloaded, since that would expose the offloading latency
   cpu_offloading_activations: True
   cpu_offloading_weights: True
 
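
Note on the ${subtract:...} interpolation: "subtract" is not a built-in
OmegaConf resolver, so the value introduced in PATCH 2/3 only resolves if the
application registers a custom resolver first; PATCH 3/3 sidesteps that by
hard-coding 11 (presumably num_layers - 1 for the config's default of 12
layers). Below is a minimal sketch of such a registration using OmegaConf's
public resolver API; it is illustrative only, not NeMo's actual setup.

    from omegaconf import OmegaConf

    # Register a custom "subtract" resolver so that
    # ${subtract:${num_layers},1} evaluates to num_layers - 1 at
    # config-resolution time. Without a registered resolver, OmegaConf
    # raises an interpolation-resolution error when the value is accessed.
    OmegaConf.register_new_resolver("subtract", lambda a, b: int(a) - int(b))

    cfg = OmegaConf.create(
        "num_layers: 12\n"
        "cpu_offloading_num_layers: ${subtract:${num_layers},1}\n"
    )
    print(cfg.cpu_offloading_num_layers)  # -> 11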