NVIDIA · cuichenx · Nov 7, 2023 · Nov 1, 2023 · Nov 1, 2023 · Nov 1, 2023
diff --git a/scripts/nlp_language_modeling/convert_nemo_gpt_to_mcore.py b/scripts/nlp_language_modeling/convert_nemo_gpt_to_mcore.py
@@ -38,15 +38,15 @@
 
 Then, run this conversion script:
 python convert_nemo_gpt_to_mcore.py \
- --in-file <path to extracted, TP1 PP1 legacy checkpoint folder> \
- --out-file <path to output nemo ile>
+ --in-folder <path to extracted, TP1 PP1 legacy checkpoint folder> \
+ --out-file <path to output nemo file>
 """
 
 
 def get_args():
     parser = ArgumentParser()
     parser.add_argument(
-        "--in-file", type=str, default=None, required=True, help="Path to extracted, TP1 PP1 NeMo GPT checkpoint.",
+        "--in-folder", type=str, default=None, required=True, help="Path to extracted, TP1 PP1 NeMo GPT checkpoint.",
     )
     parser.add_argument(
         "--out-file", type=str, default=None, required=True, help="Path to output mcore weights file (ends in .nemo)."
@@ -57,6 +57,11 @@ def get_args():
         help="Load model in cpu only. Useful if the model cannot fit in GPU memory, "
         "but this option makes the conversion script significantly slower.",
     )
+    parser.add_argument(
+        "--overwrite",
+        action="store_true",
+        help="Run conversion again and overwrite output file when the output file already exists",
+    )
     args = parser.parse_args()
     return args
 
@@ -179,6 +184,7 @@ def restore_model(nemo_file, cpu_only=False):
 def convert(input_nemo_file, output_nemo_file, skip_if_output_exists=True, cpu_only=False):
     if skip_if_output_exists and os.path.exists(output_nemo_file):
         logging.info(f"Output file already exists ({output_nemo_file}), skipping conversion...")
+        logging.info("If you want to overwrite the output file, please run with --overwrite flag")
         return
     nemo_model = restore_model(input_nemo_file, cpu_only=cpu_only)
 
@@ -193,6 +199,8 @@ def convert(input_nemo_file, output_nemo_file, skip_if_output_exists=True, cpu_o
                 mcore_state_dict[mcore_param] = torch.cat(
                     [nemo_state_dict[nemo_param], nemo_state_dict[second_param]], dim=0
                 )
+            else:
+                mcore_state_dict[mcore_param] = nemo_state_dict[nemo_param]
         else:
             mcore_state_dict[mcore_param] = nemo_state_dict[nemo_param]
 
@@ -205,7 +213,7 @@ def convert(input_nemo_file, output_nemo_file, skip_if_output_exists=True, cpu_o
 
     mcore_model.cfg.use_cpu_initialization = False
     mcore_model.save_to(output_nemo_file)
-    logging.info(f"Done. Model saved to {output_nemo_file}")
+    logging.info(f"✅ Done. Model saved to {output_nemo_file}")
 
 
 def run_sanity_checks(nemo_file, mcore_file, cpu_only=False):
@@ -239,7 +247,8 @@ def run_sanity_checks(nemo_file, mcore_file, cpu_only=False):
                 # linear_fc1.weight should map to concat(dense_h_to_4h.weight, dense_h_to_4h_2.weight)
                 # but build_key_mapping only maps it to dense_h_to_4h.weight, so we handle the concat here.
                 second_param = nemo_param.replace("dense_h_to_4h.weight", "dense_h_to_4h_2.weight")
-                nemo_weight = torch.cat([nemo_weight, nemo_state_dict.pop(second_param)])
+                if second_param in nemo_state_dict:
+                    nemo_weight = torch.cat([nemo_weight, nemo_state_dict.pop(second_param)])
             assert torch.allclose(mcore_weight, nemo_weight), f"❌ parameter {mcore_param} does not match"
         except KeyError:
             buffers = [k for k, v in mcore_model.named_buffers()]
@@ -261,11 +270,21 @@ def run_sanity_checks(nemo_file, mcore_file, cpu_only=False):
 if __name__ == '__main__':
     args = get_args()
 
-    input_nemo_file = args.in_file
+    input_nemo_file = args.in_folder
     output_nemo_file = args.out_file
     cpu_only = args.cpu_only
+    overwrite = args.overwrite
 
     os.makedirs(os.path.dirname(output_nemo_file), exist_ok=True)
-    convert(input_nemo_file, output_nemo_file, skip_if_output_exists=True, cpu_only=cpu_only)
+    try:
+        convert(input_nemo_file, output_nemo_file, skip_if_output_exists=not overwrite, cpu_only=cpu_only)
+    except torch.cuda.OutOfMemoryError:
+        logging.error("Could not convert due to torch.cuda.OutOfMemoryError.")
+        logging.error("Please run the script with --cpu-only flag")
+        exit(1)
     torch.cuda.empty_cache()
-    run_sanity_checks(input_nemo_file, output_nemo_file, cpu_only=cpu_only)
+    try:
+        run_sanity_checks(input_nemo_file, output_nemo_file, cpu_only=cpu_only)
+    except torch.cuda.OutOfMemoryError:
+        logging.info("✅ Conversion was successful, but could not run sanity check due to torch.cuda.OutOfMemoryError.")
+        logging.info("Please run the script with the same command again to run sanity check.")