Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix mcore conversion bug #7846

Merged
Merged 10 commits on Nov 7, 2023
35 changes: 27 additions & 8 deletions scripts/nlp_language_modeling/convert_nemo_gpt_to_mcore.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -38,15 +38,15 @@

Then, run this conversion script:
python convert_nemo_gpt_to_mcore.py \
--in-file <path to extracted, TP1 PP1 legacy checkpoint folder> \
--out-file <path to output nemo ile>
--in-folder <path to extracted, TP1 PP1 legacy checkpoint folder> \
--out-file <path to output nemo file>
"""


def get_args():
parser = ArgumentParser()
parser.add_argument(
"--in-file", type=str, default=None, required=True, help="Path to extracted, TP1 PP1 NeMo GPT checkpoint.",
"--in-folder", type=str, default=None, required=True, help="Path to extracted, TP1 PP1 NeMo GPT checkpoint.",
)
parser.add_argument(
"--out-file", type=str, default=None, required=True, help="Path to output mcore weights file (ends in .nemo)."
Expand All @@ -57,6 +57,11 @@ def get_args():
help="Load model in cpu only. Useful if the model cannot fit in GPU memory, "
"but this option makes the conversion script significantly slower.",
)
parser.add_argument(
"--overwrite",
action="store_true",
help="Run conversion again and overwrite output file when the output file already exists",
)
args = parser.parse_args()
return args

Expand Down Expand Up @@ -179,6 +184,7 @@ def restore_model(nemo_file, cpu_only=False):
def convert(input_nemo_file, output_nemo_file, skip_if_output_exists=True, cpu_only=False):
if skip_if_output_exists and os.path.exists(output_nemo_file):
logging.info(f"Output file already exists ({output_nemo_file}), skipping conversion...")
logging.info("If you want to overwrite the output file, please run with --overwrite flag")
return
nemo_model = restore_model(input_nemo_file, cpu_only=cpu_only)

Expand All @@ -193,6 +199,8 @@ def convert(input_nemo_file, output_nemo_file, skip_if_output_exists=True, cpu_o
mcore_state_dict[mcore_param] = torch.cat(
[nemo_state_dict[nemo_param], nemo_state_dict[second_param]], dim=0
)
else:
mcore_state_dict[mcore_param] = nemo_state_dict[nemo_param]
else:
mcore_state_dict[mcore_param] = nemo_state_dict[nemo_param]

Expand All @@ -205,7 +213,7 @@ def convert(input_nemo_file, output_nemo_file, skip_if_output_exists=True, cpu_o

mcore_model.cfg.use_cpu_initialization = False
mcore_model.save_to(output_nemo_file)
logging.info(f"Done. Model saved to {output_nemo_file}")
logging.info(f"Done. Model saved to {output_nemo_file}")


def run_sanity_checks(nemo_file, mcore_file, cpu_only=False):
Expand Down Expand Up @@ -239,7 +247,8 @@ def run_sanity_checks(nemo_file, mcore_file, cpu_only=False):
# linear_fc1.weight should map to concat(dense_h_to_4h.weight, dense_h_to_4h_2.weight)
# but build_key_mapping only maps it to dense_h_to_4h.weight, so we handle the concat here.
second_param = nemo_param.replace("dense_h_to_4h.weight", "dense_h_to_4h_2.weight")
nemo_weight = torch.cat([nemo_weight, nemo_state_dict.pop(second_param)])
if second_param in nemo_state_dict:
nemo_weight = torch.cat([nemo_weight, nemo_state_dict.pop(second_param)])
assert torch.allclose(mcore_weight, nemo_weight), f"❌ parameter {mcore_param} does not match"
except KeyError:
buffers = [k for k, v in mcore_model.named_buffers()]
Expand All @@ -261,11 +270,21 @@ def run_sanity_checks(nemo_file, mcore_file, cpu_only=False):
if __name__ == '__main__':
args = get_args()

input_nemo_file = args.in_file
input_nemo_file = args.in_folder
output_nemo_file = args.out_file
cpu_only = args.cpu_only
overwrite = args.overwrite

os.makedirs(os.path.dirname(output_nemo_file), exist_ok=True)
convert(input_nemo_file, output_nemo_file, skip_if_output_exists=True, cpu_only=cpu_only)
try:
convert(input_nemo_file, output_nemo_file, skip_if_output_exists=not overwrite, cpu_only=cpu_only)
except torch.cuda.OutOfMemoryError:
logging.error("Could not convert due to torch.cuda.OutOfMemoryError.")
logging.error("Please run the script with --cpu-only flag")
exit(1)
torch.cuda.empty_cache()
run_sanity_checks(input_nemo_file, output_nemo_file, cpu_only=cpu_only)
try:
run_sanity_checks(input_nemo_file, output_nemo_file, cpu_only=cpu_only)
except torch.cuda.OutOfMemoryError:
logging.info("✅ Conversion was successful, but could not run sanity check due to torch.cuda.OutOfMemoryError.")
logging.info("Please run the script with the same command again to run sanity check.")
Loading