From 3ddd9cdacfda4612c261da4afa7d8e9dd2798ce8 Mon Sep 17 00:00:00 2001 From: Yi Dong Date: Sat, 7 Oct 2023 01:13:37 +0000 Subject: [PATCH] fix non Signed-off-by: Yi Dong --- .../nlp/data/language_modeling/megatron/gpt_sft_dataset.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/nemo/collections/nlp/data/language_modeling/megatron/gpt_sft_dataset.py b/nemo/collections/nlp/data/language_modeling/megatron/gpt_sft_dataset.py index 36efb8997cab..9c6e50f5e43f 100644 --- a/nemo/collections/nlp/data/language_modeling/megatron/gpt_sft_dataset.py +++ b/nemo/collections/nlp/data/language_modeling/megatron/gpt_sft_dataset.py @@ -107,7 +107,8 @@ def __init__( "end_of_turn": "\n", "end_of_name": "\n", } - self.special_tokens = special_tokens + else: + self.special_tokens = special_tokens if hf_dataset: self.indexed_dataset = load_dataset(