From 715e1ac5e3f5db1c5db1df436a69c3ce433b492f Mon Sep 17 00:00:00 2001 From: iMountTai <2506700016@qq.com> Date: Thu, 10 Aug 2023 21:04:47 +0800 Subject: [PATCH 01/12] update text-generation-webui support --- README.md | 2 +- README_EN.md | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index c4a3fa6..0bbe317 100644 --- a/README.md +++ b/README.md @@ -131,7 +131,7 @@ | [**🤗Transformers**](https://github.com/huggingface/transformers) | 原生transformers推理接口 | ✅ | ✅ | ✅ | ✅ | ❌ | ✅ | [link](https://github.com/ymcui/Chinese-LLaMA-Alpaca-2/wiki/inference_with_transformers_zh) | | [**Colab Demo**](https://colab.research.google.com/drive/1yu0eZ3a66by8Zqm883LLtRQrguBAb9MR?usp=sharing) | 在Colab中启动交互界面 | ✅ | ✅ | ✅ | ✅ | ❌ | ✅ | [link](https://colab.research.google.com/drive/1yu0eZ3a66by8Zqm883LLtRQrguBAb9MR?usp=sharing) | | [**仿OpenAI API调用**](https://platform.openai.com/docs/api-reference) | 仿OpenAI API接口的服务器Demo | ✅ | ✅ | ✅ | ❌ | ✅ | ✅ | [link](https://github.com/ymcui/Chinese-LLaMA-Alpaca-2/wiki/api_calls_zh) | -| [**text-generation-webui**](https://github.com/oobabooga/text-generation-webui) | 前端Web UI界面的部署方式 | ✅ | ✅ | ✅ | ✅ | ❌ | ❌ | [link](https://github.com/ymcui/Chinese-LLaMA-Alpaca-2/wiki/text-generation-webui_zh) | +| [**text-generation-webui**](https://github.com/oobabooga/text-generation-webui) | 前端Web UI界面的部署方式 | ✅ | ✅ | ✅ | ✅ | ✅ | ❌ | [link](https://github.com/ymcui/Chinese-LLaMA-Alpaca-2/wiki/text-generation-webui_zh) | | [**LangChain**](https://github.com/hwchase17/langchain) | 适合二次开发的大模型应用开源框架 | ✅ | ✅ | ✅ | ❌ | ❌ | ❌ | [link](https://github.com/ymcui/Chinese-LLaMA-Alpaca-2/wiki/langchain_zh) | | [**privateGPT**](https://github.com/imartinez/privateGPT) | 基于LangChain的多文档本地问答框架 | ✅ | ✅ | ✅ | ❌ | ❌ | ❌ | [link](https://github.com/ymcui/Chinese-LLaMA-Alpaca-2/wiki/privategpt_zh) | diff --git a/README_EN.md b/README_EN.md index 14f2391..29dea92 100644 --- a/README_EN.md +++ b/README_EN.md @@ -125,7 +125,7 @@ The models in this project mainly support the following quantization, inference, | [**🤗Transformers**](https://github.com/huggingface/transformers) | Native transformers inference interface | ✅ | ✅ | ✅ | ✅ | ❌ | ✅ | [link](https://github.com/ymcui/Chinese-LLaMA-Alpaca-2/wiki/inference_with_transformers_en) | | [**Colab Demo**](https://colab.research.google.com/drive/1yu0eZ3a66by8Zqm883LLtRQrguBAb9MR?usp=sharing) | Running a Gradio web demo in Colab | ✅ | ✅ | ✅ | ✅ | ❌ | ✅ | [link](https://colab.research.google.com/drive/1yu0eZ3a66by8Zqm883LLtRQrguBAb9MR?usp=sharing) | | [**OpenAI API Calls**](https://platform.openai.com/docs/api-reference) | A server that implements OpenAI API | ✅ | ✅ | ✅ | ❌ | ✅ | ✅ | [link](https://github.com/ymcui/Chinese-LLaMA-Alpaca-2/wiki/api_calls_en) | -| [**text-generation-webui**](https://github.com/oobabooga/text-generation-webui) | A tool for deploying model as a web UI | ✅ | ✅ | ✅ | ✅ | ❌ | ❌ | [link](https://github.com/ymcui/Chinese-LLaMA-Alpaca-2/wiki/text-generation-webui_en) | +| [**text-generation-webui**](https://github.com/oobabooga/text-generation-webui) | A tool for deploying model as a web UI | ✅ | ✅ | ✅ | ✅ | ✅ | ❌ | [link](https://github.com/ymcui/Chinese-LLaMA-Alpaca-2/wiki/text-generation-webui_en) | | [**LangChain**](https://github.com/hwchase17/langchain) | LLM application development framework, suitable for secondary development | ✅ | ✅ | ✅ | ❌ | ❌ | ❌ | [link](https://github.com/ymcui/Chinese-LLaMA-Alpaca-2/wiki/langchain_en) | | [**privateGPT**](https://github.com/imartinez/privateGPT) | LangChain-based multi-document QA framework | ✅ | ✅ | ✅ | ❌ | ❌ | ❌ | [link](https://github.com/ymcui/Chinese-LLaMA-Alpaca-2/wiki/privategpt_en) | From e9a29f0356f60848029ca2bd8e8f2d6ffa9fccc2 Mon Sep 17 00:00:00 2001 From: iMountTai <2506700016@qq.com> Date: Thu, 17 Aug 2023 11:38:35 +0800 Subject: [PATCH 02/12] fix data cache_path --- scripts/training/build_dataset.py | 2 +- scripts/training/run_clm_pt_with_peft.py | 4 +++- scripts/training/run_clm_sft_with_peft.py | 4 +++- 3 files changed, 7 insertions(+), 3 deletions(-) diff --git a/scripts/training/build_dataset.py b/scripts/training/build_dataset.py index 953a6d2..9fd1fdb 100644 --- a/scripts/training/build_dataset.py +++ b/scripts/training/build_dataset.py @@ -62,7 +62,7 @@ def tokenization(examples): if data_cache_dir is None: data_cache_dir = str(os.path.dirname(file)) - cache_path = os.path.join(data_cache_dir,os.path.basename(file).split('.')[0]) + cache_path = os.path.join(data_cache_dir,os.path.basename(file).split('.')[0]+f"_{max_seq_length}") os.makedirs(cache_path, exist_ok=True) try: processed_dataset = datasets.load_from_disk(cache_path) diff --git a/scripts/training/run_clm_pt_with_peft.py b/scripts/training/run_clm_pt_with_peft.py index 96f403c..7c18d64 100644 --- a/scripts/training/run_clm_pt_with_peft.py +++ b/scripts/training/run_clm_pt_with_peft.py @@ -528,6 +528,7 @@ def group_texts(examples): if model_args.torch_dtype in ["auto", None] else getattr(torch, model_args.torch_dtype) ) + device_map = {"":int(os.environ.get("LOCAL_RANK") or 0)} model = LlamaForCausalLM.from_pretrained( model_args.model_name_or_path, from_tf=bool(".ckpt" in model_args.model_name_or_path), @@ -536,7 +537,8 @@ def group_texts(examples): revision=model_args.model_revision, use_auth_token=True if model_args.use_auth_token else None, torch_dtype=torch_dtype, - low_cpu_mem_usage=True + low_cpu_mem_usage=True, + device_map=device_map ) else: model = AutoModelForCausalLM.from_config(config) diff --git a/scripts/training/run_clm_sft_with_peft.py b/scripts/training/run_clm_sft_with_peft.py index 4daf208..f0fbd62 100644 --- a/scripts/training/run_clm_sft_with_peft.py +++ b/scripts/training/run_clm_sft_with_peft.py @@ -337,6 +337,7 @@ def main(): if model_args.torch_dtype in ["auto", None] else getattr(torch, model_args.torch_dtype) ) + device_map = {"":int(os.environ.get("LOCAL_RANK") or 0)} model = LlamaForCausalLM.from_pretrained( model_args.model_name_or_path, from_tf=bool(".ckpt" in model_args.model_name_or_path), @@ -345,7 +346,8 @@ def main(): revision=model_args.model_revision, use_auth_token=True if model_args.use_auth_token else None, torch_dtype=torch_dtype, - low_cpu_mem_usage=True + low_cpu_mem_usage=True, + device_map=device_map ) else: model = AutoModelForCausalLM.from_config(config) From fa7707b2ad1861ce83b1a0254104d30f9e81fa8c Mon Sep 17 00:00:00 2001 From: iMountTai <2506700016@qq.com> Date: Thu, 17 Aug 2023 12:43:19 +0800 Subject: [PATCH 03/12] add device_map for training --- README.md | 1 + README_EN.md | 1 + scripts/training/run_clm_pt_with_peft.py | 2 +- scripts/training/run_clm_sft_with_peft.py | 3 +-- 4 files changed, 4 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 8f1adcb..a76f21c 100644 --- a/README.md +++ b/README.md @@ -249,6 +249,7 @@ 问题4:为什么不对模型做全量预训练而是用LoRA? 问题5:二代模型支不支持某些支持一代LLaMA的工具? 问题6:Chinese-Alpaca-2是Llama-2-Chat训练得到的吗? +问题7:为什么24G显存微调chinese-alpaca-2-7b OOM? ``` diff --git a/README_EN.md b/README_EN.md index 930faf8..4ac2c90 100644 --- a/README_EN.md +++ b/README_EN.md @@ -232,6 +232,7 @@ Question 3: Do you accept third-party Pull Requests? Question 4: Why not perform full pre-training but use LoRA instead? Question 5: Does Llama-2 series support tools that support the first-gen LLaMA? Question 6: Is Chinese-Alpaca-2 trained from Llama-2-Chat? +Question 7: Why does training with 24GB VRAM lead to an OOM error when fine-tuning chinese-alpaca-2-7b? ``` For specific questions and answers, please refer to the project >>> [📚 GitHub Wiki](https://github.com/ymcui/Chinese-LLaMA-Alpaca-2/wiki/faq_en) diff --git a/scripts/training/run_clm_pt_with_peft.py b/scripts/training/run_clm_pt_with_peft.py index 7c18d64..68f9032 100644 --- a/scripts/training/run_clm_pt_with_peft.py +++ b/scripts/training/run_clm_pt_with_peft.py @@ -557,7 +557,7 @@ def group_texts(examples): if training_args.peft_path is not None: logger.info("Peft from pre-trained model") - model = PeftModel.from_pretrained(model, training_args.peft_path) + model = PeftModel.from_pretrained(model, training_args.peft_path, device_map=device_map) else: logger.info("Init new peft model") target_modules = training_args.trainable.split(',') diff --git a/scripts/training/run_clm_sft_with_peft.py b/scripts/training/run_clm_sft_with_peft.py index f0fbd62..b6524fb 100644 --- a/scripts/training/run_clm_sft_with_peft.py +++ b/scripts/training/run_clm_sft_with_peft.py @@ -51,7 +51,6 @@ from peft import LoraConfig, TaskType, get_peft_model, PeftModel, get_peft_model_state_dict from transformers.trainer_utils import PREFIX_CHECKPOINT_DIR -IGNORE_INDEX = -100 require_version("datasets>=1.8.0", "To fix: pip install -r examples/pytorch/language-modeling/requirements.txt") @@ -363,7 +362,7 @@ def main(): if training_args.peft_path is not None: logger.info("Peft from pre-trained model") - model = PeftModel.from_pretrained(model, training_args.peft_path) + model = PeftModel.from_pretrained(model, training_args.peft_path, device_map=device_map) else: logger.info("Init new peft model") target_modules = training_args.trainable.split(',') From e3731fb67a35c10efc17c0b9d8b29bb45a15043c Mon Sep 17 00:00:00 2001 From: Xin Yao <35353688+iMountTai@users.noreply.github.com> Date: Thu, 17 Aug 2023 13:01:56 +0800 Subject: [PATCH 04/12] Update README.md --- README.md | 3 --- 1 file changed, 3 deletions(-) diff --git a/README.md b/README.md index a76f21c..8444263 100644 --- a/README.md +++ b/README.md @@ -144,9 +144,6 @@ | [**🤗Transformers**](https://github.com/huggingface/transformers) | 原生transformers推理接口 | ✅ | ✅ | ✅ | ✅ | ❌ | ✅ | [link](https://github.com/ymcui/Chinese-LLaMA-Alpaca-2/wiki/inference_with_transformers_zh) | | [**Colab Demo**](https://colab.research.google.com/drive/1yu0eZ3a66by8Zqm883LLtRQrguBAb9MR?usp=sharing) | 在Colab中启动交互界面 | ✅ | ✅ | ✅ | ✅ | ❌ | ✅ | [link](https://colab.research.google.com/drive/1yu0eZ3a66by8Zqm883LLtRQrguBAb9MR?usp=sharing) | | [**仿OpenAI API调用**](https://platform.openai.com/docs/api-reference) | 仿OpenAI API接口的服务器Demo | ✅ | ✅ | ✅ | ❌ | ✅ | ✅ | [link](https://github.com/ymcui/Chinese-LLaMA-Alpaca-2/wiki/api_calls_zh) | -<<<<<<< HEAD -| [**text-generation-webui**](https://github.com/oobabooga/text-generation-webui) | 前端Web UI界面的部署方式 | ✅ | ✅ | ✅ | ✅ | ✅ | ❌ | [link](https://github.com/ymcui/Chinese-LLaMA-Alpaca-2/wiki/text-generation-webui_zh) | -======= | [**text-generation-webui**](https://github.com/oobabooga/text-generation-webui) | 前端Web UI界面的部署方式 | ✅ | ✅ | ✅ | ✅ | ✅ | ❌ | [link](https://github.com/ymcui/Chinese-LLaMA-Alpaca-2/wiki/text-generation-webui_zh) | >>>>>>> fced4ce77289f8104e7c434e70ad56540b854dcf | [**LangChain**](https://github.com/hwchase17/langchain) | 适合二次开发的大模型应用开源框架 | ✅ | ✅ | ✅ | ❌ | ❌ | ❌ | [link](https://github.com/ymcui/Chinese-LLaMA-Alpaca-2/wiki/langchain_zh) | From 94d00603842900d992efdec27f55f470d739bbf3 Mon Sep 17 00:00:00 2001 From: Xin Yao <35353688+iMountTai@users.noreply.github.com> Date: Thu, 17 Aug 2023 13:02:37 +0800 Subject: [PATCH 05/12] Update README.md --- README.md | 1 - 1 file changed, 1 deletion(-) diff --git a/README.md b/README.md index 8444263..7b7b083 100644 --- a/README.md +++ b/README.md @@ -145,7 +145,6 @@ | [**Colab Demo**](https://colab.research.google.com/drive/1yu0eZ3a66by8Zqm883LLtRQrguBAb9MR?usp=sharing) | 在Colab中启动交互界面 | ✅ | ✅ | ✅ | ✅ | ❌ | ✅ | [link](https://colab.research.google.com/drive/1yu0eZ3a66by8Zqm883LLtRQrguBAb9MR?usp=sharing) | | [**仿OpenAI API调用**](https://platform.openai.com/docs/api-reference) | 仿OpenAI API接口的服务器Demo | ✅ | ✅ | ✅ | ❌ | ✅ | ✅ | [link](https://github.com/ymcui/Chinese-LLaMA-Alpaca-2/wiki/api_calls_zh) | | [**text-generation-webui**](https://github.com/oobabooga/text-generation-webui) | 前端Web UI界面的部署方式 | ✅ | ✅ | ✅ | ✅ | ✅ | ❌ | [link](https://github.com/ymcui/Chinese-LLaMA-Alpaca-2/wiki/text-generation-webui_zh) | ->>>>>>> fced4ce77289f8104e7c434e70ad56540b854dcf | [**LangChain**](https://github.com/hwchase17/langchain) | 适合二次开发的大模型应用开源框架 | ✅ | ✅ | ✅ | ❌ | ❌ | ❌ | [link](https://github.com/ymcui/Chinese-LLaMA-Alpaca-2/wiki/langchain_zh) | | [**privateGPT**](https://github.com/imartinez/privateGPT) | 基于LangChain的多文档本地问答框架 | ✅ | ✅ | ✅ | ❌ | ❌ | ❌ | [link](https://github.com/ymcui/Chinese-LLaMA-Alpaca-2/wiki/privategpt_zh) | From 1d668b7484c0417c3799f95100edf89966bd423e Mon Sep 17 00:00:00 2001 From: Xin Yao <35353688+iMountTai@users.noreply.github.com> Date: Thu, 17 Aug 2023 13:03:21 +0800 Subject: [PATCH 06/12] Update README_EN.md --- README_EN.md | 4 ---- 1 file changed, 4 deletions(-) diff --git a/README_EN.md b/README_EN.md index 4ac2c90..51cee13 100644 --- a/README_EN.md +++ b/README_EN.md @@ -138,11 +138,7 @@ The models in this project mainly support the following quantization, inference, | [**🤗Transformers**](https://github.com/huggingface/transformers) | Native transformers inference interface | ✅ | ✅ | ✅ | ✅ | ❌ | ✅ | [link](https://github.com/ymcui/Chinese-LLaMA-Alpaca-2/wiki/inference_with_transformers_en) | | [**Colab Demo**](https://colab.research.google.com/drive/1yu0eZ3a66by8Zqm883LLtRQrguBAb9MR?usp=sharing) | Running a Gradio web demo in Colab | ✅ | ✅ | ✅ | ✅ | ❌ | ✅ | [link](https://colab.research.google.com/drive/1yu0eZ3a66by8Zqm883LLtRQrguBAb9MR?usp=sharing) | | [**OpenAI API Calls**](https://platform.openai.com/docs/api-reference) | A server that implements OpenAI API | ✅ | ✅ | ✅ | ❌ | ✅ | ✅ | [link](https://github.com/ymcui/Chinese-LLaMA-Alpaca-2/wiki/api_calls_en) | -<<<<<<< HEAD -| [**text-generation-webui**](https://github.com/oobabooga/text-generation-webui) | A tool for deploying model as a web UI | ✅ | ✅ | ✅ | ✅ | ✅ | ❌ | [link](https://github.com/ymcui/Chinese-LLaMA-Alpaca-2/wiki/text-generation-webui_en) | -======= | [**text-generation-webui**](https://github.com/oobabooga/text-generation-webui) | A tool for deploying model as a web UI | ✅ | ✅ | ✅ | ✅ | ✅ | ❌ | [link](https://github.com/ymcui/Chinese-LLaMA-Alpaca-2/wiki/text-generation-webui_en) | ->>>>>>> fced4ce77289f8104e7c434e70ad56540b854dcf | [**LangChain**](https://github.com/hwchase17/langchain) | LLM application development framework, suitable for secondary development | ✅ | ✅ | ✅ | ❌ | ❌ | ❌ | [link](https://github.com/ymcui/Chinese-LLaMA-Alpaca-2/wiki/langchain_en) | | [**privateGPT**](https://github.com/imartinez/privateGPT) | LangChain-based multi-document QA framework | ✅ | ✅ | ✅ | ❌ | ❌ | ❌ | [link](https://github.com/ymcui/Chinese-LLaMA-Alpaca-2/wiki/privategpt_en) | From 8ef6dd8dc6cd673d503cf5b4b75a90a4616ccca3 Mon Sep 17 00:00:00 2001 From: iMountTai <2506700016@qq.com> Date: Thu, 17 Aug 2023 14:11:36 +0800 Subject: [PATCH 07/12] delete unused params --- scripts/training/run_clm_pt_with_peft.py | 44 ++++++++++------------- scripts/training/run_clm_sft_with_peft.py | 41 ++++++++++----------- scripts/training/run_pt.sh | 6 ++-- scripts/training/run_sft.sh | 4 +-- 4 files changed, 41 insertions(+), 54 deletions(-) diff --git a/scripts/training/run_clm_pt_with_peft.py b/scripts/training/run_clm_pt_with_peft.py index 68f9032..3ca05c3 100644 --- a/scripts/training/run_clm_pt_with_peft.py +++ b/scripts/training/run_clm_pt_with_peft.py @@ -467,13 +467,13 @@ def group_texts(examples): for idx, file in enumerate(files): data_file = os.path.join(path, file) filename = ''.join(file.split(".")[:-1]) - cache_path = os.path.join(data_args.data_cache_dir, filename) + cache_path = os.path.join(data_args.data_cache_dir, filename+f"_{block_size}") os.makedirs(cache_path, exist_ok=True) try: processed_dataset = datasets.load_from_disk(cache_path, keep_in_memory=False) logger.info(f'training datasets-{filename} has been loaded from disk') except Exception: - cache_dir = os.path.join(data_args.data_cache_dir, filename+"_text") + cache_dir = os.path.join(data_args.data_cache_dir, filename+f"_text_{block_size}") os.makedirs(cache_dir, exist_ok=True) raw_dataset = load_dataset("text", data_files=data_file, cache_dir=cache_dir, keep_in_memory=False) logger.info(f"{file} has been loaded") @@ -503,7 +503,6 @@ def group_texts(examples): else: assert lm_datasets.features.type == processed_dataset["train"].features.type lm_datasets = concatenate_datasets([lm_datasets, processed_dataset["train"]]) - lm_datasets = lm_datasets.train_test_split(test_size = data_args.validation_split_percentage) if training_args.do_train: @@ -522,28 +521,23 @@ def group_texts(examples): logger.info(f"Num eval_samples {len(eval_dataset)}") logger.info("Evaluation example:") logger.info(tokenizer.decode(eval_dataset[0]['input_ids'])) - if model_args.model_name_or_path: - torch_dtype = ( - model_args.torch_dtype - if model_args.torch_dtype in ["auto", None] - else getattr(torch, model_args.torch_dtype) - ) - device_map = {"":int(os.environ.get("LOCAL_RANK") or 0)} - model = LlamaForCausalLM.from_pretrained( - model_args.model_name_or_path, - from_tf=bool(".ckpt" in model_args.model_name_or_path), - config=config, - cache_dir=model_args.cache_dir, - revision=model_args.model_revision, - use_auth_token=True if model_args.use_auth_token else None, - torch_dtype=torch_dtype, - low_cpu_mem_usage=True, - device_map=device_map - ) - else: - model = AutoModelForCausalLM.from_config(config) - n_params = sum({p.data_ptr(): p.numel() for p in model.parameters()}.values()) - logger.info(f"Training new model from scratch - Total size={n_params/2**20:.2f}M params") + torch_dtype = ( + model_args.torch_dtype + if model_args.torch_dtype in ["auto", None] + else getattr(torch, model_args.torch_dtype) + ) + device_map = {"":int(os.environ.get("LOCAL_RANK") or 0)} + model = LlamaForCausalLM.from_pretrained( + model_args.model_name_or_path, + from_tf=bool(".ckpt" in model_args.model_name_or_path), + config=config, + cache_dir=model_args.cache_dir, + revision=model_args.model_revision, + use_auth_token=True if model_args.use_auth_token else None, + torch_dtype=torch_dtype, + low_cpu_mem_usage=True, + device_map=device_map + ) model_vocab_size = model.get_output_embeddings().weight.size(0) tokenizer_vocab_size = len(tokenizer) diff --git a/scripts/training/run_clm_sft_with_peft.py b/scripts/training/run_clm_sft_with_peft.py index b6524fb..d71c411 100644 --- a/scripts/training/run_clm_sft_with_peft.py +++ b/scripts/training/run_clm_sft_with_peft.py @@ -294,7 +294,7 @@ def main(): "You can do it from another script, save it, and load it from here, using --tokenizer_name." ) - if (len(tokenizer))!=55296: + if (len(tokenizer)) != 55296: raise ValueError(f"The vocab size of the tokenizer should be 55296, but found {len(tokenizer)}.\n" "Please use Chinese-LLaMA-2 tokenizer.") @@ -330,28 +330,23 @@ def main(): logger.info("Evaluation example:") logger.info(tokenizer.decode(eval_dataset[0]['input_ids'])) - if model_args.model_name_or_path: - torch_dtype = ( - model_args.torch_dtype - if model_args.torch_dtype in ["auto", None] - else getattr(torch, model_args.torch_dtype) - ) - device_map = {"":int(os.environ.get("LOCAL_RANK") or 0)} - model = LlamaForCausalLM.from_pretrained( - model_args.model_name_or_path, - from_tf=bool(".ckpt" in model_args.model_name_or_path), - config=config, - cache_dir=model_args.cache_dir, - revision=model_args.model_revision, - use_auth_token=True if model_args.use_auth_token else None, - torch_dtype=torch_dtype, - low_cpu_mem_usage=True, - device_map=device_map - ) - else: - model = AutoModelForCausalLM.from_config(config) - n_params = sum({p.data_ptr(): p.numel() for p in model.parameters()}.values()) - logger.info(f"Training new model from scratch - Total size={n_params/2**20:.2f}M params") + torch_dtype = ( + model_args.torch_dtype + if model_args.torch_dtype in ["auto", None] + else getattr(torch, model_args.torch_dtype) + ) + device_map = {"":int(os.environ.get("LOCAL_RANK") or 0)} + model = LlamaForCausalLM.from_pretrained( + model_args.model_name_or_path, + from_tf=bool(".ckpt" in model_args.model_name_or_path), + config=config, + cache_dir=model_args.cache_dir, + revision=model_args.model_revision, + use_auth_token=True if model_args.use_auth_token else None, + torch_dtype=torch_dtype, + low_cpu_mem_usage=True, + device_map=device_map + ) model_vocab_size = model.get_input_embeddings().weight.shape[0] logger.info(f"Model vocab size: {model_vocab_size}") diff --git a/scripts/training/run_pt.sh b/scripts/training/run_pt.sh index b409eac..56fc540 100644 --- a/scripts/training/run_pt.sh +++ b/scripts/training/run_pt.sh @@ -10,8 +10,8 @@ chinese_tokenizer_path=path/to/chinese/llama-2/tokenizer/dir dataset_dir=path/to/pt/data/dir data_cache=temp_data_cache_dir per_device_train_batch_size=1 -per_device_eval_batch_size=1 gradient_accumulation_steps=8 +block_size=512 output_dir=output_dir deepspeed_config_file=ds_zero2_no_offload.json @@ -22,9 +22,7 @@ torchrun --nnodes 1 --nproc_per_node 1 run_clm_pt_with_peft.py \ --tokenizer_name_or_path ${chinese_tokenizer_path} \ --dataset_dir ${dataset_dir} \ --data_cache_dir ${data_cache} \ - --validation_split_percentage 0.001 \ --per_device_train_batch_size ${per_device_train_batch_size} \ - --per_device_eval_batch_size ${per_device_eval_batch_size} \ --do_train \ --seed $RANDOM \ --fp16 \ @@ -40,7 +38,7 @@ torchrun --nnodes 1 --nproc_per_node 1 run_clm_pt_with_peft.py \ --save_steps 200 \ --gradient_accumulation_steps ${gradient_accumulation_steps} \ --preprocessing_num_workers 8 \ - --block_size 1024 \ + --block_size ${block_size} \ --output_dir ${output_dir} \ --overwrite_output_dir \ --ddp_timeout 30000 \ diff --git a/scripts/training/run_sft.sh b/scripts/training/run_sft.sh index 0c31a8b..73a5ead 100644 --- a/scripts/training/run_sft.sh +++ b/scripts/training/run_sft.sh @@ -11,6 +11,7 @@ dataset_dir=path/to/sft/data/dir per_device_train_batch_size=1 per_device_eval_batch_size=1 gradient_accumulation_steps=8 +max_seq_length=512 output_dir=output_dir peft_model=path/to/peft/model/dir validation_file=validation_file_name @@ -22,7 +23,6 @@ torchrun --nnodes 1 --nproc_per_node 1 run_clm_sft_with_peft.py \ --model_name_or_path ${pretrained_model} \ --tokenizer_name_or_path ${chinese_tokenizer_path} \ --dataset_dir ${dataset_dir} \ - --validation_split_percentage 0.001 \ --per_device_train_batch_size ${per_device_train_batch_size} \ --per_device_eval_batch_size ${per_device_eval_batch_size} \ --do_train \ @@ -43,7 +43,7 @@ torchrun --nnodes 1 --nproc_per_node 1 run_clm_sft_with_peft.py \ --save_steps 200 \ --gradient_accumulation_steps ${gradient_accumulation_steps} \ --preprocessing_num_workers 8 \ - --max_seq_length 1024 \ + --max_seq_length ${max_seq_length} \ --output_dir ${output_dir} \ --overwrite_output_dir \ --ddp_timeout 30000 \ From 796bb874f7f312b474aef7c54204ce1364e0a2a8 Mon Sep 17 00:00:00 2001 From: iMountTai <2506700016@qq.com> Date: Thu, 17 Aug 2023 14:20:27 +0800 Subject: [PATCH 08/12] add use_cahce=false --- scripts/training/run_clm_pt_with_peft.py | 1 + scripts/training/run_clm_sft_with_peft.py | 1 + 2 files changed, 2 insertions(+) diff --git a/scripts/training/run_clm_pt_with_peft.py b/scripts/training/run_clm_pt_with_peft.py index 3ca05c3..cd36b7a 100644 --- a/scripts/training/run_clm_pt_with_peft.py +++ b/scripts/training/run_clm_pt_with_peft.py @@ -538,6 +538,7 @@ def group_texts(examples): low_cpu_mem_usage=True, device_map=device_map ) + model.config.use_cache = False model_vocab_size = model.get_output_embeddings().weight.size(0) tokenizer_vocab_size = len(tokenizer) diff --git a/scripts/training/run_clm_sft_with_peft.py b/scripts/training/run_clm_sft_with_peft.py index d71c411..fea0879 100644 --- a/scripts/training/run_clm_sft_with_peft.py +++ b/scripts/training/run_clm_sft_with_peft.py @@ -347,6 +347,7 @@ def main(): low_cpu_mem_usage=True, device_map=device_map ) + model.config.use_cache = False model_vocab_size = model.get_input_embeddings().weight.shape[0] logger.info(f"Model vocab size: {model_vocab_size}") From e16d943078a57838d3b3548b274515bafe7c5468 Mon Sep 17 00:00:00 2001 From: Xin Yao <35353688+iMountTai@users.noreply.github.com> Date: Thu, 17 Aug 2023 14:45:48 +0800 Subject: [PATCH 09/12] Update run_pt.sh --- scripts/training/run_pt.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/scripts/training/run_pt.sh b/scripts/training/run_pt.sh index 56fc540..40813a3 100644 --- a/scripts/training/run_pt.sh +++ b/scripts/training/run_pt.sh @@ -22,6 +22,7 @@ torchrun --nnodes 1 --nproc_per_node 1 run_clm_pt_with_peft.py \ --tokenizer_name_or_path ${chinese_tokenizer_path} \ --dataset_dir ${dataset_dir} \ --data_cache_dir ${data_cache} \ + --validation_split_percentage 0.001 \ --per_device_train_batch_size ${per_device_train_batch_size} \ --do_train \ --seed $RANDOM \ From 74b701015bf954c3d5ade6e819807054605e40e3 Mon Sep 17 00:00:00 2001 From: iMountTai <2506700016@qq.com> Date: Fri, 18 Aug 2023 11:48:22 +0800 Subject: [PATCH 10/12] delete modules_to_save --- scripts/training/run_pt.sh | 10 +++------- scripts/training/run_sft.sh | 12 +++--------- 2 files changed, 6 insertions(+), 16 deletions(-) diff --git a/scripts/training/run_pt.sh b/scripts/training/run_pt.sh index 56fc540..7c4a7dd 100644 --- a/scripts/training/run_pt.sh +++ b/scripts/training/run_pt.sh @@ -2,11 +2,10 @@ lr=2e-4 lora_rank=64 lora_alpha=128 lora_trainable="q_proj,v_proj,k_proj,o_proj,gate_proj,down_proj,up_proj" -modules_to_save="embed_tokens,lm_head" lora_dropout=0.05 -pretrained_model=path/to/hf/llama-2/dir -chinese_tokenizer_path=path/to/chinese/llama-2/tokenizer/dir +pretrained_model=path/to/hf/chinese-llama-2/dir +chinese_tokenizer_path=path/to/chinese/chinese-llama-2/tokenizer/dir dataset_dir=path/to/pt/data/dir data_cache=temp_data_cache_dir per_device_train_batch_size=1 @@ -46,8 +45,5 @@ torchrun --nnodes 1 --nproc_per_node 1 run_clm_pt_with_peft.py \ --lora_rank ${lora_rank} \ --lora_alpha ${lora_alpha} \ --trainable ${lora_trainable} \ - --modules_to_save ${modules_to_save} \ --lora_dropout ${lora_dropout} \ - --torch_dtype float16 \ - --gradient_checkpointing \ - --ddp_find_unused_parameters False + --torch_dtype float16 diff --git a/scripts/training/run_sft.sh b/scripts/training/run_sft.sh index 73a5ead..c180a42 100644 --- a/scripts/training/run_sft.sh +++ b/scripts/training/run_sft.sh @@ -2,18 +2,16 @@ lr=1e-4 lora_rank=64 lora_alpha=128 lora_trainable="q_proj,v_proj,k_proj,o_proj,gate_proj,down_proj,up_proj" -modules_to_save="embed_tokens,lm_head" lora_dropout=0.05 -pretrained_model=path/to/hf/llama-2/or/merged/llama-2/dir/or/model_id -chinese_tokenizer_path=path/to/chinese/llama-2/tokenizer/dir +pretrained_model=path/to/hf/chinese-alpaca-2/dir/or/model_id +chinese_tokenizer_path=path/to/chinese/chinese-alpaca-2/tokenizer/dir dataset_dir=path/to/sft/data/dir per_device_train_batch_size=1 per_device_eval_batch_size=1 gradient_accumulation_steps=8 max_seq_length=512 output_dir=output_dir -peft_model=path/to/peft/model/dir validation_file=validation_file_name deepspeed_config_file=ds_zero2_no_offload.json @@ -51,10 +49,6 @@ torchrun --nnodes 1 --nproc_per_node 1 run_clm_sft_with_peft.py \ --lora_rank ${lora_rank} \ --lora_alpha ${lora_alpha} \ --trainable ${lora_trainable} \ - --modules_to_save ${modules_to_save} \ --lora_dropout ${lora_dropout} \ --torch_dtype float16 \ - --validation_file ${validation_file} \ - --peft_path ${peft_model} \ - --gradient_checkpointing \ - --ddp_find_unused_parameters False + --validation_file ${validation_file} From d6ba233386d6c1b44d0b903c55cc4e7ffbb7c5ca Mon Sep 17 00:00:00 2001 From: iMountTai <2506700016@qq.com> Date: Wed, 23 Aug 2023 00:37:57 +0800 Subject: [PATCH 11/12] add some suggestions fot training --- scripts/training/run_pt.sh | 6 +++++- scripts/training/run_sft.sh | 8 ++++++-- 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/scripts/training/run_pt.sh b/scripts/training/run_pt.sh index 8852ea4..e103ab5 100644 --- a/scripts/training/run_pt.sh +++ b/scripts/training/run_pt.sh @@ -1,10 +1,13 @@ +# 运行脚本前请仔细阅读wiki(https://github.com/ymcui/Chinese-LLaMA-Alpaca-2/wiki/pt_scripts_zh) +# Read the wiki(https://github.com/ymcui/Chinese-LLaMA-Alpaca-2/wiki/pt_scripts_zh) carefully before running the script lr=2e-4 lora_rank=64 lora_alpha=128 lora_trainable="q_proj,v_proj,k_proj,o_proj,gate_proj,down_proj,up_proj" +modules_to_save="embed_tokens,lm_head" lora_dropout=0.05 -pretrained_model=path/to/hf/chinese-llama-2/dir +pretrained_model=path/to/hf/llama-2/dir chinese_tokenizer_path=path/to/chinese/chinese-llama-2/tokenizer/dir dataset_dir=path/to/pt/data/dir data_cache=temp_data_cache_dir @@ -47,4 +50,5 @@ torchrun --nnodes 1 --nproc_per_node 1 run_clm_pt_with_peft.py \ --lora_alpha ${lora_alpha} \ --trainable ${lora_trainable} \ --lora_dropout ${lora_dropout} \ + --modules_to_save ${modules_to_save} \ --torch_dtype float16 diff --git a/scripts/training/run_sft.sh b/scripts/training/run_sft.sh index c180a42..a74986d 100644 --- a/scripts/training/run_sft.sh +++ b/scripts/training/run_sft.sh @@ -1,11 +1,14 @@ +# 运行脚本前请仔细阅读wiki(https://github.com/ymcui/Chinese-LLaMA-Alpaca-2/wiki/sft_scripts_zh) +# Read the wiki(https://github.com/ymcui/Chinese-LLaMA-Alpaca-2/wiki/sft_scripts_zh) carefully before running the script lr=1e-4 lora_rank=64 lora_alpha=128 lora_trainable="q_proj,v_proj,k_proj,o_proj,gate_proj,down_proj,up_proj" +modules_to_save="embed_tokens,lm_head" lora_dropout=0.05 -pretrained_model=path/to/hf/chinese-alpaca-2/dir/or/model_id -chinese_tokenizer_path=path/to/chinese/chinese-alpaca-2/tokenizer/dir +pretrained_model=path/to/hf/llama-2/or/chinese-llama-2/dir/or/model_id +chinese_tokenizer_path=path/to/chinese-llama-2/tokenizer/dir dataset_dir=path/to/sft/data/dir per_device_train_batch_size=1 per_device_eval_batch_size=1 @@ -50,5 +53,6 @@ torchrun --nnodes 1 --nproc_per_node 1 run_clm_sft_with_peft.py \ --lora_alpha ${lora_alpha} \ --trainable ${lora_trainable} \ --lora_dropout ${lora_dropout} \ + --modules_to_save ${modules_to_save} \ --torch_dtype float16 \ --validation_file ${validation_file} From 7ff746c94f0adcc57e019a47c1cf554ace141182 Mon Sep 17 00:00:00 2001 From: Xin Yao <35353688+iMountTai@users.noreply.github.com> Date: Wed, 23 Aug 2023 00:47:44 +0800 Subject: [PATCH 12/12] Update run_pt.sh --- scripts/training/run_pt.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/training/run_pt.sh b/scripts/training/run_pt.sh index e103ab5..663a326 100644 --- a/scripts/training/run_pt.sh +++ b/scripts/training/run_pt.sh @@ -8,7 +8,7 @@ modules_to_save="embed_tokens,lm_head" lora_dropout=0.05 pretrained_model=path/to/hf/llama-2/dir -chinese_tokenizer_path=path/to/chinese/chinese-llama-2/tokenizer/dir +chinese_tokenizer_path=path/to/chinese-llama-2/tokenizer/dir dataset_dir=path/to/pt/data/dir data_cache=temp_data_cache_dir per_device_train_batch_size=1