From a59e4276fdae9f3e0d3bb3a863f998f90132852f Mon Sep 17 00:00:00 2001 From: yuiseki Date: Thu, 2 May 2024 09:23:02 +0900 Subject: [PATCH] tweak recipe --- .../coder-math-ja-wikipedia.yaml | 6 +++--- .../coder-python-ja-wikipedia-amenokaku.yaml | 8 +++++--- .../i18n-ja-wikipedia-databricks-dolly.yaml | 18 ++++++++++++++++++ .../i18n-ja-wikipedia-jimba.yaml | 16 ++++++++++++++++ 4 files changed, 42 insertions(+), 6 deletions(-) create mode 100644 recipes/A5000_24GB_x8/Mistral-7B-v0.1/i18n-ja-wikipedia-databricks-dolly.yaml create mode 100644 recipes/A5000_24GB_x8/Mistral-7B-v0.1/i18n-ja-wikipedia-jimba.yaml diff --git a/recipes/A5000_24GB_x8/Mistral-7B-v0.1/coder-math-ja-wikipedia.yaml b/recipes/A5000_24GB_x8/Mistral-7B-v0.1/coder-math-ja-wikipedia.yaml index db7e3cf..8da6e3a 100644 --- a/recipes/A5000_24GB_x8/Mistral-7B-v0.1/coder-math-ja-wikipedia.yaml +++ b/recipes/A5000_24GB_x8/Mistral-7B-v0.1/coder-math-ja-wikipedia.yaml @@ -10,7 +10,7 @@ dataset_train_split_test_size: 0.2 lora_r: 8 lora_alpha: 16 lora_dropout: 0.05 -train_claim_gpu_num: 4 -train_per_device_train_batch_size: 8 -train_gradient_accumulation_steps: 2 +train_claim_gpu_num: 8 +train_per_device_train_batch_size: 1 +train_gradient_accumulation_steps: 16 train_num_train_epochs: 4 diff --git a/recipes/A5000_24GB_x8/Mistral-7B-v0.1/coder-python-ja-wikipedia-amenokaku.yaml b/recipes/A5000_24GB_x8/Mistral-7B-v0.1/coder-python-ja-wikipedia-amenokaku.yaml index 992cb42..bdeae1a 100644 --- a/recipes/A5000_24GB_x8/Mistral-7B-v0.1/coder-python-ja-wikipedia-amenokaku.yaml +++ b/recipes/A5000_24GB_x8/Mistral-7B-v0.1/coder-python-ja-wikipedia-amenokaku.yaml @@ -3,6 +3,8 @@ base_model_id: yuiseki/Mistral-7B-v0.1-ja-wikipedia-v0.1 model_name: Mistral-7B-v0.1-ja-wikipedia-amenokaku-v0.1 output_base_dir: /data/output dataset_id: kunishou/amenokaku-code-instruct +dataset_filter_field_name: liscence +dataset_filter_field_value: MIT dataset_input_field_name: instruction dataset_context_field_name: input 
dataset_output_field_name: output @@ -11,7 +13,7 @@ dataset_train_split_test_size: 0.2 lora_r: 8 lora_alpha: 16 lora_dropout: 0.05 -train_claim_gpu_num: 4 -train_per_device_train_batch_size: 8 -train_gradient_accumulation_steps: 4 +train_claim_gpu_num: 8 +train_per_device_train_batch_size: 1 +train_gradient_accumulation_steps: 16 train_num_train_epochs: 4 diff --git a/recipes/A5000_24GB_x8/Mistral-7B-v0.1/i18n-ja-wikipedia-databricks-dolly.yaml b/recipes/A5000_24GB_x8/Mistral-7B-v0.1/i18n-ja-wikipedia-databricks-dolly.yaml new file mode 100644 index 0000000..a80f5ec --- /dev/null +++ b/recipes/A5000_24GB_x8/Mistral-7B-v0.1/i18n-ja-wikipedia-databricks-dolly.yaml @@ -0,0 +1,18 @@ +target_task: tasks/i18n/ja.md +base_model_id: yuiseki/Mistral-7B-v0.1-ja-wikipedia-v0.1 +model_name: Mistral-7B-v0.1-ja-wikipedia-databricks-dolly-v0.1 +output_base_dir: /data/output +dataset_id: llm-jp/databricks-dolly-15k-ja +dataset_context_field_name: context +dataset_input_field_name: instruction +dataset_output_field_name: response +dataset_filter_field_value: 0 +dataset_train_split_seed: 42 +dataset_train_split_test_size: 0.2 +lora_r: 8 +lora_alpha: 16 +lora_dropout: 0.05 +train_claim_gpu_num: 8 +train_per_device_train_batch_size: 1 +train_gradient_accumulation_steps: 16 +train_num_train_epochs: 4 diff --git a/recipes/A5000_24GB_x8/Mistral-7B-v0.1/i18n-ja-wikipedia-jimba.yaml b/recipes/A5000_24GB_x8/Mistral-7B-v0.1/i18n-ja-wikipedia-jimba.yaml new file mode 100644 index 0000000..b651951 --- /dev/null +++ b/recipes/A5000_24GB_x8/Mistral-7B-v0.1/i18n-ja-wikipedia-jimba.yaml @@ -0,0 +1,16 @@ +target_task: tasks/i18n/ja.md +base_model_id: yuiseki/Mistral-7B-v0.1-ja-wikipedia-v0.1 +model_name: Mistral-7B-v0.1-ja-wikipedia-jimba-v0.1 +output_base_dir: /data/output +dataset_id: Kendamarron/jimba-instuction-1k-beta +dataset_input_field_name: instruction +dataset_output_field_name: output +dataset_train_split_seed: 42 +dataset_train_split_test_size: 0.2 +lora_r: 8 +lora_alpha: 16 
+lora_dropout: 0.05 +train_claim_gpu_num: 4 +train_per_device_train_batch_size: 8 +train_gradient_accumulation_steps: 4 +train_num_train_epochs: 4 \ No newline at end of file