From 0eb25ffde209a0ff55def7d6b3fb9033fda926b5 Mon Sep 17 00:00:00 2001 From: Costa Huang Date: Sat, 15 Jun 2024 22:16:54 -0400 Subject: [PATCH 1/7] add a test case for num_train_epochs --- tests/test_ppov2_trainer.py | 21 ++++++++++++++++++++- tests/test_rloo_trainer.py | 2 +- trl/trainer/ppov2_trainer.py | 2 +- trl/trainer/rloo_trainer.py | 2 +- 4 files changed, 23 insertions(+), 4 deletions(-) diff --git a/tests/test_ppov2_trainer.py b/tests/test_ppov2_trainer.py index 7e8fe2f3fd..2b1569f990 100644 --- a/tests/test_ppov2_trainer.py +++ b/tests/test_ppov2_trainer.py @@ -16,7 +16,7 @@ def test(): command = """\ -python -i examples/scripts/ppo/ppo.py \ +python examples/scripts/ppo/ppo.py \ --learning_rate 3e-6 \ --output_dir models/minimal/ppo \ --per_device_train_batch_size 5 \ @@ -31,3 +31,22 @@ def test(): shell=True, check=True, ) + + +def test_num_train_epochs(): + command = """\ +python examples/scripts/ppo/ppo.py \ + --learning_rate 3e-6 \ + --output_dir models/minimal/ppo \ + --per_device_train_batch_size 5 \ + --gradient_accumulation_steps 1 \ + --num_train_epochs 0.003 \ + --model_name_or_path EleutherAI/pythia-14m \ + --non_eos_penalty \ + --stop_token eos \ +""" + subprocess.run( + command, + shell=True, + check=True, + ) diff --git a/tests/test_rloo_trainer.py b/tests/test_rloo_trainer.py index fbeec86125..1fec7f06c0 100644 --- a/tests/test_rloo_trainer.py +++ b/tests/test_rloo_trainer.py @@ -18,7 +18,7 @@ def test(): command = """\ -python -i examples/scripts/rloo/rloo.py \ +python examples/scripts/rloo/rloo.py \ --learning_rate 3e-6 \ --output_dir models/minimal/rloo \ --per_device_train_batch_size 5 \ diff --git a/trl/trainer/ppov2_trainer.py b/trl/trainer/ppov2_trainer.py index dc74f3b352..8eb7c7f5b4 100644 --- a/trl/trainer/ppov2_trainer.py +++ b/trl/trainer/ppov2_trainer.py @@ -101,7 +101,7 @@ def __init__( # calculate various batch sizes ######### if args.total_episodes is None: # allow the users to define episodes in terms of epochs. - args.total_episodes = args.num_train_epochs * self.train_dataset_len + args.total_episodes = int(args.num_train_epochs * self.train_dataset_len) accelerator = Accelerator(gradient_accumulation_steps=args.gradient_accumulation_steps) self.accelerator = accelerator args.world_size = accelerator.num_processes diff --git a/trl/trainer/rloo_trainer.py b/trl/trainer/rloo_trainer.py index 02f69df5e5..d24092016b 100644 --- a/trl/trainer/rloo_trainer.py +++ b/trl/trainer/rloo_trainer.py @@ -83,7 +83,7 @@ def __init__( # calculate various batch sizes ######### if args.total_episodes is None: # allow the users to define episodes in terms of epochs. - args.total_episodes = args.num_train_epochs * self.train_dataset_len + args.total_episodes = int(args.num_train_epochs * self.train_dataset_len) accelerator = Accelerator(gradient_accumulation_steps=args.gradient_accumulation_steps) self.accelerator = accelerator args.world_size = accelerator.num_processes From 2fadff850a40be1956950328d3d753350163936a Mon Sep 17 00:00:00 2001 From: Costa Huang Date: Mon, 17 Jun 2024 10:48:45 -0400 Subject: [PATCH 2/7] fix ci --- trl/trainer/cpo_config.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/trl/trainer/cpo_config.py b/trl/trainer/cpo_config.py index 99c74e2c87..e5a9e4b7ba 100644 --- a/trl/trainer/cpo_config.py +++ b/trl/trainer/cpo_config.py @@ -83,4 +83,4 @@ class CPOConfig(TrainingArguments): def __post_init__(self): if self.loss_type == "kto_pair": raise ValueError("Support for kto_pair has been removed in CPOTrainer. Please use KTOTrainer.") - return super().__post_init__() \ No newline at end of file + return super().__post_init__() From 3efb8afb05aae0c1d67105c5daff96f92fc60fac Mon Sep 17 00:00:00 2001 From: Costa Huang Date: Tue, 18 Jun 2024 16:54:00 -0400 Subject: [PATCH 3/7] quick change --- tests/test_ppov2_trainer.py | 4 ++-- tests/test_rloo_trainer.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/test_ppov2_trainer.py b/tests/test_ppov2_trainer.py index 2b1569f990..c911130658 100644 --- a/tests/test_ppov2_trainer.py +++ b/tests/test_ppov2_trainer.py @@ -19,7 +19,7 @@ def test(): python examples/scripts/ppo/ppo.py \ --learning_rate 3e-6 \ --output_dir models/minimal/ppo \ - --per_device_train_batch_size 5 \ + --per_device_train_batch_size 4 \ --gradient_accumulation_steps 1 \ --total_episodes 10 \ --model_name_or_path EleutherAI/pythia-14m \ @@ -38,7 +38,7 @@ def test_num_train_epochs(): python examples/scripts/ppo/ppo.py \ --learning_rate 3e-6 \ --output_dir models/minimal/ppo \ - --per_device_train_batch_size 5 \ + --per_device_train_batch_size 4 \ --gradient_accumulation_steps 1 \ --num_train_epochs 0.003 \ --model_name_or_path EleutherAI/pythia-14m \ diff --git a/tests/test_rloo_trainer.py b/tests/test_rloo_trainer.py index 1fec7f06c0..5e42e8a026 100644 --- a/tests/test_rloo_trainer.py +++ b/tests/test_rloo_trainer.py @@ -21,7 +21,7 @@ def test(): python examples/scripts/rloo/rloo.py \ --learning_rate 3e-6 \ --output_dir models/minimal/rloo \ - --per_device_train_batch_size 5 \ + --per_device_train_batch_size 4 \ --gradient_accumulation_steps 1 \ --total_episodes 10 \ --model_name_or_path EleutherAI/pythia-14m \ From 195dbd9159b61232df3df31b7abd019048fdf28b Mon Sep 17 00:00:00 2001 From: Costa Huang Date: Thu, 20 Jun 2024 09:18:16 -0400 Subject: [PATCH 4/7] disable push to hub --- examples/scripts/ppo/ppo.py | 3 ++- examples/scripts/ppo/ppo_tldr.py | 3 ++- examples/scripts/rloo/rloo.py | 3 ++- examples/scripts/rloo/rloo_tldr.py | 3 ++- 4 files changed, 8 insertions(+), 4 deletions(-) diff --git a/examples/scripts/ppo/ppo.py b/examples/scripts/ppo/ppo.py index e74d2e52a5..417905ff29 100644 --- a/examples/scripts/ppo/ppo.py +++ b/examples/scripts/ppo/ppo.py @@ -104,5 +104,6 @@ def tokenize(element): ) trainer.train() trainer.save_model(config.output_dir) - trainer.push_to_hub() + if config.push_to_hub: + trainer.push_to_hub() trainer.generate_completions() diff --git a/examples/scripts/ppo/ppo_tldr.py b/examples/scripts/ppo/ppo_tldr.py index d9ed61f60f..baa2f404b8 100644 --- a/examples/scripts/ppo/ppo_tldr.py +++ b/examples/scripts/ppo/ppo_tldr.py @@ -115,5 +115,6 @@ def tokenize(element): ) trainer.train() trainer.save_model(config.output_dir) - trainer.push_to_hub() + if config.push_to_hub: + trainer.push_to_hub() trainer.generate_completions() diff --git a/examples/scripts/rloo/rloo.py b/examples/scripts/rloo/rloo.py index c8d6f193e4..d0dded14b9 100644 --- a/examples/scripts/rloo/rloo.py +++ b/examples/scripts/rloo/rloo.py @@ -104,5 +104,6 @@ def tokenize(element): ) trainer.train() trainer.save_model(config.output_dir) - trainer.push_to_hub() + if config.push_to_hub: + trainer.push_to_hub() trainer.generate_completions() diff --git a/examples/scripts/rloo/rloo_tldr.py b/examples/scripts/rloo/rloo_tldr.py index 98e5f1bf58..02c95e3ea4 100644 --- a/examples/scripts/rloo/rloo_tldr.py +++ b/examples/scripts/rloo/rloo_tldr.py @@ -115,5 +115,6 @@ def tokenize(element): ) trainer.train() trainer.save_model(config.output_dir) - trainer.push_to_hub() + if config.push_to_hub: + trainer.push_to_hub() trainer.generate_completions() From 0492009fd07a06b461525021e0b4d59d65a4d417 Mon Sep 17 00:00:00 2001 From: Costa Huang Date: Thu, 20 Jun 2024 09:51:15 -0400 Subject: [PATCH 5/7] debug windows ci --- tests/test_rloo_trainer.py | 112 +++++++++++++++++++++++++++++++------ 1 file changed, 96 insertions(+), 16 deletions(-) diff --git a/tests/test_rloo_trainer.py b/tests/test_rloo_trainer.py index 5e42e8a026..4c21af4e48 100644 --- a/tests/test_rloo_trainer.py +++ b/tests/test_rloo_trainer.py @@ -11,28 +11,108 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -import subprocess + +import shutil import torch +from datasets import load_dataset +from transformers import ( + AutoModelForCausalLM, + AutoModelForSequenceClassification, + AutoTokenizer, +) + +from trl import ModelConfig +from trl.trainer.rloo_trainer import RLOOConfig, RLOOTrainer +from trl.trainer.utils import SIMPLE_QUERY_CHAT_TEMPLATE def test(): - command = """\ -python examples/scripts/rloo/rloo.py \ - --learning_rate 3e-6 \ - --output_dir models/minimal/rloo \ - --per_device_train_batch_size 4 \ - --gradient_accumulation_steps 1 \ - --total_episodes 10 \ - --model_name_or_path EleutherAI/pythia-14m \ - --non_eos_penalty \ - --stop_token eos \ -""" - subprocess.run( - command, - shell=True, - check=True, + # command = """\ + # python examples/scripts/rloo/rloo.py \ + # --learning_rate 3e-6 \ + # --output_dir models/minimal/rloo \ + # --per_device_train_batch_size 4 \ + # --gradient_accumulation_steps 1 \ + # --total_episodes 10 \ + # --model_name_or_path EleutherAI/pythia-14m \ + # --non_eos_penalty \ + # --stop_token eos \ + # """ + # subprocess.run( + # command, + # shell=True, + # check=True, + # ) + config, model_config = RLOOConfig(), ModelConfig() + config.learning_rate = 3e-6 + config.output_dir = "models/minimal/rloo" + config.per_device_train_batch_size = 4 + config.gradient_accumulation_steps = 1 + config.total_episodes = 10 + config.non_eos_penalty = True + config.stop_token = "eos" + + model_config.model_name_or_path = "EleutherAI/pythia-14m" + + # remove output_dir if exists + shutil.rmtree(config.output_dir, ignore_errors=True) + + tokenizer = AutoTokenizer.from_pretrained( + model_config.model_name_or_path, + padding_side="left", + trust_remote_code=True, + ) + tokenizer.add_special_tokens({"pad_token": "[PAD]"}) + if tokenizer.chat_template is None: + tokenizer.chat_template = SIMPLE_QUERY_CHAT_TEMPLATE + reward_model = AutoModelForSequenceClassification.from_pretrained(config.reward_model_path, num_labels=1) + ref_policy = AutoModelForCausalLM.from_pretrained(config.sft_model_path) + policy = AutoModelForCausalLM.from_pretrained(config.sft_model_path) + ################ + # Dataset + ################ + raw_datasets = load_dataset("trl-internal-testing/descriptiveness-sentiment-trl-style", split="descriptiveness") + eval_samples = 20 + train_dataset = raw_datasets.select(range(len(raw_datasets) - eval_samples)) + eval_dataset = raw_datasets.select(range(len(raw_datasets) - eval_samples, len(raw_datasets))) + dataset_text_field = "prompt" + + def prepare_dataset(dataset, tokenizer): + """pre-tokenize the dataset before training; only collate during training""" + + def tokenize(element): + outputs = tokenizer( + element[dataset_text_field], + padding=False, + ) + return {"input_ids": outputs["input_ids"]} + + return dataset.map( + tokenize, + remove_columns=dataset.column_names, + batched=True, + num_proc=4, # multiprocessing.cpu_count(), + load_from_cache_file=False, + ) + + ################ + # Training + ################ + trainer = RLOOTrainer( + config=config, + tokenizer=tokenizer, + policy=policy, + ref_policy=ref_policy, + reward_model=reward_model, + train_dataset=prepare_dataset(train_dataset, tokenizer), + eval_dataset=prepare_dataset(eval_dataset, tokenizer), ) + trainer.train() + trainer.save_model(config.output_dir) + if config.push_to_hub: + trainer.push_to_hub() + trainer.generate_completions() def test_rloo_reward(): From 1dcfb50b9b35d211d0980869266226df09a26c41 Mon Sep 17 00:00:00 2001 From: Costa Huang Date: Thu, 20 Jun 2024 11:15:34 -0400 Subject: [PATCH 6/7] try another fix --- tests/test_rloo_trainer.py | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/tests/test_rloo_trainer.py b/tests/test_rloo_trainer.py index 4c21af4e48..299b78ba23 100644 --- a/tests/test_rloo_trainer.py +++ b/tests/test_rloo_trainer.py @@ -44,16 +44,16 @@ def test(): # shell=True, # check=True, # ) - config, model_config = RLOOConfig(), ModelConfig() - config.learning_rate = 3e-6 - config.output_dir = "models/minimal/rloo" - config.per_device_train_batch_size = 4 - config.gradient_accumulation_steps = 1 - config.total_episodes = 10 - config.non_eos_penalty = True - config.stop_token = "eos" - - model_config.model_name_or_path = "EleutherAI/pythia-14m" + config = RLOOConfig( + learning_rate=3e-6, + output_dir="models/minimal/rloo", + per_device_train_batch_size=4, + gradient_accumulation_steps=1, + total_episodes=10, + non_eos_penalty=True, + stop_token="eos", + ) + model_config = ModelConfig(model_name_or_path="EleutherAI/pythia-14m") # remove output_dir if exists shutil.rmtree(config.output_dir, ignore_errors=True) From d74e9b6d384bcd0071779cdf7e27d4a2e3aba70a Mon Sep 17 00:00:00 2001 From: Costa Huang Date: Thu, 20 Jun 2024 12:58:14 -0400 Subject: [PATCH 7/7] skip subprocess tests on windows --- tests/test_ppov2_trainer.py | 9 +++ tests/test_rloo_trainer.py | 117 +++++++----------------------------- 2 files changed, 30 insertions(+), 96 deletions(-) diff --git a/tests/test_ppov2_trainer.py b/tests/test_ppov2_trainer.py index c911130658..519220a9c0 100644 --- a/tests/test_ppov2_trainer.py +++ b/tests/test_ppov2_trainer.py @@ -11,6 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +import platform import subprocess @@ -26,6 +27,10 @@ def test(): --non_eos_penalty \ --stop_token eos \ """ + if platform.system() == "Windows": + # windows CI does not work with subprocesses for some reason + # e.g., https://github.com/huggingface/trl/actions/runs/9600036224/job/26475286210?pr=1743 + return subprocess.run( command, shell=True, @@ -45,6 +50,10 @@ def test_num_train_epochs(): --non_eos_penalty \ --stop_token eos \ """ + if platform.system() == "Windows": + # windows CI does not work with subprocesses for some reason + # e.g., https://github.com/huggingface/trl/actions/runs/9600036224/job/26475286210?pr=1743 + return subprocess.run( command, shell=True, diff --git a/tests/test_rloo_trainer.py b/tests/test_rloo_trainer.py index 299b78ba23..9e5498f628 100644 --- a/tests/test_rloo_trainer.py +++ b/tests/test_rloo_trainer.py @@ -11,108 +11,33 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - -import shutil +import platform +import subprocess import torch -from datasets import load_dataset -from transformers import ( - AutoModelForCausalLM, - AutoModelForSequenceClassification, - AutoTokenizer, -) - -from trl import ModelConfig -from trl.trainer.rloo_trainer import RLOOConfig, RLOOTrainer -from trl.trainer.utils import SIMPLE_QUERY_CHAT_TEMPLATE def test(): - # command = """\ - # python examples/scripts/rloo/rloo.py \ - # --learning_rate 3e-6 \ - # --output_dir models/minimal/rloo \ - # --per_device_train_batch_size 4 \ - # --gradient_accumulation_steps 1 \ - # --total_episodes 10 \ - # --model_name_or_path EleutherAI/pythia-14m \ - # --non_eos_penalty \ - # --stop_token eos \ - # """ - # subprocess.run( - # command, - # shell=True, - # check=True, - # ) - config = RLOOConfig( - learning_rate=3e-6, - output_dir="models/minimal/rloo", - per_device_train_batch_size=4, - gradient_accumulation_steps=1, - total_episodes=10, - non_eos_penalty=True, - stop_token="eos", - ) - model_config = ModelConfig(model_name_or_path="EleutherAI/pythia-14m") - - # remove output_dir if exists - shutil.rmtree(config.output_dir, ignore_errors=True) - - tokenizer = AutoTokenizer.from_pretrained( - model_config.model_name_or_path, - padding_side="left", - trust_remote_code=True, - ) - tokenizer.add_special_tokens({"pad_token": "[PAD]"}) - if tokenizer.chat_template is None: - tokenizer.chat_template = SIMPLE_QUERY_CHAT_TEMPLATE - reward_model = AutoModelForSequenceClassification.from_pretrained(config.reward_model_path, num_labels=1) - ref_policy = AutoModelForCausalLM.from_pretrained(config.sft_model_path) - policy = AutoModelForCausalLM.from_pretrained(config.sft_model_path) - ################ - # Dataset - ################ - raw_datasets = load_dataset("trl-internal-testing/descriptiveness-sentiment-trl-style", split="descriptiveness") - eval_samples = 20 - train_dataset = raw_datasets.select(range(len(raw_datasets) - eval_samples)) - eval_dataset = raw_datasets.select(range(len(raw_datasets) - eval_samples, len(raw_datasets))) - dataset_text_field = "prompt" - - def prepare_dataset(dataset, tokenizer): - """pre-tokenize the dataset before training; only collate during training""" - - def tokenize(element): - outputs = tokenizer( - element[dataset_text_field], - padding=False, - ) - return {"input_ids": outputs["input_ids"]} - - return dataset.map( - tokenize, - remove_columns=dataset.column_names, - batched=True, - num_proc=4, # multiprocessing.cpu_count(), - load_from_cache_file=False, - ) - - ################ - # Training - ################ - trainer = RLOOTrainer( - config=config, - tokenizer=tokenizer, - policy=policy, - ref_policy=ref_policy, - reward_model=reward_model, - train_dataset=prepare_dataset(train_dataset, tokenizer), - eval_dataset=prepare_dataset(eval_dataset, tokenizer), + command = """\ +python examples/scripts/rloo/rloo.py \ + --learning_rate 3e-6 \ + --output_dir models/minimal/rloo \ + --per_device_train_batch_size 4 \ + --gradient_accumulation_steps 1 \ + --total_episodes 10 \ + --model_name_or_path EleutherAI/pythia-14m \ + --non_eos_penalty \ + --stop_token eos \ +""" + if platform.system() == "Windows": + # windows CI does not work with subprocesses for some reason + # e.g., https://github.com/huggingface/trl/actions/runs/9600036224/job/26475286210?pr=1743 + return + subprocess.run( + command, + shell=True, + check=True, ) - trainer.train() - trainer.save_model(config.output_dir) - if config.push_to_hub: - trainer.push_to_hub() - trainer.generate_completions() def test_rloo_reward():