From aa30928acf352cb4917168de130003925c6a15fc Mon Sep 17 00:00:00 2001
From: mark-vaykhansky
Date: Wed, 11 Sep 2024 11:38:54 +0300
Subject: [PATCH 1/3] Log prompts

---
 token_benchmark_ray.py | 22 ++++++++++++++++++++--
 1 file changed, 20 insertions(+), 2 deletions(-)

diff --git a/token_benchmark_ray.py b/token_benchmark_ray.py
index a5909aa..a9c8b88 100644
--- a/token_benchmark_ray.py
+++ b/token_benchmark_ray.py
@@ -40,8 +40,9 @@ def get_token_throughput_latencies(
     additional_sampling_params: Optional[Dict[str, Any]] = None,
     num_concurrent_requests: int = 1,
     max_num_completed_requests: int = 500,
-    test_timeout_s=90,
-    llm_api="openai",
+    test_timeout_s: int =90,
+    llm_api: str = "openai",
+    log_prompts: bool = False
 ) -> Tuple[Dict[str, Any], List[Dict[str, Any]]]:
     """Get the token throughput and latencies for the given model.

@@ -90,6 +91,11 @@ def get_token_throughput_latencies(
             prompt_tokens_stddev=stddev_input_tokens,
             expect_output_tokens=num_output_tokens,
         ))
+
+    if log_prompts:
+        print("Sending the following prompts:")
+        print(prompts)
+
     start_time = time.monotonic()
     iter = 0
     pbar = tqdm(total=max_num_completed_requests)
@@ -289,6 +295,7 @@ def run_token_benchmark(
     additional_sampling_params: str,
     results_dir: str,
     user_metadata: Dict[str, Any],
+    log_prompts: True,
 ):
     """
     Args:
@@ -324,6 +331,7 @@ def run_token_benchmark(
         stddev_output_tokens=stddev_output_tokens,
         num_concurrent_requests=num_concurrent_requests,
         additional_sampling_params=json.loads(additional_sampling_params),
+        log_prompts=log_prompts,
     )

     if results_dir:
@@ -459,6 +467,15 @@ def run_token_benchmark(
         "name=foo,bar=1. These will be added to the metadata field of the results. "
     ),
 )
+args.add_argument(
+    "--log-prompts",
+    type=bool,
+    default=False,
+    help=(
+        "If True will log all prompts send to the model"
+    ),
+)
+

 if __name__ == "__main__":
     env_vars = dict(os.environ)
@@ -485,4 +502,5 @@ def run_token_benchmark(
         additional_sampling_params=args.additional_sampling_params,
         results_dir=args.results_dir,
         user_metadata=user_metadata,
+        log_prompts=args.log_prompt
     )

From 5760f1573e55f866a74140d4ba86fdadc0a58870 Mon Sep 17 00:00:00 2001
From: mark-vaykhansky
Date: Wed, 11 Sep 2024 11:52:35 +0300
Subject: [PATCH 2/3] Fix typo

---
 token_benchmark_ray.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/token_benchmark_ray.py b/token_benchmark_ray.py
index a9c8b88..011bc76 100644
--- a/token_benchmark_ray.py
+++ b/token_benchmark_ray.py
@@ -295,7 +295,7 @@ def run_token_benchmark(
     additional_sampling_params: str,
     results_dir: str,
     user_metadata: Dict[str, Any],
-    log_prompts: True,
+    log_prompts: bool,
 ):
     """
     Args:

From 5a275adbf44ff5cd4091adb8f4bcc2f4bf269485 Mon Sep 17 00:00:00 2001
From: mark-vaykhansky
Date: Wed, 11 Sep 2024 13:27:57 +0300
Subject: [PATCH 3/3] CR Fixes

---
 token_benchmark_ray.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/token_benchmark_ray.py b/token_benchmark_ray.py
index 011bc76..db9ac85 100644
--- a/token_benchmark_ray.py
+++ b/token_benchmark_ray.py
@@ -95,6 +95,10 @@ def get_token_throughput_latencies(
     if log_prompts:
         print("Sending the following prompts:")
         print(prompts)
+    else:
+        # 'prompts' is an array of tuples where each item is (prompt, token_length)
+        print("Sending the following prompt sizes:")
+        print(list(map(lambda prompt_with_token_count: prompt_with_token_count[1], prompts)))
     start_time = time.monotonic()
     iter = 0
     pbar = tqdm(total=max_num_completed_requests)
@@ -472,7 +476,7 @@ def run_token_benchmark(
     type=bool,
     default=False,
     help=(
-        "If True will log all prompts send to the model"
+        "If True will log all prompts sent to the model"
     ),
 )
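
With the series applied, the new switch would be exercised roughly as below. Only --log-prompts comes from these patches; the remaining flags and the model name follow the stock llmperf token_benchmark_ray.py CLI and are illustrative placeholders:

    python token_benchmark_ray.py \
        --model "meta-llama/Llama-2-7b-chat-hf" \
        --mean-input-tokens 550 \
        --stddev-input-tokens 150 \
        --mean-output-tokens 150 \
        --stddev-output-tokens 10 \
        --max-num-completed-requests 10 \
        --num-concurrent-requests 1 \
        --llm-api openai \
        --log-prompts True

Note that because the argument is declared with type=bool, argparse converts any non-empty value (including the string "False") to True, so as written the flag can only enable logging.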