From aa30928acf352cb4917168de130003925c6a15fc Mon Sep 17 00:00:00 2001
From: mark-vaykhansky
Date: Wed, 11 Sep 2024 11:38:54 +0300
Subject: [PATCH 1/3] Log prompts

---
 token_benchmark_ray.py | 22 ++++++++++++++++++++--
 1 file changed, 20 insertions(+), 2 deletions(-)

diff --git a/token_benchmark_ray.py b/token_benchmark_ray.py
index a5909aa..a9c8b88 100644
--- a/token_benchmark_ray.py
+++ b/token_benchmark_ray.py
@@ -40,8 +40,9 @@ def get_token_throughput_latencies(
     additional_sampling_params: Optional[Dict[str, Any]] = None,
     num_concurrent_requests: int = 1,
     max_num_completed_requests: int = 500,
-    test_timeout_s=90,
-    llm_api="openai",
+    test_timeout_s: int =90,
+    llm_api: str = "openai",
+    log_prompts: bool = False
 ) -> Tuple[Dict[str, Any], List[Dict[str, Any]]]:
     """Get the token throughput and latencies for the given model.

@@ -90,6 +91,11 @@ def get_token_throughput_latencies(
             prompt_tokens_stddev=stddev_input_tokens,
             expect_output_tokens=num_output_tokens,
         ))
+
+    if log_prompts:
+        print("Sending the following prompts:")
+        print(prompts)
+
     start_time = time.monotonic()
     iter = 0
     pbar = tqdm(total=max_num_completed_requests)
@@ -289,6 +295,7 @@ def run_token_benchmark(
     additional_sampling_params: str,
     results_dir: str,
     user_metadata: Dict[str, Any],
+    log_prompts: True,
 ):
     """
     Args:
@@ -324,6 +331,7 @@ def run_token_benchmark(
         stddev_output_tokens=stddev_output_tokens,
         num_concurrent_requests=num_concurrent_requests,
         additional_sampling_params=json.loads(additional_sampling_params),
+        log_prompts=log_prompts,
     )

     if results_dir:
@@ -459,6 +467,15 @@ def run_token_benchmark(
         "name=foo,bar=1. These will be added to the metadata field of the results. "
     ),
 )
+args.add_argument(
+    "--log-prompts",
+    type=bool,
+    default=False,
+    help=(
+        "If True will log all prompts send to the model"
+    ),
+)
+

 if __name__ == "__main__":
     env_vars = dict(os.environ)
@@ -485,4 +502,5 @@ def run_token_benchmark(
         additional_sampling_params=args.additional_sampling_params,
         results_dir=args.results_dir,
         user_metadata=user_metadata,
+        log_prompts=args.log_prompt
     )

From 5760f1573e55f866a74140d4ba86fdadc0a58870 Mon Sep 17 00:00:00 2001
From: mark-vaykhansky
Date: Wed, 11 Sep 2024 11:52:35 +0300
Subject: [PATCH 2/3] Fix typo

---
 token_benchmark_ray.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/token_benchmark_ray.py b/token_benchmark_ray.py
index a9c8b88..011bc76 100644
--- a/token_benchmark_ray.py
+++ b/token_benchmark_ray.py
@@ -295,7 +295,7 @@ def run_token_benchmark(
     additional_sampling_params: str,
     results_dir: str,
     user_metadata: Dict[str, Any],
-    log_prompts: True,
+    log_prompts: bool,
 ):
     """
     Args:

From 5a275adbf44ff5cd4091adb8f4bcc2f4bf269485 Mon Sep 17 00:00:00 2001
From: mark-vaykhansky
Date: Wed, 11 Sep 2024 13:27:57 +0300
Subject: [PATCH 3/3] CR Fixes

---
 token_benchmark_ray.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/token_benchmark_ray.py b/token_benchmark_ray.py
index 011bc76..db9ac85 100644
--- a/token_benchmark_ray.py
+++ b/token_benchmark_ray.py
@@ -95,6 +95,10 @@ def get_token_throughput_latencies(
     if log_prompts:
         print("Sending the following prompts:")
         print(prompts)
+    else:
+        # 'prompts' is an array of tuples where each item is (prompt, token_length)
+        print("Sending the following prompt sizes:")
+        print(list(map(lambda prompt_with_token_count: prompt_with_token_count[1], prompts)))
     start_time = time.monotonic()
     iter = 0
     pbar = tqdm(total=max_num_completed_requests)
@@ -472,7 +476,7 @@ def run_token_benchmark(
     type=bool,
     default=False,
     help=(
-        "If True will log all prompts send to the model"
+        "If True will log all prompts sent to the model"
     ),
 )
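
With the series applied, the new switch would be exercised roughly as below. Only --log-prompts comes from these patches; the remaining flags and the model name follow the stock llmperf token_benchmark_ray.py CLI and are illustrative placeholders:

    python token_benchmark_ray.py \
        --model "meta-llama/Llama-2-7b-chat-hf" \
        --mean-input-tokens 550 \
        --stddev-input-tokens 150 \
        --mean-output-tokens 150 \
        --stddev-output-tokens 10 \
        --max-num-completed-requests 10 \
        --num-concurrent-requests 1 \
        --llm-api openai \
        --log-prompts True

Note that because the argument is declared with type=bool, argparse converts any non-empty value (including the string "False") to True, so as written the flag can only enable logging.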