From 7ec9132e3483f143c061df25a1bba09bbc4b0601 Mon Sep 17 00:00:00 2001
From: hnyls2002
Date: Fri, 4 Oct 2024 08:02:44 +0000
Subject: [PATCH] optimize

---
 python/sglang/srt/managers/scheduler.py | 3 +++
 test/srt/test_bench_serving.py          | 2 +-
 2 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/python/sglang/srt/managers/scheduler.py b/python/sglang/srt/managers/scheduler.py
index ac3b6c33fa9..2f14a712bd3 100644
--- a/python/sglang/srt/managers/scheduler.py
+++ b/python/sglang/srt/managers/scheduler.py
@@ -444,6 +444,9 @@ def handle_embedding_request(
         self.waiting_queue.append(req)
 
     def get_new_prefill_batch(self) -> Optional[ScheduleBatch]:
+        if len(self.waiting_queue) == 0 and self.current_inflight_req is None:
+            return None
+
         running_bs = (
             len(self.running_batch.reqs) if self.running_batch is not None else 0
         )
diff --git a/test/srt/test_bench_serving.py b/test/srt/test_bench_serving.py
index b3a4d347472..99d588383c0 100644
--- a/test/srt/test_bench_serving.py
+++ b/test/srt/test_bench_serving.py
@@ -25,7 +25,7 @@ def test_offline_throughput_default(self):
     def test_offline_throughput_non_stream_small_batch_size(self):
         res = run_bench_serving(
             model=DEFAULT_MODEL_NAME_FOR_TEST,
-            num_prompts=50,
+            num_prompts=200,
             request_rate=float("inf"),
            dataset_name="sharegpt",
             random_input_len=None,
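
Reviewer note (not part of the patch): the scheduler change adds a guard clause at the top of get_new_prefill_batch so the call returns immediately when the waiting queue is empty and there is no in-flight chunked request, skipping the batch-construction work on idle scheduler iterations; the test change raises num_prompts from 50 to 200 in the small-batch non-streaming benchmark. Below is a minimal, illustrative Python sketch of the same guard-clause pattern. TinyScheduler and its attributes are hypothetical stand-ins for illustration only, not sglang's actual Scheduler class.

# Illustrative sketch only, not sglang code: shows the early-return guard
# the patch adds, using a toy scheduler with hypothetical names.
from typing import List, Optional


class TinyScheduler:
    def __init__(self) -> None:
        self.waiting_queue: List[str] = []
        self.current_inflight_req: Optional[str] = None

    def get_new_prefill_batch(self) -> Optional[List[str]]:
        # Guard clause: nothing queued and no request still being processed,
        # so skip the (comparatively expensive) batch-building work below.
        if len(self.waiting_queue) == 0 and self.current_inflight_req is None:
            return None
        # Stand-in for the real batch-building logic.
        batch, self.waiting_queue = self.waiting_queue, []
        return batch


if __name__ == "__main__":
    sched = TinyScheduler()
    assert sched.get_new_prefill_batch() is None  # empty queue -> early return
    sched.waiting_queue.append("req-0")
    print(sched.get_new_prefill_batch())  # ['req-0']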