diff --git a/python/sglang/bench_serving.py b/python/sglang/bench_serving.py index e5f61b9a17a..0eac3813383 100644 --- a/python/sglang/bench_serving.py +++ b/python/sglang/bench_serving.py @@ -84,6 +84,9 @@ async def async_request_trt_llm( "min_length": request_func_input.output_len, "end_id": 1048576, } + if args.disable_ignore_eos: + del payload["min_length"] + del payload["end_id"] output = RequestFuncOutput() output.prompt_len = request_func_input.prompt_len @@ -149,7 +152,7 @@ async def async_request_openai_completions( "best_of": 1, "max_tokens": request_func_input.output_len, "stream": not args.disable_stream, - "ignore_eos": True, + "ignore_eos": not args.disable_ignore_eos, } headers = {"Authorization": f"Bearer {os.environ.get('OPENAI_API_KEY')}"} @@ -969,6 +972,11 @@ def set_ulimit(target_soft_limit=65535): action="store_true", help="Disable streaming mode.", ) + parser.add_argument( + "--disable-ignore-eos", + action="store_true", + help="Disable ignoring EOS.", + ) set_ulimit()