benchmark_serving support --served-model-name param (vllm-project#12109)
Signed-off-by: zibai <[email protected]>
Co-authored-by: Roger Wang <[email protected]>
2 people authored and Ubuntu committed Jan 19, 2025
1 parent 4286078 commit b7b0865
Showing 2 changed files with 19 additions and 3 deletions.
9 changes: 6 additions & 3 deletions benchmarks/backend_request_func.py
@@ -22,6 +22,7 @@ class RequestFuncInput:
     prompt_len: int
     output_len: int
     model: str
+    model_name: Optional[str] = None
     best_of: int = 1
     logprobs: Optional[int] = None
     extra_body: Optional[dict] = None
@@ -78,7 +79,7 @@ async def async_request_tgi(
                             continue
                         chunk_bytes = chunk_bytes.decode("utf-8")

-                        #NOTE: Sometimes TGI returns a ping response without
+                        # NOTE: Sometimes TGI returns a ping response without
                         # any data, we should skip it.
                         if chunk_bytes.startswith(":"):
                             continue
@@ -235,7 +236,8 @@ async def async_request_openai_completions(

     async with aiohttp.ClientSession(timeout=AIOHTTP_TIMEOUT) as session:
         payload = {
-            "model": request_func_input.model,
+            "model": request_func_input.model_name \
+            if request_func_input.model_name else request_func_input.model,
             "prompt": request_func_input.prompt,
             "temperature": 0.0,
             "best_of": request_func_input.best_of,
@@ -328,7 +330,8 @@ async def async_request_openai_chat_completions(
         if request_func_input.multi_modal_content:
             content.append(request_func_input.multi_modal_content)
         payload = {
-            "model": request_func_input.model,
+            "model": request_func_input.model_name \
+            if request_func_input.model_name else request_func_input.model,
             "messages": [
                 {
                     "role": "user",
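To make the intent of the backend change explicit: the request payload now prefers the optional model_name over the local model identifier whenever it is set. The following is a minimal, self-contained sketch of that fallback (not the benchmark code itself); the build_payload helper is hypothetical and stands in for the inline expression used in the diff above.

# Sketch of the model-name fallback added to backend_request_func.py.
# build_payload is a hypothetical helper; the real code inlines the expression.
from dataclasses import dataclass
from typing import Optional


@dataclass
class RequestFuncInput:
    prompt: str
    model: str                        # local model path/id (also used for the tokenizer)
    model_name: Optional[str] = None  # name the OpenAI-compatible server expects


def build_payload(req: RequestFuncInput) -> dict:
    return {
        # Prefer the served model name; fall back to the model id.
        "model": req.model_name if req.model_name else req.model,
        "prompt": req.prompt,
    }


print(build_payload(RequestFuncInput(prompt="Hello",
                                     model="/models/Llama-3-8B-Instruct",
                                     model_name="llama-3")))
# -> {'model': 'llama-3', 'prompt': 'Hello'}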
13 changes: 13 additions & 0 deletions benchmarks/benchmark_serving.py
@@ -525,6 +525,7 @@ async def benchmark(
     api_url: str,
     base_url: str,
     model_id: str,
+    model_name: str,
     tokenizer: PreTrainedTokenizerBase,
     input_requests: List[Tuple[str, int, int]],
     logprobs: Optional[int],
@@ -553,6 +554,7 @@
             "Multi-modal content is only supported on 'openai-chat' backend.")
     test_input = RequestFuncInput(
         model=model_id,
+        model_name=model_name,
         prompt=test_prompt,
         api_url=api_url,
         prompt_len=test_prompt_len,
@@ -573,6 +575,7 @@
     if profile:
         print("Starting profiler...")
         profile_input = RequestFuncInput(model=model_id,
+                                         model_name=model_name,
                                          prompt=test_prompt,
                                          api_url=base_url + "/start_profile",
                                          prompt_len=test_prompt_len,
@@ -616,6 +619,7 @@ async def limited_request_func(request_func_input, pbar):
     async for request in get_request(input_requests, request_rate, burstiness):
         prompt, prompt_len, output_len, mm_content = request
         request_func_input = RequestFuncInput(model=model_id,
+                                              model_name=model_name,
                                               prompt=prompt,
                                               api_url=api_url,
                                               prompt_len=prompt_len,
@@ -780,6 +784,7 @@ def main(args: argparse.Namespace):

     backend = args.backend
     model_id = args.model
+    model_name = args.served_model_name
     tokenizer_id = args.tokenizer if args.tokenizer is not None else args.model
     tokenizer_mode = args.tokenizer_mode

@@ -877,6 +882,7 @@ def main(args: argparse.Namespace):
             api_url=api_url,
             base_url=base_url,
             model_id=model_id,
+            model_name=model_name,
             tokenizer=tokenizer,
             input_requests=input_requests,
             logprobs=args.logprobs,
@@ -1222,5 +1228,12 @@ def main(args: argparse.Namespace):
         'always use the slow tokenizer. \n* '
         '"mistral" will always use the `mistral_common` tokenizer.')

+    parser.add_argument("--served-model-name",
+                        type=str,
+                        default=None,
+                        help="The model name used in the API. "
+                        "If not specified, the model name will be the "
+                        "same as the ``--model`` argument. ")
+
     args = parser.parse_args()
     main(args)
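As a usage note, the new flag is meant to mirror the server's --served-model-name: --model still points at the model (and tokenizer) on the benchmarking side, while --served-model-name is the name placed in the API request. Below is a minimal sketch of the expected parse-and-fallback flow; the paths and names are made up for illustration.

# Sketch only: illustrates how --served-model-name is expected to flow into requests.
import argparse

parser = argparse.ArgumentParser()
parser.add_argument("--model", type=str, required=True)
parser.add_argument("--served-model-name", type=str, default=None)

# Hypothetical invocation; a real run would pass these on the command line.
args = parser.parse_args(
    ["--model", "/models/Llama-3-8B-Instruct", "--served-model-name", "llama-3"])

model_id = args.model                # used locally, e.g. to load the tokenizer
model_name = args.served_model_name  # may be None when the flag is omitted
payload_model = model_name if model_name else model_id
print(payload_model)  # -> "llama-3"; without the flag it falls back to the model path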
