From 786cf2944a90c9ce32ccfd998493d0fefa20d914 Mon Sep 17 00:00:00 2001
From: Roger Wang <136131678+ywang96@users.noreply.github.com>
Date: Sat, 11 Jan 2025 22:36:38 -0800
Subject: [PATCH] [V1] Avoid sending text prompt to core engine (#11963)

Signed-off-by: Roger Wang
---
 vllm/v1/engine/__init__.py    | 4 ++--
 vllm/v1/engine/core_client.py | 6 ++++++
 2 files changed, 8 insertions(+), 2 deletions(-)

diff --git a/vllm/v1/engine/__init__.py b/vllm/v1/engine/__init__.py
index 5e3c5e327ef63..3ce9db0e47eed 100644
--- a/vllm/v1/engine/__init__.py
+++ b/vllm/v1/engine/__init__.py
@@ -19,8 +19,8 @@ class EngineCoreRequest:
     # due to circular imports and typing we have in data.py

     request_id: str
-    #NOTE(Nick): I don't think we need to pass prompt here since it should
-    # always be tokenized?
+    # NOTE(ywang96): original text prompt is needed when a request is added to
+    # Detokenizer, but set to None when it is added to EngineCoreClient.
     prompt: Optional[str]
     prompt_token_ids: List[int]
     mm_inputs: Optional[List[Optional["MultiModalKwargs"]]]
diff --git a/vllm/v1/engine/core_client.py b/vllm/v1/engine/core_client.py
index a4a45ae05ff9e..4ed7f944b058f 100644
--- a/vllm/v1/engine/core_client.py
+++ b/vllm/v1/engine/core_client.py
@@ -219,6 +219,9 @@ def _send_input(self, request_type: EngineCoreRequestType,
         self.input_socket.send_multipart(msg, copy=False)

     def add_request(self, request: EngineCoreRequest) -> None:
+        # NOTE: text prompt is not needed in the core engine as it has been
+        # tokenized.
+        request.prompt = None
         self._send_input(EngineCoreRequestType.ADD, request)

     def abort_requests(self, request_ids: List[str]) -> None:
@@ -257,6 +260,9 @@ async def _send_input(self, request_type: EngineCoreRequestType,
         await self.input_socket.send_multipart(msg, copy=False)

     async def add_request_async(self, request: EngineCoreRequest) -> None:
+        # NOTE: text prompt is not needed in the core engine as it has been
+        # tokenized.
+        request.prompt = None
         await self._send_input(EngineCoreRequestType.ADD, request)

     async def abort_requests_async(self, request_ids: List[str]) -> None: