diff --git a/python/sglang/srt/conversation.py b/python/sglang/srt/conversation.py index b3988cc8237..c0f95b5d041 100644 --- a/python/sglang/srt/conversation.py +++ b/python/sglang/srt/conversation.py @@ -6,7 +6,7 @@ from enum import IntEnum, auto from typing import Dict, List, Optional, Tuple, Union -from sglang.srt.openai_protocol import ChatCompletionRequest +from sglang.srt.openai_api.protocol import ChatCompletionRequest class SeparatorStyle(IntEnum): diff --git a/python/sglang/srt/openai_api_adapter.py b/python/sglang/srt/openai_api/adapter.py similarity index 99% rename from python/sglang/srt/openai_api_adapter.py rename to python/sglang/srt/openai_api/adapter.py index f1f09c91988..ebb95ea241e 100644 --- a/python/sglang/srt/openai_api_adapter.py +++ b/python/sglang/srt/openai_api/adapter.py @@ -16,7 +16,7 @@ register_conv_template, ) from sglang.srt.managers.io_struct import GenerateReqInput -from sglang.srt.openai_protocol import ( +from sglang.srt.openai_api.protocol import ( ChatCompletionRequest, ChatCompletionResponse, ChatCompletionResponseChoice, @@ -106,6 +106,7 @@ async def v1_completions(tokenizer_manager, raw_request: Request): "frequency_penalty": request.frequency_penalty, "regex": request.regex, "n": request.n, + "ignore_eos": request.ignore_eos, }, return_logprob=request.logprobs is not None and request.logprobs > 0, top_logprobs_num=request.logprobs if request.logprobs is not None else 0, diff --git a/python/sglang/srt/openai_protocol.py b/python/sglang/srt/openai_api/protocol.py similarity index 99% rename from python/sglang/srt/openai_protocol.py rename to python/sglang/srt/openai_api/protocol.py index 1149352b402..b91179203d6 100644 --- a/python/sglang/srt/openai_protocol.py +++ b/python/sglang/srt/openai_api/protocol.py @@ -68,6 +68,7 @@ class CompletionRequest(BaseModel): # Extra parameters for SRT backend only and will be ignored by OpenAI models. regex: Optional[str] = None + ignore_eos: Optional[bool] = False class CompletionResponseChoice(BaseModel): diff --git a/python/sglang/srt/server.py b/python/sglang/srt/server.py index 4c1b5f4a476..ac62f89ae64 100644 --- a/python/sglang/srt/server.py +++ b/python/sglang/srt/server.py @@ -39,12 +39,12 @@ from sglang.srt.managers.detokenizer_manager import start_detokenizer_process from sglang.srt.managers.io_struct import GenerateReqInput from sglang.srt.managers.tokenizer_manager import TokenizerManager -from sglang.srt.openai_api_adapter import ( +from sglang.srt.openai_api.adapter import ( load_chat_template_for_openai_api, v1_chat_completions, v1_completions, ) -from sglang.srt.openai_protocol import ModelCard, ModelList +from sglang.srt.openai_api.protocol import ModelCard, ModelList from sglang.srt.server_args import PortArgs, ServerArgs from sglang.srt.utils import ( API_KEY_HEADER_NAME, diff --git a/python/sglang/test/test_conversation.py b/python/sglang/test/test_conversation.py index 11e837ddbde..e6d9f396aa7 100644 --- a/python/sglang/test/test_conversation.py +++ b/python/sglang/test/test_conversation.py @@ -1,5 +1,5 @@ from sglang.srt.conversation import generate_chat_conv -from sglang.srt.managers.openai_protocol import ( +from sglang.srt.managers.openai_api.protocol import ( ChatCompletionMessageContentImagePart, ChatCompletionMessageContentImageURL, ChatCompletionMessageContentTextPart, diff --git a/python/sglang/test/test_openai_protocol.py b/python/sglang/test/test_openai_protocol.py index 99e7a8089cf..cade4728cba 100644 --- a/python/sglang/test/test_openai_protocol.py +++ b/python/sglang/test/test_openai_protocol.py @@ -1,4 +1,4 @@ -from sglang.srt.managers.openai_protocol import ( +from sglang.srt.managers.openai_api.protocol import ( ChatCompletionMessageContentImagePart, ChatCompletionMessageContentImageURL, ChatCompletionMessageContentTextPart,