From 1f801a915ce2d13e95fd8765550a508f5f8d6d3c Mon Sep 17 00:00:00 2001 From: Lianmin Zheng <lianminzheng@gmail.com> Date: Mon, 9 Dec 2024 02:06:56 -0800 Subject: [PATCH 1/3] improve style --- .../srt/managers/detokenizer_manager.py | 1 - .../sglang/srt/managers/tokenizer_manager.py | 20 +++++++++---------- 2 files changed, 10 insertions(+), 11 deletions(-) diff --git a/python/sglang/srt/managers/detokenizer_manager.py b/python/sglang/srt/managers/detokenizer_manager.py index bc9e4a53b5c..b4bc1e7a448 100644 --- a/python/sglang/srt/managers/detokenizer_manager.py +++ b/python/sglang/srt/managers/detokenizer_manager.py @@ -29,7 +29,6 @@ BatchStrOut, BatchTokenIDOut, ) -from sglang.srt.managers.schedule_batch import FINISH_MATCHED_STR, FINISH_MATCHED_TOKEN from sglang.srt.server_args import PortArgs, ServerArgs from sglang.srt.utils import configure_logger, get_zmq_socket from sglang.utils import find_printable_text, get_exception_traceback diff --git a/python/sglang/srt/managers/tokenizer_manager.py b/python/sglang/srt/managers/tokenizer_manager.py index 29b98df2efa..8f147bf8bd7 100644 --- a/python/sglang/srt/managers/tokenizer_manager.py +++ b/python/sglang/srt/managers/tokenizer_manager.py @@ -623,23 +623,23 @@ async def handle_loop(self): i, ) + if not isinstance(recv_obj, BatchEmbeddingOut): + meta_info.update( + { + "completion_tokens": recv_obj.completion_tokens[i], + "cached_tokens": recv_obj.cached_tokens[i], + } + ) + if isinstance(recv_obj, BatchStrOut): out_dict = { "text": recv_obj.output_strs[i], - "meta_info": { - **meta_info, - "completion_tokens": recv_obj.completion_tokens[i], - "cached_tokens": recv_obj.cached_tokens[i], - }, + "meta_info": meta_info, } elif isinstance(recv_obj, BatchTokenIDOut): out_dict = { "token_ids": recv_obj.output_ids[i], - "meta_info": { - **meta_info, - "completion_tokens": recv_obj.completion_tokens[i], - "cached_tokens": recv_obj.cached_tokens[i], - }, + "meta_info": meta_info, } else: assert isinstance(recv_obj, BatchEmbeddingOut) From 9a872891eddcc81fe25283c1955515bf145a96ed Mon Sep 17 00:00:00 2001 From: Lianmin Zheng <lianminzheng@gmail.com> Date: Mon, 9 Dec 2024 02:33:22 -0800 Subject: [PATCH 2/3] improve the style --- python/sglang/srt/managers/scheduler.py | 1 + 1 file changed, 1 insertion(+) diff --git a/python/sglang/srt/managers/scheduler.py b/python/sglang/srt/managers/scheduler.py index 4ece8786878..4680b042df9 100644 --- a/python/sglang/srt/managers/scheduler.py +++ b/python/sglang/srt/managers/scheduler.py @@ -1198,6 +1198,7 @@ def stream_output( decode_ids_list = [] read_offsets = [] output_ids = [] + skip_special_tokens = [] spaces_between_special_tokens = [] no_stop_trim = [] From f1e7c2cc40ee2c50b43ff8a459fd75e2d8835aca Mon Sep 17 00:00:00 2001 From: Lianmin Zheng <lianminzheng@gmail.com> Date: Mon, 9 Dec 2024 03:00:54 -0800 Subject: [PATCH 3/3] fix mem style --- python/sglang/srt/model_executor/model_runner.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/sglang/srt/model_executor/model_runner.py b/python/sglang/srt/model_executor/model_runner.py index 3f0cbecac15..ebda816dbaf 100644 --- a/python/sglang/srt/model_executor/model_runner.py +++ b/python/sglang/srt/model_executor/model_runner.py @@ -114,7 +114,7 @@ def __init__( server_args.chunked_prefill_size = -1 self.mem_fraction_static *= 0.95 logger.info( - f"Automatically reduce --mem-fraction-static to {self.mem_fraction_static} " + f"Automatically reduce --mem-fraction-static to {self.mem_fraction_static:.3f} " f"and turn off chunked prefill " f"because this is a multimodal model." )