From 1f801a915ce2d13e95fd8765550a508f5f8d6d3c Mon Sep 17 00:00:00 2001
From: Lianmin Zheng <lianminzheng@gmail.com>
Date: Mon, 9 Dec 2024 02:06:56 -0800
Subject: [PATCH 1/3] Improve style: dedupe meta_info fields in tokenizer_manager

---
 .../srt/managers/detokenizer_manager.py       |  1 -
 .../sglang/srt/managers/tokenizer_manager.py  | 20 +++++++++----------
 2 files changed, 10 insertions(+), 11 deletions(-)

diff --git a/python/sglang/srt/managers/detokenizer_manager.py b/python/sglang/srt/managers/detokenizer_manager.py
index bc9e4a53b5c..b4bc1e7a448 100644
--- a/python/sglang/srt/managers/detokenizer_manager.py
+++ b/python/sglang/srt/managers/detokenizer_manager.py
@@ -29,7 +29,6 @@
     BatchStrOut,
     BatchTokenIDOut,
 )
-from sglang.srt.managers.schedule_batch import FINISH_MATCHED_STR, FINISH_MATCHED_TOKEN
 from sglang.srt.server_args import PortArgs, ServerArgs
 from sglang.srt.utils import configure_logger, get_zmq_socket
 from sglang.utils import find_printable_text, get_exception_traceback
diff --git a/python/sglang/srt/managers/tokenizer_manager.py b/python/sglang/srt/managers/tokenizer_manager.py
index 29b98df2efa..8f147bf8bd7 100644
--- a/python/sglang/srt/managers/tokenizer_manager.py
+++ b/python/sglang/srt/managers/tokenizer_manager.py
@@ -623,23 +623,23 @@ async def handle_loop(self):
                             i,
                         )
 
+                    if not isinstance(recv_obj, BatchEmbeddingOut):
+                        meta_info.update(
+                            {
+                                "completion_tokens": recv_obj.completion_tokens[i],
+                                "cached_tokens": recv_obj.cached_tokens[i],
+                            }
+                        )
+
                     if isinstance(recv_obj, BatchStrOut):
                         out_dict = {
                             "text": recv_obj.output_strs[i],
-                            "meta_info": {
-                                **meta_info,
-                                "completion_tokens": recv_obj.completion_tokens[i],
-                                "cached_tokens": recv_obj.cached_tokens[i],
-                            },
+                            "meta_info": meta_info,
                         }
                     elif isinstance(recv_obj, BatchTokenIDOut):
                         out_dict = {
                             "token_ids": recv_obj.output_ids[i],
-                            "meta_info": {
-                                **meta_info,
-                                "completion_tokens": recv_obj.completion_tokens[i],
-                                "cached_tokens": recv_obj.cached_tokens[i],
-                            },
+                            "meta_info": meta_info,
                         }
                     else:
                         assert isinstance(recv_obj, BatchEmbeddingOut)

From 9a872891eddcc81fe25283c1955515bf145a96ed Mon Sep 17 00:00:00 2001
From: Lianmin Zheng <lianminzheng@gmail.com>
Date: Mon, 9 Dec 2024 02:33:22 -0800
Subject: [PATCH 2/3] Improve style: add blank line in scheduler stream_output

---
 python/sglang/srt/managers/scheduler.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/python/sglang/srt/managers/scheduler.py b/python/sglang/srt/managers/scheduler.py
index 4ece8786878..4680b042df9 100644
--- a/python/sglang/srt/managers/scheduler.py
+++ b/python/sglang/srt/managers/scheduler.py
@@ -1198,6 +1198,7 @@ def stream_output(
             decode_ids_list = []
             read_offsets = []
             output_ids = []
+
             skip_special_tokens = []
             spaces_between_special_tokens = []
             no_stop_trim = []

From f1e7c2cc40ee2c50b43ff8a459fd75e2d8835aca Mon Sep 17 00:00:00 2001
From: Lianmin Zheng <lianminzheng@gmail.com>
Date: Mon, 9 Dec 2024 03:00:54 -0800
Subject: [PATCH 3/3] Format --mem-fraction-static to 3 decimals in log message

---
 python/sglang/srt/model_executor/model_runner.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/python/sglang/srt/model_executor/model_runner.py b/python/sglang/srt/model_executor/model_runner.py
index 3f0cbecac15..ebda816dbaf 100644
--- a/python/sglang/srt/model_executor/model_runner.py
+++ b/python/sglang/srt/model_executor/model_runner.py
@@ -114,7 +114,7 @@ def __init__(
             server_args.chunked_prefill_size = -1
             self.mem_fraction_static *= 0.95
             logger.info(
-                f"Automatically reduce --mem-fraction-static to {self.mem_fraction_static} "
+                f"Automatically reduce --mem-fraction-static to {self.mem_fraction_static:.3f} "
                 f"and turn off chunked prefill "
                 f"because this is a multimodal model."
             )