From b2a1812df2d0d616a228c0ac6c8b31a71ef54e6d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Szabolcs=20Foszt=C3=B3?=
Date: Wed, 7 Aug 2024 13:17:24 +0200
Subject: [PATCH 1/3] #590 Increase default max_new_tokens, track changes in examples and documentation

---
 README.md                                   | 2 +-
 docs/en/sampling_params.md                  | 6 +++---
 python/sglang/lang/compiler.py              | 4 ++--
 python/sglang/lang/ir.py                    | 6 +++---
 python/sglang/srt/sampling_params.py        | 2 +-
 scripts/deprecated/test_curl.sh             | 2 +-
 scripts/deprecated/test_httpserver_llava.py | 2 +-
 7 files changed, 12 insertions(+), 12 deletions(-)

diff --git a/README.md b/README.md
index 1bb6f13d0f8..2b0e52c4c82 100644
--- a/README.md
+++ b/README.md
@@ -99,7 +99,7 @@ curl http://localhost:30000/generate \
   -d '{
     "text": "Once upon a time,",
     "sampling_params": {
-      "max_new_tokens": 16,
+      "max_new_tokens": 128,
       "temperature": 0
     }
   }'
diff --git a/docs/en/sampling_params.md b/docs/en/sampling_params.md
index 0ea07c01759..5aad20a6e27 100644
--- a/docs/en/sampling_params.md
+++ b/docs/en/sampling_params.md
@@ -33,7 +33,7 @@ The `sampling_params` follows this format
 
 ```python
 # The maximum number of output tokens
-max_new_tokens: int = 16,
+max_new_tokens: int = 128,
 # Stop when hitting any of the strings in this list.
 stop: Optional[Union[str, List[str]]] = None,
 # Sampling temperature
@@ -90,7 +90,7 @@ response = requests.post(
         "text": "The capital of France is",
         "sampling_params": {
             "temperature": 0,
-            "max_new_tokens": 256,
+            "max_new_tokens": 32,
         },
         "stream": True,
     },
@@ -133,7 +133,7 @@ response = requests.post(
         "image_data": "example_image.png",
         "sampling_params": {
             "temperature": 0,
-            "max_new_tokens": 32,
+            "max_new_tokens": 128,
         },
     },
 )
diff --git a/python/sglang/lang/compiler.py b/python/sglang/lang/compiler.py
index 36287cd397c..95af04adb0a 100644
--- a/python/sglang/lang/compiler.py
+++ b/python/sglang/lang/compiler.py
@@ -125,7 +125,7 @@ def run_internal(
     def run(
         self,
         *,
-        max_new_tokens: int = 16,
+        max_new_tokens: int = 128,
         stop: Union[str, List[str]] = (),
         temperature: float = 1.0,
         top_p: float = 1.0,
@@ -155,7 +155,7 @@ def run_batch(
         self,
         batch_kwargs,
         *,
-        max_new_tokens: int = 16,
+        max_new_tokens: int = 128,
         stop: Union[str, List[str]] = (),
         temperature: float = 1.0,
         top_p: float = 1.0,
diff --git a/python/sglang/lang/ir.py b/python/sglang/lang/ir.py
index d902497c76e..135110c1e0d 100644
--- a/python/sglang/lang/ir.py
+++ b/python/sglang/lang/ir.py
@@ -16,7 +16,7 @@
 
 @dataclasses.dataclass
 class SglSamplingParams:
-    max_new_tokens: int = 16
+    max_new_tokens: int = 128
     stop: Union[str, List[str]] = ()
     temperature: float = 1.0
     top_p: float = 1.0
@@ -140,7 +140,7 @@ def bind(self, **kwargs):
     def run(
         self,
         *args,
-        max_new_tokens: int = 16,
+        max_new_tokens: int = 128,
         stop: Union[str, List[str]] = (),
         temperature: float = 1.0,
         top_p: float = 1.0,
@@ -179,7 +179,7 @@ def run_batch(
         self,
         batch_kwargs,
         *,
-        max_new_tokens: int = 16,
+        max_new_tokens: int = 128,
         stop: Union[str, List[str]] = (),
         temperature: float = 1.0,
         top_p: float = 1.0,
diff --git a/python/sglang/srt/sampling_params.py b/python/sglang/srt/sampling_params.py
index f6582cf41bd..89091b7ae3f 100644
--- a/python/sglang/srt/sampling_params.py
+++ b/python/sglang/srt/sampling_params.py
@@ -23,7 +23,7 @@ class SamplingParams:
 
     def __init__(
         self,
-        max_new_tokens: int = 16,
+        max_new_tokens: int = 128,
         stop: Optional[Union[str, List[str]]] = None,
         temperature: float = 1.0,
         top_p: float = 1.0,
diff --git a/scripts/deprecated/test_curl.sh b/scripts/deprecated/test_curl.sh
index 4362eaa9355..1c83208a759 100644
--- a/scripts/deprecated/test_curl.sh
+++ b/scripts/deprecated/test_curl.sh
@@ -3,7 +3,7 @@ curl http://localhost:30000/generate \
   -d '{
     "text": "Once upon a time,",
     "sampling_params": {
-      "max_new_tokens": 16,
+      "max_new_tokens": 64,
       "temperature": 0
     }
   }'
diff --git a/scripts/deprecated/test_httpserver_llava.py b/scripts/deprecated/test_httpserver_llava.py
index a7912fcc2f9..791fc6deb1f 100644
--- a/scripts/deprecated/test_httpserver_llava.py
+++ b/scripts/deprecated/test_httpserver_llava.py
@@ -36,7 +36,7 @@ async def test_concurrent(args):
             "image_data": "example_image.png",
             "sampling_params": {
                 "temperature": 0,
-                "max_new_tokens": 16,
+                "max_new_tokens": 64,
             },
         },
     )

From 1231e880bec43570addc9f25d39443a0720a6d12 Mon Sep 17 00:00:00 2001
From: Ying Sheng
Date: Wed, 7 Aug 2024 17:15:59 -0700
Subject: [PATCH 2/3] Update README.md

---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 2b0e52c4c82..1bb6f13d0f8 100644
--- a/README.md
+++ b/README.md
@@ -99,7 +99,7 @@ curl http://localhost:30000/generate \
   -d '{
     "text": "Once upon a time,",
     "sampling_params": {
-      "max_new_tokens": 128,
+      "max_new_tokens": 16,
       "temperature": 0
     }
   }'

From a3ef9017c4bb2675baee11e00bf3e23ae115ebd8 Mon Sep 17 00:00:00 2001
From: Ying Sheng
Date: Wed, 7 Aug 2024 17:16:04 -0700
Subject: [PATCH 3/3] Update docs/en/sampling_params.md

---
 docs/en/sampling_params.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/en/sampling_params.md b/docs/en/sampling_params.md
index 5aad20a6e27..782bb1fb687 100644
--- a/docs/en/sampling_params.md
+++ b/docs/en/sampling_params.md
@@ -133,7 +133,7 @@ response = requests.post(
         "image_data": "example_image.png",
         "sampling_params": {
             "temperature": 0,
-            "max_new_tokens": 128,
+            "max_new_tokens": 32,
         },
     },
 )
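
Note: since the server-side default for `max_new_tokens` changes across this series, clients that care about output length should pass it explicitly. Below is a minimal sketch of such a request, following the `/generate` request shape shown in the docs and README diffs above (it assumes a local sglang server on port 30000, as in those examples):

```python
import requests

# Sketch: call the /generate endpoint with max_new_tokens set explicitly,
# so the result does not depend on the server-side default (16 before this
# series, 128 after PATCH 1/3).
response = requests.post(
    "http://localhost:30000/generate",  # assumed local server, as in the examples above
    json={
        "text": "Once upon a time,",
        "sampling_params": {
            "temperature": 0,
            "max_new_tokens": 128,
        },
    },
)
print(response.json())
```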