From b2a1812df2d0d616a228c0ac6c8b31a71ef54e6d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Szabolcs=20Foszt=C3=B3?=
Date: Wed, 7 Aug 2024 13:17:24 +0200
Subject: [PATCH 1/3] #590 Increase default max_new_tokens, track changes in examples and documentation

---
 README.md                                   | 2 +-
 docs/en/sampling_params.md                  | 6 +++---
 python/sglang/lang/compiler.py              | 4 ++--
 python/sglang/lang/ir.py                    | 6 +++---
 python/sglang/srt/sampling_params.py        | 2 +-
 scripts/deprecated/test_curl.sh             | 2 +-
 scripts/deprecated/test_httpserver_llava.py | 2 +-
 7 files changed, 12 insertions(+), 12 deletions(-)

diff --git a/README.md b/README.md
index 1bb6f13d0f8..2b0e52c4c82 100644
--- a/README.md
+++ b/README.md
@@ -99,7 +99,7 @@ curl http://localhost:30000/generate \
   -d '{
     "text": "Once upon a time,",
     "sampling_params": {
-      "max_new_tokens": 16,
+      "max_new_tokens": 128,
       "temperature": 0
     }
   }'
diff --git a/docs/en/sampling_params.md b/docs/en/sampling_params.md
index 0ea07c01759..5aad20a6e27 100644
--- a/docs/en/sampling_params.md
+++ b/docs/en/sampling_params.md
@@ -33,7 +33,7 @@ The `sampling_params` follows this format
 
 ```python
 # The maximum number of output tokens
-max_new_tokens: int = 16,
+max_new_tokens: int = 128,
 # Stop when hitting any of the strings in this list.
 stop: Optional[Union[str, List[str]]] = None,
 # Sampling temperature
@@ -90,7 +90,7 @@ response = requests.post(
         "text": "The capital of France is",
         "sampling_params": {
             "temperature": 0,
-            "max_new_tokens": 256,
+            "max_new_tokens": 32,
         },
         "stream": True,
     },
@@ -133,7 +133,7 @@ response = requests.post(
         "image_data": "example_image.png",
         "sampling_params": {
             "temperature": 0,
-            "max_new_tokens": 32,
+            "max_new_tokens": 128,
         },
     },
 )
diff --git a/python/sglang/lang/compiler.py b/python/sglang/lang/compiler.py
index 36287cd397c..95af04adb0a 100644
--- a/python/sglang/lang/compiler.py
+++ b/python/sglang/lang/compiler.py
@@ -125,7 +125,7 @@ def run_internal(
     def run(
         self,
         *,
-        max_new_tokens: int = 16,
+        max_new_tokens: int = 128,
         stop: Union[str, List[str]] = (),
         temperature: float = 1.0,
         top_p: float = 1.0,
@@ -155,7 +155,7 @@ def run_batch(
         self,
         batch_kwargs,
         *,
-        max_new_tokens: int = 16,
+        max_new_tokens: int = 128,
         stop: Union[str, List[str]] = (),
         temperature: float = 1.0,
         top_p: float = 1.0,
diff --git a/python/sglang/lang/ir.py b/python/sglang/lang/ir.py
index d902497c76e..135110c1e0d 100644
--- a/python/sglang/lang/ir.py
+++ b/python/sglang/lang/ir.py
@@ -16,7 +16,7 @@
 
 @dataclasses.dataclass
 class SglSamplingParams:
-    max_new_tokens: int = 16
+    max_new_tokens: int = 128
     stop: Union[str, List[str]] = ()
     temperature: float = 1.0
     top_p: float = 1.0
@@ -140,7 +140,7 @@ def bind(self, **kwargs):
     def run(
         self,
         *args,
-        max_new_tokens: int = 16,
+        max_new_tokens: int = 128,
         stop: Union[str, List[str]] = (),
         temperature: float = 1.0,
         top_p: float = 1.0,
@@ -179,7 +179,7 @@ def run_batch(
         self,
         batch_kwargs,
         *,
-        max_new_tokens: int = 16,
+        max_new_tokens: int = 128,
         stop: Union[str, List[str]] = (),
         temperature: float = 1.0,
         top_p: float = 1.0,
diff --git a/python/sglang/srt/sampling_params.py b/python/sglang/srt/sampling_params.py
index f6582cf41bd..89091b7ae3f 100644
--- a/python/sglang/srt/sampling_params.py
+++ b/python/sglang/srt/sampling_params.py
@@ -23,7 +23,7 @@ class SamplingParams:
 
     def __init__(
         self,
-        max_new_tokens: int = 16,
+        max_new_tokens: int = 128,
         stop: Optional[Union[str, List[str]]] = None,
         temperature: float = 1.0,
         top_p: float = 1.0,
diff --git a/scripts/deprecated/test_curl.sh b/scripts/deprecated/test_curl.sh
index 4362eaa9355..1c83208a759 100644
--- a/scripts/deprecated/test_curl.sh
+++ b/scripts/deprecated/test_curl.sh
@@ -3,7 +3,7 @@ curl http://localhost:30000/generate \
   -d '{
     "text": "Once upon a time,",
     "sampling_params": {
-      "max_new_tokens": 16,
+      "max_new_tokens": 64,
       "temperature": 0
     }
   }'
diff --git a/scripts/deprecated/test_httpserver_llava.py b/scripts/deprecated/test_httpserver_llava.py
index a7912fcc2f9..791fc6deb1f 100644
--- a/scripts/deprecated/test_httpserver_llava.py
+++ b/scripts/deprecated/test_httpserver_llava.py
@@ -36,7 +36,7 @@ async def test_concurrent(args):
             "image_data": "example_image.png",
             "sampling_params": {
                 "temperature": 0,
-                "max_new_tokens": 16,
+                "max_new_tokens": 64,
             },
         },
     )

From 1231e880bec43570addc9f25d39443a0720a6d12 Mon Sep 17 00:00:00 2001
From: Ying Sheng
Date: Wed, 7 Aug 2024 17:15:59 -0700
Subject: [PATCH 2/3] Update README.md

---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 2b0e52c4c82..1bb6f13d0f8 100644
--- a/README.md
+++ b/README.md
@@ -99,7 +99,7 @@ curl http://localhost:30000/generate \
   -d '{
     "text": "Once upon a time,",
     "sampling_params": {
-      "max_new_tokens": 128,
+      "max_new_tokens": 16,
       "temperature": 0
     }
   }'

From a3ef9017c4bb2675baee11e00bf3e23ae115ebd8 Mon Sep 17 00:00:00 2001
From: Ying Sheng
Date: Wed, 7 Aug 2024 17:16:04 -0700
Subject: [PATCH 3/3] Update docs/en/sampling_params.md

---
 docs/en/sampling_params.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/en/sampling_params.md b/docs/en/sampling_params.md
index 5aad20a6e27..782bb1fb687 100644
--- a/docs/en/sampling_params.md
+++ b/docs/en/sampling_params.md
@@ -133,7 +133,7 @@ response = requests.post(
         "image_data": "example_image.png",
         "sampling_params": {
             "temperature": 0,
-            "max_new_tokens": 128,
+            "max_new_tokens": 32,
         },
     },
 )
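
Note: since the server-side default for `max_new_tokens` changes across this series, clients that care about output length should pass it explicitly. Below is a minimal sketch of such a request, following the `/generate` request shape shown in the docs and README diffs above (it assumes a local sglang server on port 30000, as in those examples):

```python
import requests

# Sketch: call the /generate endpoint with max_new_tokens set explicitly,
# so the result does not depend on the server-side default (16 before this
# series, 128 after PATCH 1/3).
response = requests.post(
    "http://localhost:30000/generate",  # assumed local server, as in the examples above
    json={
        "text": "Once upon a time,",
        "sampling_params": {
            "temperature": 0,
            "max_new_tokens": 128,
        },
    },
)
print(response.json())
```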