diff --git a/docs/en/sampling_params.md b/docs/en/sampling_params.md index 0ea07c01759..782bb1fb687 100644 --- a/docs/en/sampling_params.md +++ b/docs/en/sampling_params.md @@ -33,7 +33,7 @@ The `sampling_params` follows this format ```python # The maximum number of output tokens -max_new_tokens: int = 16, +max_new_tokens: int = 128, # Stop when hitting any of the strings in this list. stop: Optional[Union[str, List[str]]] = None, # Sampling temperature @@ -90,7 +90,7 @@ response = requests.post( "text": "The capital of France is", "sampling_params": { "temperature": 0, - "max_new_tokens": 256, + "max_new_tokens": 32, }, "stream": True, }, diff --git a/python/sglang/lang/compiler.py b/python/sglang/lang/compiler.py index 36287cd397c..95af04adb0a 100644 --- a/python/sglang/lang/compiler.py +++ b/python/sglang/lang/compiler.py @@ -125,7 +125,7 @@ def run_internal( def run( self, *, - max_new_tokens: int = 16, + max_new_tokens: int = 128, stop: Union[str, List[str]] = (), temperature: float = 1.0, top_p: float = 1.0, @@ -155,7 +155,7 @@ def run_batch( self, batch_kwargs, *, - max_new_tokens: int = 16, + max_new_tokens: int = 128, stop: Union[str, List[str]] = (), temperature: float = 1.0, top_p: float = 1.0, diff --git a/python/sglang/lang/ir.py b/python/sglang/lang/ir.py index d902497c76e..135110c1e0d 100644 --- a/python/sglang/lang/ir.py +++ b/python/sglang/lang/ir.py @@ -16,7 +16,7 @@ @dataclasses.dataclass class SglSamplingParams: - max_new_tokens: int = 16 + max_new_tokens: int = 128 stop: Union[str, List[str]] = () temperature: float = 1.0 top_p: float = 1.0 @@ -140,7 +140,7 @@ def bind(self, **kwargs): def run( self, *args, - max_new_tokens: int = 16, + max_new_tokens: int = 128, stop: Union[str, List[str]] = (), temperature: float = 1.0, top_p: float = 1.0, @@ -179,7 +179,7 @@ def run_batch( self, batch_kwargs, *, - max_new_tokens: int = 16, + max_new_tokens: int = 128, stop: Union[str, List[str]] = (), temperature: float = 1.0, top_p: float = 1.0, diff --git a/python/sglang/srt/sampling_params.py b/python/sglang/srt/sampling_params.py index f6582cf41bd..89091b7ae3f 100644 --- a/python/sglang/srt/sampling_params.py +++ b/python/sglang/srt/sampling_params.py @@ -23,7 +23,7 @@ class SamplingParams: def __init__( self, - max_new_tokens: int = 16, + max_new_tokens: int = 128, stop: Optional[Union[str, List[str]]] = None, temperature: float = 1.0, top_p: float = 1.0, diff --git a/scripts/deprecated/test_curl.sh b/scripts/deprecated/test_curl.sh index 4362eaa9355..1c83208a759 100644 --- a/scripts/deprecated/test_curl.sh +++ b/scripts/deprecated/test_curl.sh @@ -3,7 +3,7 @@ curl http://localhost:30000/generate \ -d '{ "text": "Once upon a time,", "sampling_params": { - "max_new_tokens": 16, + "max_new_tokens": 64, "temperature": 0 } }' diff --git a/scripts/deprecated/test_httpserver_llava.py b/scripts/deprecated/test_httpserver_llava.py index a7912fcc2f9..791fc6deb1f 100644 --- a/scripts/deprecated/test_httpserver_llava.py +++ b/scripts/deprecated/test_httpserver_llava.py @@ -36,7 +36,7 @@ async def test_concurrent(args): "image_data": "example_image.png", "sampling_params": { "temperature": 0, - "max_new_tokens": 16, + "max_new_tokens": 64, }, }, )