From bffba8ec8be8dd1e6f51de31dc0f3844ed08eb8d Mon Sep 17 00:00:00 2001 From: Ying Sheng Date: Thu, 1 Aug 2024 19:13:35 +0000 Subject: [PATCH 1/8] fix unit tests --- python/sglang/test/test_programs.py | 9 +++--- test/lang/{run_all.py => run_suite.py} | 17 ++++++++++- test/lang/test_openai_backend.py | 40 ++++++++++++-------------- test/lang/test_srt_backend.py | 28 +++++++----------- 4 files changed, 49 insertions(+), 45 deletions(-) rename test/lang/{run_all.py => run_suite.py} (77%) diff --git a/python/sglang/test/test_programs.py b/python/sglang/test/test_programs.py index 9ba794ac99c..014616bcf41 100644 --- a/python/sglang/test/test_programs.py +++ b/python/sglang/test/test_programs.py @@ -113,15 +113,14 @@ def decode_json(s): s += ' "population": ' + sgl.gen(regex=REGEX_INT + ",") + "\n" s += ' "area": ' + sgl.gen(regex=REGEX_INT + ",") + "\n" s += ' "latitude": ' + sgl.gen(regex=REGEX_FLOAT + ",") + "\n" - s += ' "country": ' + sgl.gen(regex=REGEX_STRING + ",") + "\n" - s += ' "timezone": ' + sgl.gen(regex=REGEX_STRING) + "\n" + s += ' "country": ' + sgl.gen(regex=REGEX_STRING) + "\n" s += "}" - ret = decode_json.run() + ret = decode_json.run(temperature=0.0) try: js_obj = json.loads(ret["json_output"]) except json.decoder.JSONDecodeError: - print(ret["json_output"]) + print("JSONDecodeError", ret["json_output"]) raise assert isinstance(js_obj["name"], str) assert isinstance(js_obj["population"], int) @@ -141,7 +140,7 @@ def decode_json(s): s += ' "timezone": ' + sgl.gen(dtype=str) + "\n" s += "}" - ret = decode_json.run() + ret = decode_json.run(max_new_tokens=64) js_obj = json.loads(ret["json_output"]) assert isinstance(js_obj["name"], str) assert isinstance(js_obj["population"], int) diff --git a/test/lang/run_all.py b/test/lang/run_suite.py similarity index 77% rename from test/lang/run_all.py rename to test/lang/run_suite.py index cb5da15850b..263b2a75c08 100644 --- a/test/lang/run_all.py +++ b/test/lang/run_suite.py @@ -8,6 +8,11 @@ from sglang.utils import run_with_timeout +suites = { + "minimal": ["test_openai_backend.py", "test_srt_backend.py"], +} + + def run_unittest_files(files, args): for filename in files: @@ -45,9 +50,19 @@ def run_one_file(): default=1000, help="The time limit for running one file in seconds.", ) + arg_parser.add_argument( + "--suite", + type=str, + default=list(suites.keys())[0], + choices=list(suites.keys()) + ["all"], + help="The suite to run", + ) args = arg_parser.parse_args() - files = glob.glob("**/test_*.py", recursive=True) + if args.suite == "all": + files = glob.glob("**/test_*.py", recursive=True) + else: + files = suites[args.suite] tic = time.time() success = run_unittest_files(files, args) diff --git a/test/lang/test_openai_backend.py b/test/lang/test_openai_backend.py index d35495e4d75..b1bb47b82f6 100644 --- a/test/lang/test_openai_backend.py +++ b/test/lang/test_openai_backend.py @@ -20,20 +20,18 @@ class TestOpenAIBackend(unittest.TestCase): - backend = None + instruct_backend = None chat_backend = None chat_vision_backend = None - def setUp(self): - cls = type(self) - - if cls.backend is None: - cls.backend = OpenAI("gpt-3.5-turbo-instruct") - cls.chat_backend = OpenAI("gpt-3.5-turbo") - cls.chat_vision_backend = OpenAI("gpt-4-turbo") + @classmethod + def setUpClass(cls): + cls.instruct_backend = OpenAI("gpt-3.5-turbo-instruct") + cls.chat_backend = OpenAI("gpt-3.5-turbo") + cls.chat_vision_backend = OpenAI("gpt-4-turbo") def test_few_shot_qa(self): - set_default_backend(self.backend) + set_default_backend(self.instruct_backend) test_few_shot_qa() def test_mt_bench(self): @@ -41,35 +39,35 @@ def test_mt_bench(self): test_mt_bench() def test_select(self): - set_default_backend(self.backend) + set_default_backend(self.instruct_backend) test_select(check_answer=True) def test_decode_int(self): - set_default_backend(self.backend) + set_default_backend(self.instruct_backend) test_decode_int() def test_decode_json(self): - set_default_backend(self.backend) + set_default_backend(self.instruct_backend) test_decode_json() def test_expert_answer(self): - set_default_backend(self.backend) + set_default_backend(self.instruct_backend) test_expert_answer() def test_tool_use(self): - set_default_backend(self.backend) + set_default_backend(self.instruct_backend) test_tool_use() def test_react(self): - set_default_backend(self.backend) + set_default_backend(self.instruct_backend) test_react() def test_parallel_decoding(self): - set_default_backend(self.backend) + set_default_backend(self.instruct_backend) test_parallel_decoding() def test_parallel_encoding(self): - set_default_backend(self.backend) + set_default_backend(self.instruct_backend) test_parallel_encoding() def test_image_qa(self): @@ -77,11 +75,11 @@ def test_image_qa(self): test_image_qa() def test_stream(self): - set_default_backend(self.backend) + set_default_backend(self.instruct_backend) test_stream() def test_completion_speculative(self): - set_default_backend(self.backend) + set_default_backend(self.instruct_backend) test_completion_speculative() def test_chat_completion_speculative(self): @@ -96,5 +94,5 @@ def test_chat_completion_speculative(self): # global_config.verbosity = 2 # t = TestOpenAIBackend() - # t.setUp() - # t.test_chat_completion_speculative() + # t.setUpClass() + # t.test_stream() diff --git a/test/lang/test_srt_backend.py b/test/lang/test_srt_backend.py index 9d2b8fd7a3c..fae53f0b16b 100644 --- a/test/lang/test_srt_backend.py +++ b/test/lang/test_srt_backend.py @@ -1,9 +1,3 @@ -""" -Usage: -python3 -m sglang.launch_server --model-path meta-llama/Meta-Llama-3-8B-Instruct --port 30000 -python3 test_srt_backend.py -""" - import json import unittest @@ -15,8 +9,6 @@ test_few_shot_qa, test_mt_bench, test_parallel_decoding, - test_parallel_encoding, - test_react, test_regex, test_select, test_stream, @@ -27,12 +19,14 @@ class TestSRTBackend(unittest.TestCase): backend = None - def setUp(self): - cls = type(self) + @classmethod + def setUpClass(cls): + cls.backend = sgl.Runtime(model_path="meta-llama/Meta-Llama-3-8B-Instruct") + sgl.set_default_backend(cls.backend) - if cls.backend is None: - cls.backend = sgl.RuntimeEndpoint(base_url="http://localhost:30000") - sgl.set_default_backend(cls.backend) + @classmethod + def tearDownClass(cls): + cls.backend.shutdown() def test_few_shot_qa(self): test_few_shot_qa() @@ -64,9 +58,6 @@ def test_stream(self): def test_regex(self): test_regex() - # def test_parallel_encoding(self): - # test_parallel_encoding(check_answer=False) - if __name__ == "__main__": unittest.main(warnings="ignore") @@ -75,5 +66,6 @@ def test_regex(self): # global_config.verbosity = 2 # t = TestSRTBackend() - # t.setUp() - # t.test_regex() + # t.setUpClass() + # t.test_few_shot_qa() + # t.tearDownClass() From e77ee8225f06dc3ef11296f23471689da390d755 Mon Sep 17 00:00:00 2001 From: Ying Sheng Date: Thu, 1 Aug 2024 19:17:30 +0000 Subject: [PATCH 2/8] rewrite test style --- python/sglang/srt/server.py | 3 +++ test/lang/test_bind_cache.py | 20 ++++++++------------ 2 files changed, 11 insertions(+), 12 deletions(-) diff --git a/python/sglang/srt/server.py b/python/sglang/srt/server.py index 6a7f8f420d8..2f7cee46341 100644 --- a/python/sglang/srt/server.py +++ b/python/sglang/srt/server.py @@ -479,6 +479,9 @@ def shutdown(self): parent.wait(timeout=5) self.pid = None + def cache_prefix(self, prefix: str): + self.endpoint.cache_pprefix(prefix) + def get_tokenizer(self): return get_tokenizer( self.server_args.tokenizer_path, diff --git a/test/lang/test_bind_cache.py b/test/lang/test_bind_cache.py index 378378175e3..702f27c06f2 100644 --- a/test/lang/test_bind_cache.py +++ b/test/lang/test_bind_cache.py @@ -1,23 +1,19 @@ -""" -Usage: -python3 -m sglang.launch_server --model-path meta-llama/Meta-Llama-3-8B-Instruct --port 30000 -python3 test_bind_cache.py -""" - import unittest import sglang as sgl -from sglang.lang.backend.runtime_endpoint import RuntimeEndpoint class TestBind(unittest.TestCase): backend = None - def setUp(self): - cls = type(self) + @classmethod + def setUpClass(cls): + cls.backend = sgl.Runtime(model_path="meta-llama/Meta-Llama-3-8B-Instruct") + sgl.set_default_backend(cls.backend) - if cls.backend is None: - cls.backend = RuntimeEndpoint(base_url="http://localhost:30000") + @classmethod + def tearDownClass(cls): + cls.backend.shutdown() def test_bind(self): @sgl.function @@ -54,5 +50,5 @@ def few_shot_qa(s, prompt, question): unittest.main(warnings="ignore") # t = TestBind() - # t.setUp() + # t.setUpClass() # t.test_cache() From 050ca0c3fbb735b38c6e2f5b6bbfaa795b80381a Mon Sep 17 00:00:00 2001 From: Ying Sheng Date: Thu, 1 Aug 2024 19:17:56 +0000 Subject: [PATCH 3/8] fix --- python/sglang/srt/server.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/sglang/srt/server.py b/python/sglang/srt/server.py index 2f7cee46341..8c7b8ee610d 100644 --- a/python/sglang/srt/server.py +++ b/python/sglang/srt/server.py @@ -480,7 +480,7 @@ def shutdown(self): self.pid = None def cache_prefix(self, prefix: str): - self.endpoint.cache_pprefix(prefix) + self.endpoint.cache_prefix(prefix) def get_tokenizer(self): return get_tokenizer( From fb05cbc3d2565f28f401168ab5157a7a75de47f9 Mon Sep 17 00:00:00 2001 From: Ying Sheng Date: Thu, 1 Aug 2024 19:18:58 +0000 Subject: [PATCH 4/8] fix lint --- test/lang/run_suite.py | 1 - 1 file changed, 1 deletion(-) diff --git a/test/lang/run_suite.py b/test/lang/run_suite.py index 263b2a75c08..4b0c961ef7c 100644 --- a/test/lang/run_suite.py +++ b/test/lang/run_suite.py @@ -7,7 +7,6 @@ from sglang.utils import run_with_timeout - suites = { "minimal": ["test_openai_backend.py", "test_srt_backend.py"], } From cb30e5e28af92995c8f7d5e174dd61de7475e71a Mon Sep 17 00:00:00 2001 From: Ying Sheng Date: Thu, 1 Aug 2024 19:25:33 +0000 Subject: [PATCH 5/8] update other backends --- python/sglang/lang/ir.py | 1 - test/lang/test_anthropic_backend.py | 11 ++++------- test/lang/test_litellm_backend.py | 11 ++++------- 3 files changed, 8 insertions(+), 15 deletions(-) diff --git a/python/sglang/lang/ir.py b/python/sglang/lang/ir.py index 23537f35018..2ee167f86a6 100644 --- a/python/sglang/lang/ir.py +++ b/python/sglang/lang/ir.py @@ -99,7 +99,6 @@ def to_litellm_kwargs(self): "stop": self.stop or None, "temperature": self.temperature, "top_p": self.top_p, - "top_k": self.top_k, "frequency_penalty": self.frequency_penalty, "presence_penalty": self.presence_penalty, } diff --git a/test/lang/test_anthropic_backend.py b/test/lang/test_anthropic_backend.py index 3eb4051d739..9c2dcbd276b 100644 --- a/test/lang/test_anthropic_backend.py +++ b/test/lang/test_anthropic_backend.py @@ -7,14 +7,11 @@ class TestAnthropicBackend(unittest.TestCase): backend = None - chat_backend = None - def setUp(self): - cls = type(self) - - if cls.backend is None: - cls.backend = Anthropic("claude-3-haiku-20240307") - set_default_backend(cls.backend) + @classmethod + def setUpClass(cls): + cls.backend = Anthropic("claude-3-haiku-20240307") + set_default_backend(cls.backend) def test_mt_bench(self): test_mt_bench() diff --git a/test/lang/test_litellm_backend.py b/test/lang/test_litellm_backend.py index 15d83bd517a..3c7f5db2182 100644 --- a/test/lang/test_litellm_backend.py +++ b/test/lang/test_litellm_backend.py @@ -6,15 +6,12 @@ class TestAnthropicBackend(unittest.TestCase): - backend = None chat_backend = None - def setUp(self): - cls = type(self) - - if cls.backend is None: - cls.backend = LiteLLM("gpt-3.5-turbo") - set_default_backend(cls.backend) + @classmethod + def setUpClass(cls): + cls.chat_backend = LiteLLM("gpt-3.5-turbo") + set_default_backend(cls.chat_backend) def test_mt_bench(self): test_mt_bench() From 4887c127c84ce64316267d5889affb20cdc35889 Mon Sep 17 00:00:00 2001 From: Ying Sheng Date: Thu, 1 Aug 2024 19:28:02 +0000 Subject: [PATCH 6/8] update --- test/lang/test_anthropic_backend.py | 2 +- test/lang/test_vertexai_backend.py | 14 ++++++-------- 2 files changed, 7 insertions(+), 9 deletions(-) diff --git a/test/lang/test_anthropic_backend.py b/test/lang/test_anthropic_backend.py index 9c2dcbd276b..87b27a765a3 100644 --- a/test/lang/test_anthropic_backend.py +++ b/test/lang/test_anthropic_backend.py @@ -27,5 +27,5 @@ def test_stream(self): # global_config.verbosity = 2 # t = TestAnthropicBackend() - # t.setUp() + # t.setUpClass() # t.test_mt_bench() diff --git a/test/lang/test_vertexai_backend.py b/test/lang/test_vertexai_backend.py index aae840101ac..b29efaa75ad 100644 --- a/test/lang/test_vertexai_backend.py +++ b/test/lang/test_vertexai_backend.py @@ -17,13 +17,11 @@ class TestVertexAIBackend(unittest.TestCase): chat_backend = None chat_vision_backend = None - def setUp(self): - cls = type(self) - - if cls.backend is None: - cls.backend = VertexAI("gemini-pro") - cls.chat_backend = VertexAI("gemini-pro") - cls.chat_vision_backend = VertexAI("gemini-pro-vision") + @classmethod + def setUpClass(cls): + cls.backend = VertexAI("gemini-pro") + cls.chat_backend = VertexAI("gemini-pro") + cls.chat_vision_backend = VertexAI("gemini-pro-vision") def test_few_shot_qa(self): set_default_backend(self.backend) @@ -61,5 +59,5 @@ def test_stream(self): # global_config.verbosity = 2 # t = TestVertexAIBackend() - # t.setUp() + # t.setUpClass() # t.test_stream() From 2476fd1d72dd5b3ab26d87eafa28bcf3959450b2 Mon Sep 17 00:00:00 2001 From: Ying Sheng Date: Thu, 1 Aug 2024 19:32:52 +0000 Subject: [PATCH 7/8] fix oai json decode --- python/sglang/test/test_programs.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/python/sglang/test/test_programs.py b/python/sglang/test/test_programs.py index 014616bcf41..6ab41d10f7f 100644 --- a/python/sglang/test/test_programs.py +++ b/python/sglang/test/test_programs.py @@ -141,7 +141,11 @@ def decode_json(s): s += "}" ret = decode_json.run(max_new_tokens=64) - js_obj = json.loads(ret["json_output"]) + try: + js_obj = json.loads(ret["json_output"]) + except json.decoder.JSONDecodeError: + print("JSONDecodeError", ret["json_output"]) + raise assert isinstance(js_obj["name"], str) assert isinstance(js_obj["population"], int) From c75a601d89fef2284a0ffdf84d12094d7d40f082 Mon Sep 17 00:00:00 2001 From: Ying Sheng Date: Thu, 1 Aug 2024 19:34:04 +0000 Subject: [PATCH 8/8] update --- .github/workflows/lint.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml index c0b83dd3a6a..07614050640 100644 --- a/.github/workflows/lint.yml +++ b/.github/workflows/lint.yml @@ -1,4 +1,4 @@ -name: lint +name: Lint on: [push, pull_request]