diff --git a/README.md b/README.md
index ebb7f1288f6..c9cdc86a143 100644
--- a/README.md
+++ b/README.md
@@ -41,15 +41,10 @@ The core features include:
 - **Active Community**: SGLang is open-source and backed by an active community with industry adoption.
 
 ## Getting Started
-Install SGLang: See [https://sgl-project.github.io/start/install.html](https://sgl-project.github.io/start/install.html)
-
-Send requests: See [https://sgl-project.github.io/start/send_request.html](https://sgl-project.github.io/start/send_request.html)
-
-## Backend: SGLang Runtime (SRT)
-See [https://sgl-project.github.io/backend/backend.html](https://sgl-project.github.io/backend/backend.html)
-
-## Frontend: Structured Generation Language (SGLang)
-See [https://sgl-project.github.io/frontend/frontend.html](https://sgl-project.github.io/frontend/frontend.html)
+- [Install SGLang](https://sgl-project.github.io/start/install.html)
+- [Send requests](https://sgl-project.github.io/start/send_request.html)
+- [Backend: SGLang Runtime (SRT)](https://sgl-project.github.io/backend/backend.html)
+- [Frontend: Structured Generation Language (SGLang)](https://sgl-project.github.io/frontend/frontend.html)
 
 ## Benchmark And Performance
 Learn more in our release blogs: [v0.2 blog](https://lmsys.org/blog/2024-07-25-sglang-llama3/), [v0.3 blog](https://lmsys.org/blog/2024-09-04-sglang-v0-3/)
@@ -57,6 +52,9 @@ Learn more in our release blogs: [v0.2 blog](https://lmsys.org/blog/2024-07-25-s
 ## Roadmap
 [Development Roadmap (2024 Q4)](https://github.com/sgl-project/sglang/issues/1487)
 
-## Citation And Acknowledgment
+## Adoption and Sponsorship
+The project is supported by (alphabetically): AMD, Baseten, Etched, Hyperbolic, Jam & Tea Studios, LinkedIn, NVIDIA, RunPod, Stanford, UC Berkeley, and xAI.
+
+## Acknowledgment and Citation
+We learned from the design and reused code from the following projects: [Guidance](https://github.com/guidance-ai/guidance), [vLLM](https://github.com/vllm-project/vllm), [LightLLM](https://github.com/ModelTC/lightllm), [FlashInfer](https://github.com/flashinfer-ai/flashinfer), [Outlines](https://github.com/outlines-dev/outlines), and [LMQL](https://github.com/eth-sri/lmql).
 Please cite our paper, [SGLang: Efficient Execution of Structured Language Model Programs](https://arxiv.org/abs/2312.07104), if you find the project useful.
-We also learned from the design and reused code from the following projects: [Guidance](https://github.com/guidance-ai/guidance), [vLLM](https://github.com/vllm-project/vllm), [LightLLM](https://github.com/ModelTC/lightllm), [FlashInfer](https://github.com/flashinfer-ai/flashinfer), [Outlines](https://github.com/outlines-dev/outlines), and [LMQL](https://github.com/eth-sri/lmql).
diff --git a/docs/backend/offline_engine_api.ipynb b/docs/backend/offline_engine_api.ipynb
index cdb2aa19013..7ce89d435d5 100644
--- a/docs/backend/offline_engine_api.ipynb
+++ b/docs/backend/offline_engine_api.ipynb
@@ -39,7 +39,6 @@
     "# launch the offline engine\n",
     "\n",
     "import sglang as sgl\n",
-    "from sglang.utils import print_highlight\n",
     "import asyncio\n",
     "\n",
     "llm = sgl.Engine(model_path=\"meta-llama/Meta-Llama-3.1-8B-Instruct\")"
@@ -69,8 +68,8 @@
     "\n",
     "outputs = llm.generate(prompts, sampling_params)\n",
     "for prompt, output in zip(prompts, outputs):\n",
-    "    print_highlight(\"===============================\")\n",
-    "    print_highlight(f\"Prompt: {prompt}\\nGenerated text: {output['text']}\")"
+    "    print(\"===============================\")\n",
+    "    print(f\"Prompt: {prompt}\\nGenerated text: {output['text']}\")"
    ]
   },
   {
@@ -93,10 +92,10 @@
     "]\n",
     "sampling_params = {\"temperature\": 0.8, \"top_p\": 0.95}\n",
     "\n",
-    "print_highlight(\"\\n=== Testing synchronous streaming generation ===\")\n",
+    "print(\"\\n=== Testing synchronous streaming generation ===\")\n",
     "\n",
     "for prompt in prompts:\n",
-    "    print_highlight(f\"\\nPrompt: {prompt}\")\n",
+    "    print(f\"\\nPrompt: {prompt}\")\n",
     "    print(\"Generated text: \", end=\"\", flush=True)\n",
     "\n",
     "    for chunk in llm.generate(prompt, sampling_params, stream=True):\n",
@@ -125,15 +124,15 @@
     "\n",
     "sampling_params = {\"temperature\": 0.8, \"top_p\": 0.95}\n",
     "\n",
-    "print_highlight(\"\\n=== Testing asynchronous batch generation ===\")\n",
+    "print(\"\\n=== Testing asynchronous batch generation ===\")\n",
     "\n",
     "\n",
     "async def main():\n",
     "    outputs = await llm.async_generate(prompts, sampling_params)\n",
     "\n",
     "    for prompt, output in zip(prompts, outputs):\n",
-    "        print_highlight(f\"\\nPrompt: {prompt}\")\n",
-    "        print_highlight(f\"Generated text: {output['text']}\")\n",
+    "        print(f\"\\nPrompt: {prompt}\")\n",
+    "        print(f\"Generated text: {output['text']}\")\n",
     "\n",
     "\n",
     "asyncio.run(main())"
@@ -159,12 +158,12 @@
     "]\n",
     "sampling_params = {\"temperature\": 0.8, \"top_p\": 0.95}\n",
     "\n",
-    "print_highlight(\"\\n=== Testing asynchronous streaming generation ===\")\n",
+    "print(\"\\n=== Testing asynchronous streaming generation ===\")\n",
     "\n",
     "\n",
     "async def main():\n",
     "    for prompt in prompts:\n",
-    "        print_highlight(f\"\\nPrompt: {prompt}\")\n",
+    "        print(f\"\\nPrompt: {prompt}\")\n",
     "        print(\"Generated text: \", end=\"\", flush=True)\n",
     "\n",
     "        generator = await llm.async_generate(prompt, sampling_params, stream=True)\n",
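
For reviewers who want to try the updated notebook cells outside Jupyter, here is a minimal consolidated sketch of the offline engine flow as it reads after this change (plain `print` instead of the removed `print_highlight` helper). It only uses calls that already appear in the hunks above (`sgl.Engine`, `llm.generate`, `llm.async_generate`); the prompt list is illustrative, since the notebook's actual list is elided from the diff context.

```python
# Consolidated sketch of the cells touched above (not part of the patch).
import asyncio

import sglang as sgl

# Launch the offline engine with the same model path used in the notebook.
llm = sgl.Engine(model_path="meta-llama/Meta-Llama-3.1-8B-Instruct")

# Illustrative prompts; the notebook's actual prompt list is elided in the hunks.
prompts = ["Hello, my name is", "The capital of France is"]
sampling_params = {"temperature": 0.8, "top_p": 0.95}

# Synchronous batch generation.
outputs = llm.generate(prompts, sampling_params)
for prompt, output in zip(prompts, outputs):
    print("===============================")
    print(f"Prompt: {prompt}\nGenerated text: {output['text']}")


# Asynchronous batch generation.
async def main():
    outputs = await llm.async_generate(prompts, sampling_params)
    for prompt, output in zip(prompts, outputs):
        print(f"\nPrompt: {prompt}")
        print(f"Generated text: {output['text']}")


asyncio.run(main())
```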