[CI] Parallelize unit tests in CI #1219

Merged · 5 commits · Aug 26, 2024
Changes from 3 commits
30 changes: 15 additions & 15 deletions .github/workflows/accuracy-test.yml
@@ -23,21 +23,21 @@ jobs:
    runs-on: 1-gpu-runner

    steps:
      - name: Checkout code
        uses: actions/checkout@v3

      - name: Install dependencies
        run: |
          pip install --upgrade pip
          pip install -e "python[all]"
          pip install flashinfer -i https://flashinfer.ai/whl/cu121/torch2.4/ --force-reinstall

          git clone https://github.com/merrymercy/human-eval.git
          cd human-eval
          pip install -e .

      - name: Evaluate Accuracy
        timeout-minutes: 20
        run: |
          cd test/srt
          python3 test_eval_accuracy_large.py
52 changes: 26 additions & 26 deletions .github/workflows/e2e-test.yml
@@ -23,29 +23,29 @@ jobs:
    runs-on: 1-gpu-runner

    steps:
      - name: Checkout code
        uses: actions/checkout@v3

      - name: Install dependencies
        run: |
          pip install --upgrade pip
          pip install -e "python[all]"
          pip install flashinfer -i https://flashinfer.ai/whl/cu121/torch2.4/ --force-reinstall

      - name: Benchmark Serving Throughput
        timeout-minutes: 10
        run: |
          cd test/srt
          python3 -m unittest test_serving_throughput.TestServingThroughput.test_default

      - name: Benchmark Serving Throughput (w/o RadixAttention)
        timeout-minutes: 10
        run: |
          cd test/srt
          python3 -m unittest test_serving_throughput.TestServingThroughput.test_default_without_radix_cache

      - name: Benchmark Serving Throughput (w/o ChunkedPrefill)
        timeout-minutes: 10
        run: |
          cd test/srt
          python3 -m unittest test_serving_throughput.TestServingThroughput.test_default_without_chunked_prefill
34 changes: 17 additions & 17 deletions .github/workflows/moe-test.yml
@@ -23,23 +23,23 @@ jobs:
    runs-on: 2-gpu-runner

    steps:
      - name: Checkout code
        uses: actions/checkout@v3

      - name: Install dependencies
        run: |
          pip install --upgrade pip
          pip install -e "python[all]"
          pip install flashinfer -i https://flashinfer.ai/whl/cu121/torch2.4/ --force-reinstall

      - name: Benchmark MoE Serving Throughput
        timeout-minutes: 10
        run: |
          cd test/srt
          python3 -m unittest test_moe_serving_throughput.TestServingThroughput.test_default

      - name: Benchmark MoE Serving Throughput (w/o RadixAttention)
        timeout-minutes: 10
        run: |
          cd test/srt
          python3 -m unittest test_moe_serving_throughput.TestServingThroughput.test_default_without_radix_cache
48 changes: 29 additions & 19 deletions .github/workflows/unit-test.yml
@@ -18,28 +18,38 @@ concurrency:
   cancel-in-progress: true

 jobs:
-  unit-test:
+  unit-test-jobs:
     if: github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request'
     runs-on: 1-gpu-runner
+    strategy:
+      matrix:
+        test_type: ['backend-0', 'backend-1', 'frontend']
     steps:
       - name: Checkout code
         uses: actions/checkout@v3

       - name: Install dependencies
         run: |
           pip install --upgrade pip
           pip install -e "python[dev]"
           pip install flashinfer -i https://flashinfer.ai/whl/cu121/torch2.4/ --force-reinstall

-      - name: Test Backend Runtime
+      - name: Run test
         timeout-minutes: 20
         run: |
-          cd test/srt
-          python3 run_suite.py --suite minimal
-
-      - name: Test Frontend Language
-        timeout-minutes: 10
-        run: |
-          cd test/lang
-          python3 run_suite.py --suite minimal
+          if [ "${{ matrix.test_type }}" = "frontend" ]; then
+            cd test/lang
+            python3 run_suite.py --suite minimal
+          elif [ "${{ matrix.test_type }}" = "backend-0" ]; then
+            cd test/srt
+            python3 run_suite.py --suite minimal --range-begin 0 --range-end 8
+          elif [ "${{ matrix.test_type }}" = "backend-1" ]; then
+            cd test/srt
+            python3 run_suite.py --suite minimal --range-begin 8
+          fi
+
+  unit-test:
+    needs: unit-test-jobs
+    runs-on: ubuntu-latest
+    steps:
+      # Intentionally left empty
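The net effect of the matrix is that the backend portion of the minimal suite is split at index 8 across two GPU runners, while the frontend suite runs on a third. A minimal sketch of that partitioning in Python, using placeholder file names rather than the real suites table from test/srt/run_suite.py:

    # Placeholder stand-in for the "minimal" suite; the real list lives in
    # the suites dict in test/srt/run_suite.py.
    minimal_suite = [f"test_backend_{i:02d}.py" for i in range(12)]

    # Mirror of the three matrix shards and the ranges each passes to run_suite.py.
    shards = {
        "backend-0": minimal_suite[0:8],     # --range-begin 0 --range-end 8
        "backend-1": minimal_suite[8:None],  # --range-begin 8 (no end: rest of the list)
        "frontend": "test/lang minimal suite (no range flags)",
    }

    for shard, work in shards.items():
        print(f"{shard}: {work}")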
2 changes: 1 addition & 1 deletion python/sglang/test/test_utils.py
@@ -465,7 +465,7 @@ def run_unittest_files(files: List[str], timeout_per_file: float):

 def run_one_file(filename):
     filename = os.path.join(os.getcwd(), filename)
-    print(f"\n\nRun {filename}\n\n", flush=True)
+    print(f"\n\nRun:\npython3 {filename}\n\n", flush=True)
     process = subprocess.Popen(
         ["python3", filename], stdout=None, stderr=None, env=os.environ
     )
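The only change here is to the log line: it now prints a ready-to-paste command rather than a bare path. A small sketch of the behavior, with a hypothetical test_example.py standing in for a real test file:

    import os

    # Hypothetical test file name; run_one_file receives real paths from the suite.
    filename = os.path.join(os.getcwd(), "test_example.py")

    # Before: print(f"\n\nRun {filename}\n\n", flush=True)
    # After, the log doubles as a reproduction command:
    print(f"\n\nRun:\npython3 {filename}\n\n", flush=True)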
15 changes: 14 additions & 1 deletion test/srt/run_suite.py
@@ -1,6 +1,5 @@
 import argparse
 import glob
-import multiprocessing as mp

 from sglang.test.test_utils import run_unittest_files

@@ -49,12 +48,26 @@
         choices=list(suites.keys()) + ["all"],
         help="The suite to run",
     )
+    arg_parser.add_argument(
+        "--range-begin",
+        type=int,
+        default=0,
+        help="The begin index of the range of the files to run.",
+    )
+    arg_parser.add_argument(
+        "--range-end",
+        type=int,
+        default=None,
+        help="The end index of the range of the files to run.",
+    )
     args = arg_parser.parse_args()

     if args.suite == "all":
         files = glob.glob("**/test_*.py", recursive=True)
     else:
         files = suites[args.suite]

+    files = files[args.range_begin : args.range_end]
+
     exit_code = run_unittest_files(files, args.timeout_per_file)
     exit(exit_code)
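The slicing works because --range-end defaults to None, and a None end bound in a Python slice means "through the last element", so a shard can claim everything from index 8 onward without knowing the suite's length. A quick sketch with placeholder file names:

    files = ["test_a.py", "test_b.py", "test_c.py", "test_d.py"]  # placeholders

    # backend-0 style: --range-begin 0 --range-end 2
    print(files[0:2])     # ['test_a.py', 'test_b.py']

    # backend-1 style: --range-begin 2, range_end left at its default of None
    print(files[2:None])  # ['test_c.py', 'test_d.py'] (None slices to the end)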