[ci/build] fix gh200 test (vllm-project#12681)

Signed-off-by: youkaichao <[email protected]>
neuralmagic · Feb 3, 2025 · 1298a40 · 1298a40
1 parent ad4a9dc
commit 1298a40
Show file tree

Hide file tree

Showing 3 changed files with 5 additions and 5 deletions.
diff --git a/.buildkite/check-wheel-size.py b/.buildkite/check-wheel-size.py
@@ -4,11 +4,11 @@
 import sys
 import zipfile
 
-# Read the VLLM_MAX_SIZE_MB environment variable, defaulting to 300 MiB
+# Read the VLLM_MAX_SIZE_MB environment variable, defaulting to 400 MiB
 # Note that we have 400 MiB quota, please use it wisely.
 # See https://github.com/pypi/support/issues/3792 .
 # Please also sync the value with the one in Dockerfile.
-VLLM_MAX_SIZE_MB = int(os.environ.get('VLLM_MAX_SIZE_MB', 300))
+VLLM_MAX_SIZE_MB = int(os.environ.get('VLLM_MAX_SIZE_MB', 400))
 
 
 def print_top_10_largest_files(zip_file):

diff --git a/.buildkite/run-gh200-test.sh b/.buildkite/run-gh200-test.sh
@@ -23,6 +23,6 @@ trap remove_docker_container EXIT
 remove_docker_container
 
 # Run the image and test offline inference
-docker run --name gh200-test --gpus=all --entrypoint="" gh200-test bash -c '
-    python3 examples/offline_inference/basic.py
+docker run -e HF_TOKEN -v /root/.cache/huggingface:/root/.cache/huggingface --name gh200-test --gpus=all --entrypoint="" gh200-test bash -c '
+    python3 examples/offline_inference/cli.py --model meta-llama/Llama-3.2-1B
 '
diff --git a/Dockerfile b/Dockerfile
@@ -127,7 +127,7 @@ RUN --mount=type=cache,target=/root/.cache/ccache \
 # Check the size of the wheel if RUN_WHEEL_CHECK is true
 COPY .buildkite/check-wheel-size.py check-wheel-size.py
 # sync the default value with .buildkite/check-wheel-size.py
-ARG VLLM_MAX_SIZE_MB=300
+ARG VLLM_MAX_SIZE_MB=400
 ENV VLLM_MAX_SIZE_MB=$VLLM_MAX_SIZE_MB
 ARG RUN_WHEEL_CHECK=true
 RUN if [ "$RUN_WHEEL_CHECK" = "true" ]; then \