Commit

Refs/heads/eliasj42/llama runner migration (#938)
Moving llama workflows to MI300X machines

Moved PkgCI - shark-ai (this workflow was already on the OSSCI cluster
but was using an outdated runner name), CI - sharktank perplexity short,
CI - sharktank perplexity, and Llama Benchmarking 8B Tests to the new
OSSCI cluster ARC runners on MI300X machines.
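All four renames follow the same pattern: a job pinned to an old self-hosted runner label is repointed at the new OSSCI ARC label. A minimal before/after sketch (the job name and step here are illustrative, not from the changed workflows):

```yaml
jobs:
  llama-quick-tests:            # illustrative job name
    # Before the migration, the job was pinned to the old label:
    # runs-on: llama-mi300x-1
    # After the migration, it targets the OSSCI cluster ARC runner:
    runs-on: linux-mi300-1gpu-ossci
    steps:
      - uses: actions/checkout@v4
      - run: echo "running on an MI300X ARC runner"
```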

---------

Signed-off-by: Elias Joseph <[email protected]>
Signed-off-by: root <[email protected]>
Co-authored-by: Elias Joseph <[email protected]>
Co-authored-by: root <[email protected]>
3 people authored Feb 8, 2025
1 parent 2c61420 commit 3ad85b3
Showing 4 changed files with 8 additions and 8 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/ci-llama-quick-tests.yaml
@@ -28,7 +28,7 @@ jobs:
matrix:
version: [3.11]
fail-fast: false
- runs-on: llama-mi300x-1
+ runs-on: linux-mi300-1gpu-ossci
defaults:
run:
shell: bash
8 changes: 4 additions & 4 deletions .github/workflows/ci_eval.yaml
@@ -28,7 +28,7 @@ jobs:
strategy:
matrix:
version: [3.11]
- runs-on: [llama-mi300x-3]
+ runs-on: [linux-mi300-1gpu-ossci]
fail-fast: false
runs-on: ${{matrix.runs-on}}
defaults:
@@ -65,7 +65,7 @@ jobs:
- name: Run perplexity test with IREE
run: |
source ${VENV_DIR}/bin/activate
- pytest -n 8 -v -s sharktank/tests/evaluate/perplexity_iree_test.py --run-nightly-llama-tests --bs=100 --iree-device=hip://0 --iree-hip-target=gfx942 --iree-hal-target-device=hip --llama3-8b-f16-model-path=/data/llama3.1/weights/8b/fp16/llama3.1_8b_fp16_instruct.irpa --llama3-8b-tokenizer-path=/data/llama3.1/weights/8b/fp16/tokenizer_config.json --html=out/llm/llama/perplexity/iree_perplexity/index.html --log-cli-level=INFO
+ pytest -n 8 -v -s sharktank/tests/evaluate/perplexity_iree_test.py --run-nightly-llama-tests --bs=100 --iree-device=hip://0 --iree-hip-target=gfx942 --iree-hal-target-device=hip --llama3-8b-f16-model-path=/shark-dev/data/llama3.1/weights/8b/fp16/llama3.1_8b_fp16_instruct.irpa --llama3-8b-tokenizer-path=/shark-dev/data/llama3.1/weights/8b/fp16/tokenizer_config.json --html=out/llm/llama/perplexity/iree_perplexity/index.html --log-cli-level=INFO
ls -lha ${{ github.workspace }}/perplexity_ci_artifacts
@@ -84,7 +84,7 @@ jobs:
strategy:
matrix:
version: [3.11]
- runs-on: [llama-mi300x-3]
+ runs-on: [linux-mi300-1gpu-ossci]
fail-fast: false
runs-on: ${{matrix.runs-on}}
defaults:
@@ -121,7 +121,7 @@ jobs:
- name: Run perplexity test with Torch
run: |
source ${VENV_DIR}/bin/activate
- pytest -n 8 -v -s sharktank/tests/evaluate/perplexity_torch_test.py --longrun --llama3-8b-f16-model-path=/data/llama3.1/weights/8b/fp16/llama3.1_8b_fp16_instruct.irpa --llama3-8b-tokenizer-path=/data/llama3.1/weights/8b/fp16/tokenizer_config.json --html=out/llm/llama/perplexity/torch_perplexity/index.html
+ pytest -n 8 -v -s sharktank/tests/evaluate/perplexity_torch_test.py --longrun --llama3-8b-f16-model-path=/shark-dev/data/llama3.1/weights/8b/fp16/llama3.1_8b_fp16_instruct.irpa --llama3-8b-tokenizer-path=/shark-dev/data/llama3.1/weights/8b/fp16/tokenizer_config.json --html=out/llm/llama/perplexity/torch_perplexity/index.html
- name: Deploy to GitHub Pages
uses: peaceiris/actions-gh-pages@4f9cc6602d3f66b9c108549d475ec49e8ef4d45e # v4.0.0
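A note on the `runs-on: ${{matrix.runs-on}}` indirection that ci_eval.yaml uses: the runner label lives in a single-element matrix list, so the job can later fan out across several runner types by extending that list, and the rename only has to touch one line. A sketch of the pattern (the job name is illustrative):

```yaml
jobs:
  test_perplexity_iree:                     # illustrative job name
    strategy:
      matrix:
        version: [3.11]
        runs-on: [linux-mi300-1gpu-ossci]   # add more labels here to fan out
      fail-fast: false
    runs-on: ${{matrix.runs-on}}            # each matrix entry picks its own runner
```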
4 changes: 2 additions & 2 deletions .github/workflows/ci_eval_short.yaml
@@ -27,7 +27,7 @@ jobs:
strategy:
matrix:
version: [3.11]
- runs-on: [llama-mi300x-3]
+ runs-on: [linux-mi300-1gpu-ossci]
fail-fast: false
runs-on: ${{matrix.runs-on}}
defaults:
@@ -64,5 +64,5 @@ jobs:
- name: Run perplexity test with vmfb
run: |
source ${VENV_DIR}/bin/activate
- pytest -n 8 -v -s sharktank/tests/evaluate/perplexity_iree_test.py --bs=5 --iree-device=hip://0 --iree-hip-target=gfx942 --iree-hal-target-device=hip --llama3-8b-f16-model-path=/data/llama3.1/weights/8b/fp16/llama3.1_8b_fp16_instruct.irpa --llama3-8b-tokenizer-path=/data/llama3.1/weights/8b/fp16/tokenizer_config.json --log-cli-level=INFO
+ pytest -n 8 -v -s sharktank/tests/evaluate/perplexity_iree_test.py --bs=5 --iree-device=hip://0 --iree-hip-target=gfx942 --iree-hal-target-device=hip --llama3-8b-f16-model-path=/shark-dev/data/llama3.1/weights/8b/fp16/llama3.1_8b_fp16_instruct.irpa --llama3-8b-tokenizer-path=/shark-dev/data/llama3.1/weights/8b/fp16/tokenizer_config.json --log-cli-level=INFO
ls -lha ${{ github.workspace }}/perplexity_ci_artifacts
2 changes: 1 addition & 1 deletion .github/workflows/pkgci_shark_ai.yml
@@ -32,7 +32,7 @@ jobs:
test_device: cpu
python-version: 3.11
- name: amdgpu_rocm_mi300_gfx942
- runs-on: linux-mi300-gpu-1
+ runs-on: linux-mi300-1gpu-ossci
test_device: gfx942
python-version: 3.11
# temporarily disable mi250 because the cluster is unstable & slow
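Unlike the other workflows, pkgci_shark_ai.yml builds its matrix from `include` entries, each pairing a `test_device` with a runner label, so only the gfx942 entry needed the new label. A hedged sketch of that shape (the CPU runner label below is an assumption, not from the diff):

```yaml
strategy:
  matrix:
    include:
      - name: cpu
        runs-on: ubuntu-24.04             # assumed hosted-runner label for the CPU entry
        test_device: cpu
        python-version: 3.11
      - name: amdgpu_rocm_mi300_gfx942
        runs-on: linux-mi300-1gpu-ossci   # the new OSSCI ARC runner label
        test_device: gfx942
        python-version: 3.11
```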
