From be0efc090c533c765282f9514938882131e0d8e2 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?S=C3=A9bastien=20Han?= <seb@redhat.com>
Date: Wed, 6 Nov 2024 09:02:10 +0100
Subject: [PATCH] [CI] remove unused inductor workflows
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The torch inductor tests were completely offloaded to Meta a
few months ago. They are currently disabled on GitHub.

Signed-off-by: Sébastien Han <seb@redhat.com>
---
 .github/workflows/torch-inductor-tests.yml    | 45 -----------
 .../torch-inductor/scripts/check_acc.py       | 11 ---
 .../torch-inductor/scripts/check_perf.py      | 70 ------------------
 .../torch-inductor/scripts/common.sh          |  9 ---
 .../scripts/install_torchinductor.sh          | 74 -------------------
 .../torch-inductor/scripts/install_triton.sh  | 25 -------
 .../scripts/run_torchinductor_acc.sh          | 55 --------------
 .../scripts/run_torchinductor_perf.sh         | 71 ------------------
 8 files changed, 360 deletions(-)
 delete mode 100644 .github/workflows/torch-inductor-tests.yml
 delete mode 100644 .github/workflows/torch-inductor/scripts/check_acc.py
 delete mode 100644 .github/workflows/torch-inductor/scripts/check_perf.py
 delete mode 100755 .github/workflows/torch-inductor/scripts/common.sh
 delete mode 100755 .github/workflows/torch-inductor/scripts/install_torchinductor.sh
 delete mode 100755 .github/workflows/torch-inductor/scripts/install_triton.sh
 delete mode 100755 .github/workflows/torch-inductor/scripts/run_torchinductor_acc.sh
 delete mode 100755 .github/workflows/torch-inductor/scripts/run_torchinductor_perf.sh

diff --git a/.github/workflows/torch-inductor-tests.yml b/.github/workflows/torch-inductor-tests.yml
deleted file mode 100644
index 3d8f98095291..000000000000
--- a/.github/workflows/torch-inductor-tests.yml
+++ /dev/null
@@ -1,45 +0,0 @@
-name: Torchinductor
-
-on:
-  workflow_run:
-    workflows: ["Wheels"]
-    types: [completed]
-  workflow_dispatch:
-
-permissions: read-all
-
-jobs:
-  Runner-Preparation:
-    runs-on: ubuntu-latest
-    outputs:
-      matrix: ${{ steps.set-matrix.outputs.matrix }}
-    steps:
-      - name: Prepare runner matrix
-        id: set-matrix
-        run: |
-          echo '::set-output name=matrix::[["self-hosted", "A100"]]'
-
-  Torch-Inductor-Tests:
-    needs: Runner-Preparation
-    timeout-minutes: 240  # 4 hours
-    runs-on: ${{ matrix.runner }}
-    strategy:
-      matrix:
-        runner: ${{fromJson(needs.Runner-Preparation.outputs.matrix)}}
-    steps:
-      - name: Checkout
-        uses: actions/checkout@v4
-      - name: Packages
-        run: |
-          ./.github/workflows/torch-inductor/scripts/install_torchinductor.sh torchbench
-      - name: Environment
-        run: |
-          source /tmp/torchinductor_venv/bin/activate
-          ./.github/workflows/torch-inductor/scripts/install_triton.sh
-      - name: Performance
-        run: |
-          ./.github/workflows/torch-inductor/scripts/run_torchinductor_perf.sh torchbench
-      # Runs too long time
-      #- name: Accuracy
-      #  run: |
-      #    ./.github/workflows/torch-inductor/scripts/run_torchinductor_acc.sh torchbench
diff --git a/.github/workflows/torch-inductor/scripts/check_acc.py b/.github/workflows/torch-inductor/scripts/check_acc.py
deleted file mode 100644
index c89976acab11..000000000000
--- a/.github/workflows/torch-inductor/scripts/check_acc.py
+++ /dev/null
@@ -1,11 +0,0 @@
-import csv
-import sys
-
-file_path = sys.argv[1]
-with open(file_path) as f:
-    reader = csv.reader(f)
-    for i, row in enumerate(reader):
-        if i == 0:
-            continue
-        if row[3] != "pass":
-            print(f"{row[1]} failed on device {row[0]} with batch size {row[2]}")
diff --git a/.github/workflows/torch-inductor/scripts/check_perf.py b/.github/workflows/torch-inductor/scripts/check_perf.py
deleted file mode 100644
index 212eadad55ae..000000000000
--- a/.github/workflows/torch-inductor/scripts/check_perf.py
+++ /dev/null
@@ -1,70 +0,0 @@
-import argparse
-import csv
-from collections import namedtuple
-
-# Create a named tuple for the output of the benchmark
-BenchmarkOutput = namedtuple('BenchmarkOutput', ['dev', 'name', 'batch_size', 'speedup', 'latency'])
-
-
-def parse_output(file_path: str) -> dict:
-    entries = {}
-    with open(file_path) as f:
-        reader = csv.reader(f)
-        for i, row in enumerate(reader):
-            if i == 0 or len(row) < 5:
-                continue
-            dev = row[0]
-            name = row[1]
-            batch_size = row[2]
-            speedup = float(row[3])
-            latency = float(row[4])
-            entries[name] = BenchmarkOutput(dev, name, batch_size, speedup, latency)
-    return entries
-
-
-def compare(baseline: dict, new: dict, threshold: float, geomean_threshold: float) -> bool:
-    baseline_geomean = 1.0
-    new_geomean = 1.0
-    for key in new:
-        if key not in baseline:
-            print(f"New benchmark {key} not found in baseline")
-        baseline_latency = baseline[key].latency
-        new_latency = new[key].latency
-        if baseline_latency == 0:
-            print(f"Baseline latency for {key} is 0")
-            continue
-        elif new_latency == 0:
-            print(f"New latency for {key} is 0")
-            continue
-
-        if new_latency < baseline_latency * (1 - threshold):
-            print(f"New benchmark {key} is faster than baseline: {new_latency} vs {baseline_latency}")
-        elif new_latency > baseline_latency * (1 + threshold):
-            print(f"New benchmark {key} is slower than baseline: {new_latency} vs {baseline_latency}")
-        else:
-            print(f"New benchmark {key} is within threshold: {new_latency} vs {baseline_latency}")
-        baseline_geomean *= baseline[key].speedup
-        new_geomean *= new[key].speedup
-
-    baseline_geomean = baseline_geomean**(1 / len(baseline))
-    new_geomean = new_geomean**(1 / len(new))
-    print(f"Baseline geomean: {baseline_geomean}")
-    print(f"New geomean: {new_geomean}")
-    assert new_geomean >= baseline_geomean * (1 - geomean_threshold), \
-        f"New geomean is slower than baseline: {new_geomean} vs {baseline_geomean}"
-
-
-def main():
-    parser = argparse.ArgumentParser()
-    parser.add_argument('--baseline', required=True)
-    parser.add_argument('--new', required=True)
-    parser.add_argument('--threshold', type=float, default=0.1)
-    parser.add_argument('--geomean-threshold', type=float, default=0.02)
-    args = parser.parse_args()
-    baseline = parse_output(args.baseline)
-    new = parse_output(args.new)
-    compare(baseline, new, args.threshold, args.geomean_threshold)
-
-
-if __name__ == "__main__":
-    main()
diff --git a/.github/workflows/torch-inductor/scripts/common.sh b/.github/workflows/torch-inductor/scripts/common.sh
deleted file mode 100755
index 7e212a06a1ba..000000000000
--- a/.github/workflows/torch-inductor/scripts/common.sh
+++ /dev/null
@@ -1,9 +0,0 @@
-#!/bin/bash
-
-TEST_REPORTS_DIR=/tmp/torchinductor_reports
-PYTORCH_DIR=/tmp/pytorch
-MODELS=(timm_models huggingface torchbench)
-
-echo "$TEST_REPORTS_DIR"
-echo "$PYTORCH_DIR"
-echo "${MODELS[@]}"
diff --git a/.github/workflows/torch-inductor/scripts/install_torchinductor.sh b/.github/workflows/torch-inductor/scripts/install_torchinductor.sh
deleted file mode 100755
index 18bea1f1716f..000000000000
--- a/.github/workflows/torch-inductor/scripts/install_torchinductor.sh
+++ /dev/null
@@ -1,74 +0,0 @@
-#!/bin/bash
-
-# remember where we started
-ROOT="$(pwd)"
-MODEL_SPEC=$1
-
-# torchinductor venv
-whoami
-
-sudo apt-get update && sudo apt-get install -y python3-venv libgl1
-
-# clean up old venv
-rm -rf /tmp/torchinductor_venv
-python3 -m venv /tmp/torchinductor_venv
-# shellcheck source=/dev/null
-source /tmp/torchinductor_venv/bin/activate
-# shellcheck source=/dev/null
-source ./.github/workflows/torch-inductor/scripts/common.sh
-
-pip3 install --upgrade pip wheel setuptools
-
-# Install torchtext stable first. Bundling it in the same install as torch
-# nightly forces torch stable release to be installed instead.
-# From https://github.com/pytorch/text?tab=readme-ov-file#torchtext,
-# "WARNING: TorchText development is stopped and the 0.18 release (April 2024)
-# will be the last stable release of the library."
-pip3 install --force-reinstall torchtext
-
-# pytorch nightly
-pip3 install --force-reinstall --pre torch torchvision torchaudio torchrec --extra-index-url https://download.pytorch.org/whl/nightly/cu121
-# pytorch source to get torchbench for dynamo
-cd /tmp || exit
-# cleanup old pytorch
-rm -rf pytorch
-git clone --recursive https://github.com/pytorch/pytorch
-cd pytorch || exit
-# if you are updating an existing checkout
-git submodule sync
-git submodule update --init --recursive
-cd ..
-
-# required packages
-# https://github.com/pytorch/benchmark/blob/main/docker/gcp-a100-runner-dind.dockerfile#L17
-sudo apt-get install --yes libpango-1.0-0 libpangoft2-1.0-0
-pip3 install expecttest psutil lightning-utilities pyre_extensions
-
-# torchbench
-if [ "$MODEL_SPEC" == "torchbench" ] || [ "$MODEL_SPEC" != "all" ]; then
-	# clean up old torchbench
-	rm -rf benchmark
-	pip3 install pyyaml
-	git clone https://github.com/pytorch/benchmark.git
-	cd benchmark || exit
-	python3 install.py
-	cd ..
-fi
-
-# timm
-if [ "$MODEL_SPEC" == "timm_models" ] || [ "$MODEL_SPEC" != "all" ]; then
-	# clean up old timm
-	rm -rf pytorch-image-models
-	git clone https://github.com/huggingface/pytorch-image-models.git
-	cd pytorch-image-models || exit
-	pip3 install -e .
-	cd ..
-fi
-
-# clean up cache
-rm -rf /tmp/torchinductor_"$(whoami)"/
-rm -rf ~/.triton/cache
-rm -rf "$TEST_REPORTS_DIR"
-
-# go back to where we started
-cd "$ROOT" || exit
diff --git a/.github/workflows/torch-inductor/scripts/install_triton.sh b/.github/workflows/torch-inductor/scripts/install_triton.sh
deleted file mode 100755
index 43367a02f527..000000000000
--- a/.github/workflows/torch-inductor/scripts/install_triton.sh
+++ /dev/null
@@ -1,25 +0,0 @@
-#!/bin/bash
-
-# remember where we started
-ROOT="$(pwd)"
-
-# shellcheck source=/dev/null
-source /tmp/torchinductor_venv/bin/activate
-# shellcheck source=/dev/null
-source ./.github/workflows/torch-inductor/scripts/common.sh
-
-# Triton build-time dependencies
-pip3 install --upgrade cmake ninja lit
-
-# build our own triton and preserve the wheel build for later re-use in this test run.
-cd python || exit
-pip3 uninstall pytorch-triton -y
-rm -rf build dist
-python3 setup.py bdist_wheel
-pip3 install dist/triton*.whl
-
-# clean up cache
-rm -rf ~/.triton/cache
-
-# go back to where we started
-cd "$ROOT" || exit
diff --git a/.github/workflows/torch-inductor/scripts/run_torchinductor_acc.sh b/.github/workflows/torch-inductor/scripts/run_torchinductor_acc.sh
deleted file mode 100755
index aefd798f39ff..000000000000
--- a/.github/workflows/torch-inductor/scripts/run_torchinductor_acc.sh
+++ /dev/null
@@ -1,55 +0,0 @@
-#!/bin/bash
-
-# remember where we started
-ROOT="$(pwd)"
-INDUCTOR="$ROOT"/.github/workflows/torch-inductor
-MODEL_SPEC=$1
-
-# shellcheck source=/dev/null
-source /tmp/torchinductor_venv/bin/activate
-# shellcheck source=/dev/null
-source "$INDUCTOR"/scripts/common.sh
-
-# Dependency of 'torch/fx/experimental/validator.py'.
-pip3 install --upgrade z3-solver
-
-# Install our own triton.
-pip3 uninstall pytorch-triton -y
-cd $ROOT/python || exit
-if [ -d "./dist" ]; then
-  pip3 install dist/triton*.whl
-else
-  rm -rf build
-  pip3 install -e .
-fi
-
-cd "$PYTORCH_DIR" || exit
-TEST_REPORTS_DIR=$TEST_REPORTS_DIR/acc
-mkdir -p "$TEST_REPORTS_DIR"
-
-for model in "${MODELS[@]}"; do
-  if [ "$model" != "$MODEL_SPEC" ] && [ "$MODEL_SPEC" != "all" ]; then
-    continue
-  fi
-  echo "Running accuracy test for $model"
-  python3 benchmarks/dynamo/"$model".py --ci --accuracy --timing --explain --inductor --inference --device cuda \
-    --output "$TEST_REPORTS_DIR"/inference_"$model".csv
-  python3 benchmarks/dynamo/"$model".py --ci --accuracy --timing --explain --inductor --training --amp --device cuda \
-    --output "$TEST_REPORTS_DIR"/training_"$model".csv
-  python3 benchmarks/dynamo/"$model".py --ci --accuracy --timing --explain --inductor --training --dynamic-shapes --device cuda \
-    --output "$TEST_REPORTS_DIR"/dynamic_shapes_"$model".csv
-done
-
-cd "$ROOT" || exit
-for model in "${MODELS[@]}"; do
-  if [ "$model" != "$MODEL_SPEC" ] && [ "$MODEL_SPEC" != "all" ]; then
-    continue
-  fi
-  echo "Checking accuracy test for $model"
-  python3 "$INDUCTOR"/scripts/check_acc.py "$TEST_REPORTS_DIR"/inference_"$model".csv
-  python3 "$INDUCTOR"/scripts/check_acc.py "$TEST_REPORTS_DIR"/training_"$model".csv
-  python3 "$INDUCTOR"/scripts/check_acc.py "$TEST_REPORTS_DIR"/dynamic_shapes_"$model".csv
-done
-
-# go back to where we started
-cd "$ROOT" || exit
diff --git a/.github/workflows/torch-inductor/scripts/run_torchinductor_perf.sh b/.github/workflows/torch-inductor/scripts/run_torchinductor_perf.sh
deleted file mode 100755
index 35853d97c8fe..000000000000
--- a/.github/workflows/torch-inductor/scripts/run_torchinductor_perf.sh
+++ /dev/null
@@ -1,71 +0,0 @@
-#!/bin/bash
-
-# remember where we started
-ROOT="$(pwd)"
-INDUCTOR="$ROOT"/.github/workflows/torch-inductor
-MODEL_SPEC=$1
-
-# shellcheck source=/dev/null
-source /tmp/torchinductor_venv/bin/activate
-# shellcheck source=/dev/null
-source "$INDUCTOR"/scripts/common.sh
-
-# lock GPU clocks to 1350 MHz
-sudo nvidia-smi -i 0 -pm 1
-sudo nvidia-smi -i 0 --lock-gpu-clocks=1350,1350
-
-cd "$PYTORCH_DIR" || exit
-TRITON_TEST_REPORTS_DIR=$TEST_REPORTS_DIR/perf
-BASE_TEST_REPORTS_DIR=$TEST_REPORTS_DIR/acc
-mkdir -p "$TRITON_TEST_REPORTS_DIR"
-mkdir -p "$BASE_TEST_REPORTS_DIR"
-
-# Dependency of 'pytorch/benchmarks/dynamo/common.py'.
-pip3 install pandas scipy
-
-echo "Running with Triton Nightly"
-for model in "${MODELS[@]}"; do
-  if [ "$model" != "$MODEL_SPEC" ] && [ "$MODEL_SPEC" != "all" ]; then
-    continue
-  fi
-  echo "Running performance test for $model"
-  python3 benchmarks/dynamo/"$model".py --ci --float32 --training --inductor --performance --device cuda \
-    --output "$TRITON_TEST_REPORTS_DIR"/"$model".csv
-done
-
-# install pytorch-triton
-pip3 uninstall triton -y
-pip3 install --pre pytorch-triton --extra-index-url https://download.pytorch.org/whl/nightly/cu121
-
-echo "Running with pytorch-triton"
-for model in "${MODELS[@]}"; do
-  if [ "$model" != "$MODEL_SPEC" ] && [ "$MODEL_SPEC" != "all" ]; then
-    continue
-  fi
-  echo "Running performance test for $model"
-  python3 benchmarks/dynamo/"$model".py --ci --float32 --training --inductor --performance --device cuda \
-    --output "$BASE_TEST_REPORTS_DIR"/"$model".csv
-done
-
-# uninstall pytorch-triton
-pip3 uninstall pytorch-triton -y
-
-cd "$ROOT" || exit
-for model in "${MODELS[@]}"; do
-  if [ "$model" != "$MODEL_SPEC" ] && [ "$MODEL_SPEC" != "all" ]; then
-    continue
-  fi
-  echo "Checking performance test for $model"
-  python3 "$INDUCTOR"/scripts/check_perf.py --new "$TRITON_TEST_REPORTS_DIR"/"$model".csv --baseline "$BASE_TEST_REPORTS_DIR"/"$model".csv
-  EXIT_STATUS=$?
-  if [ "$EXIT_STATUS" -ne 0 ]; then
-    echo "Performance test for $model failed"
-    exit "$EXIT_STATUS"
-  fi
-done
-
-# unlock GPU clocks
-sudo nvidia-smi -i 0 -rgc
-
-# go back to where we started
-cd "$ROOT" || exit