Commit 2378d1f

[VL] Re-enable some failed CI jobs

zhztheplayer authored Apr 7, 2024
1 parent 7ebfa23 commit 2378d1f

Showing 1 changed file with 137 additions and 87 deletions: .github/workflows/velox_docker.yml
@@ -193,87 +193,137 @@ jobs:
--local --preset=velox --benchmark-type=ds --error-on-memleak --off-heap-size=10g -s=1.0 --threads=16 --iterations=1 \
--extra-conf=spark.gluten.sql.ras.enabled=true
# run-tpc-test-centos8-oom-randomkill:
# needs: build-native-lib
# strategy:
# fail-fast: false
# matrix:
# spark: ["spark-3.2"]
# runs-on: ubuntu-20.04
# container: centos:8
# steps:
# - uses: actions/checkout@v2
# - name: Download All Artifacts
# uses: actions/download-artifact@v2
# with:
# name: velox-native-lib-${{github.sha}}
# path: ./cpp/build/releases
# - name: Update mirror list
# run: |
# sed -i -e "s|mirrorlist=|#mirrorlist=|g" /etc/yum.repos.d/CentOS-* || true
# sed -i -e "s|#baseurl=http://mirror.centos.org|baseurl=http://vault.centos.org|g" /etc/yum.repos.d/CentOS-* || true
# - name: Setup java and maven
# run: |
# yum update -y && yum install -y java-1.8.0-openjdk-devel wget
# wget https://downloads.apache.org/maven/maven-3/3.8.8/binaries/apache-maven-3.8.8-bin.tar.gz
# tar -xvf apache-maven-3.8.8-bin.tar.gz
# mv apache-maven-3.8.8 /usr/lib/maven
# - name: Build for Spark ${{ matrix.spark }}
# run: |
# cd $GITHUB_WORKSPACE/
# export MAVEN_HOME=/usr/lib/maven
# export PATH=${PATH}:${MAVEN_HOME}/bin
# mvn -ntp clean install -P${{ matrix.spark }} -Pbackends-velox -DskipTests
# - name: TPC-DS SF30.0 Parquet local spark3.2 Q67/Q95 low memory, memory isolation off
# run: |
# export MAVEN_HOME=/usr/lib/maven
# export PATH=${PATH}:${MAVEN_HOME}/bin
# cd tools/gluten-it && \
# mvn -ntp clean install -Pspark-3.2 \
# && GLUTEN_IT_JVM_ARGS=-Xmx50G sbin/gluten-it.sh parameterized \
# --local --preset=velox --benchmark-type=ds --error-on-memleak --queries=q67,q95 -s=30.0 --threads=12 --shuffle-partitions=72 --iterations=1 \
# --skip-data-gen -m=OffHeapExecutionMemory \
# -d=ISOLATION:OFF,spark.gluten.memory.isolation=false \
# -d=OFFHEAP_SIZE:5g,spark.memory.offHeap.size=5g \
# -d=OFFHEAP_SIZE:3g,spark.memory.offHeap.size=3g \
# -d=OVER_ACQUIRE:0.3,spark.gluten.memory.overAcquiredMemoryRatio=0.3 \
# -d=OVER_ACQUIRE:0.5,spark.gluten.memory.overAcquiredMemoryRatio=0.5
# - name: (To be fixed) TPC-DS SF30.0 Parquet local spark3.2 Q67/Q95 low memory, memory isolation on
# run: |
# export MAVEN_HOME=/usr/lib/maven
# export PATH=${PATH}:${MAVEN_HOME}/bin
# cd tools/gluten-it && \
# mvn -ntp clean install -Pspark-3.2 \
# && GLUTEN_IT_JVM_ARGS=-Xmx50G sbin/gluten-it.sh parameterized \
# --local --preset=velox --benchmark-type=ds --error-on-memleak --queries=q67,q95 -s=30.0 --threads=12 --shuffle-partitions=72 --iterations=1 \
# --skip-data-gen -m=OffHeapExecutionMemory \
# -d=ISOLATION:ON,spark.gluten.memory.isolation=true,spark.memory.storageFraction=0.1 \
# -d=OFFHEAP_SIZE:5g,spark.memory.offHeap.size=5g \
# -d=OFFHEAP_SIZE:3g,spark.memory.offHeap.size=3g \
# -d=OVER_ACQUIRE:0.3,spark.gluten.memory.overAcquiredMemoryRatio=0.3 \
# -d=OVER_ACQUIRE:0.5,spark.gluten.memory.overAcquiredMemoryRatio=0.5 || true
# - name: TPC-DS SF30.0 Parquet local spark3.2 Q23A/Q23B low memory
# run: |
# cd tools/gluten-it && \
# GLUTEN_IT_JVM_ARGS=-Xmx50G sbin/gluten-it.sh parameterized \
# --local --preset=velox --benchmark-type=ds --error-on-memleak --queries=q23a,q23b -s=30.0 --threads=12 --shuffle-partitions=72 --iterations=1 \
# --skip-data-gen -m=OffHeapExecutionMemory \
# -d=ISOLATION:OFF,spark.gluten.memory.isolation=false \
# -d=ISOLATION:ON,spark.gluten.memory.isolation=true,spark.memory.storageFraction=0.1 \
# -d=OFFHEAP_SIZE:2g,spark.memory.offHeap.size=2g \
# -d=FLUSH_MODE:DISABLED,spark.gluten.sql.columnar.backend.velox.flushablePartialAggregation=false,spark.gluten.sql.columnar.backend.velox.maxPartialAggregationMemoryRatio=1.0,spark.gluten.sql.columnar.backend.velox.maxExtendedPartialAggregationMemoryRatio=1.0,spark.gluten.sql.columnar.backend.velox.abandonPartialAggregationMinPct=100,spark.gluten.sql.columnar.backend.velox.abandonPartialAggregationMinRows=0 \
# -d=FLUSH_MODE:ABANDONED,spark.gluten.sql.columnar.backend.velox.maxPartialAggregationMemoryRatio=1.0,spark.gluten.sql.columnar.backend.velox.maxExtendedPartialAggregationMemoryRatio=1.0,spark.gluten.sql.columnar.backend.velox.abandonPartialAggregationMinPct=0,spark.gluten.sql.columnar.backend.velox.abandonPartialAggregationMinRows=0 \
# -d=FLUSH_MODE:FLUSHED,spark.gluten.sql.columnar.backend.velox.maxPartialAggregationMemoryRatio=0.05,spark.gluten.sql.columnar.backend.velox.maxExtendedPartialAggregationMemoryRatio=0.1,spark.gluten.sql.columnar.backend.velox.abandonPartialAggregationMinPct=100,spark.gluten.sql.columnar.backend.velox.abandonPartialAggregationMinRows=0
# - name: (To be fixed) TPC-DS SF30.0 Parquet local spark3.2 Q97 low memory # The case currently causes crash with "free: invalid size".
# run: |
# cd tools/gluten-it && \
# GLUTEN_IT_JVM_ARGS=-Xmx50G sbin/gluten-it.sh parameterized \
# --local --preset=velox --benchmark-type=ds --error-on-memleak --queries=q97 -s=30.0 --threads=12 --shuffle-partitions=72 --iterations=1 \
# --skip-data-gen -m=OffHeapExecutionMemory \
# -d=ISOLATION:OFF,spark.gluten.memory.isolation=false \
# -d=ISOLATION:ON,spark.gluten.memory.isolation=true,spark.memory.storageFraction=0.1 \
# -d=OFFHEAP_SIZE:2g,spark.memory.offHeap.size=2g \
# -d=OFFHEAP_SIZE:1g,spark.memory.offHeap.size=1g || true
run-tpc-test-ubuntu-oom:
needs: build-native-lib
strategy:
fail-fast: false
matrix:
spark: [ "spark-3.2" ]
runs-on: ubuntu-20.04
steps:
- name: Maximize build disk space
shell: bash
run: |
df -h
set -euo pipefail
echo "Removing unwanted software... "
sudo rm -rf /usr/share/dotnet
sudo rm -rf /usr/local/lib/android
sudo rm -rf /opt/ghc
sudo rm -rf /opt/hostedtoolcache/CodeQL
sudo docker image prune --all --force > /dev/null
df -h
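# Note: the directories removed above ship preinstalled on GitHub-hosted Ubuntu
# runners; deleting them (and pruning Docker images) frees tens of GB of disk,
# which the SF30.0 TPC-DS dataset generated below needs. The two df -h calls
# bracket the step so the reclaimed space shows up in the logs.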
- uses: actions/checkout@v2
- name: Download All Artifacts
uses: actions/download-artifact@v2
with:
name: velox-native-lib-${{github.sha}}
path: ./cpp/build/releases
- name: Setup java and maven
run: |
sudo apt-get update
sudo apt-get install -y openjdk-8-jdk maven
- name: Set environment variables
run: |
echo "JAVA_HOME=/usr/lib/jvm/java-8-openjdk-amd64" >> $GITHUB_ENV
- name: Build for Spark ${{ matrix.spark }}
run: |
cd $GITHUB_WORKSPACE/
mvn -ntp clean install -P${{ matrix.spark }} -Pbackends-velox -DskipTests
cd $GITHUB_WORKSPACE/tools/gluten-it
mvn -ntp clean install -P${{ matrix.spark }}
GLUTEN_IT_JVM_ARGS=-Xmx6G sbin/gluten-it.sh data-gen-only --local --benchmark-type=ds -s=30.0 --threads=12
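# Note: the SF30.0 TPC-DS dataset is generated once here via data-gen-only;
# every benchmark step below passes --skip-data-gen to reuse this dataset
# instead of regenerating it.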
- name: TPC-DS SF30.0 Parquet local spark3.2 Q67/Q95 low memory, memory isolation off
run: |
cd tools/gluten-it \
&& GLUTEN_IT_JVM_ARGS=-Xmx6G sbin/gluten-it.sh parameterized \
--local --preset=velox --benchmark-type=ds --error-on-memleak --queries=q67,q95 -s=30.0 --threads=12 --shuffle-partitions=72 --iterations=1 \
--skip-data-gen -m=OffHeapExecutionMemory \
-d=ISOLATION:OFF,spark.gluten.memory.isolation=false \
-d=OFFHEAP_SIZE:6g,spark.memory.offHeap.size=6g \
-d=OFFHEAP_SIZE:4g,spark.memory.offHeap.size=4g \
-d=OVER_ACQUIRE:0.3,spark.gluten.memory.overAcquiredMemoryRatio=0.3 \
-d=OVER_ACQUIRE:0.5,spark.gluten.memory.overAcquiredMemoryRatio=0.5
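# Note: gluten-it's `parameterized` mode appears to run the Cartesian product of
# the -d= dimensions above: 1 ISOLATION x 2 OFFHEAP_SIZE x 2 OVER_ACQUIRE = 4
# configurations per query. A rough sketch of one resolved combination, flattened
# into a plain `queries` run (hypothetical, for illustration only):
#   sbin/gluten-it.sh queries --local --preset=velox --benchmark-type=ds \
#     --queries=q67,q95 -s=30.0 --threads=12 --shuffle-partitions=72 --iterations=1 \
#     --skip-data-gen \
#     --extra-conf=spark.gluten.memory.isolation=false \
#     --extra-conf=spark.memory.offHeap.size=6g \
#     --extra-conf=spark.gluten.memory.overAcquiredMemoryRatio=0.3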
- name: (To be fixed) TPC-DS SF30.0 Parquet local spark3.2 Q67/Q95 low memory, memory isolation on
run: |
cd tools/gluten-it \
&& GLUTEN_IT_JVM_ARGS=-Xmx6G sbin/gluten-it.sh parameterized \
--local --preset=velox --benchmark-type=ds --error-on-memleak --queries=q67,q95 -s=30.0 --threads=12 --shuffle-partitions=72 --iterations=1 \
--skip-data-gen -m=OffHeapExecutionMemory \
-d=ISOLATION:ON,spark.gluten.memory.isolation=true,spark.memory.storageFraction=0.1 \
-d=OFFHEAP_SIZE:6g,spark.memory.offHeap.size=6g \
-d=OFFHEAP_SIZE:4g,spark.memory.offHeap.size=4g \
-d=OVER_ACQUIRE:0.3,spark.gluten.memory.overAcquiredMemoryRatio=0.3 \
-d=OVER_ACQUIRE:0.5,spark.gluten.memory.overAcquiredMemoryRatio=0.5 || true
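# Note: the trailing `|| true` swallows the step's exit code, so this
# "(To be fixed)" case still runs for signal but cannot fail the workflow
# until the underlying issue is resolved; the Q97 step below uses the same guard.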
- name: TPC-DS SF30.0 Parquet local spark3.2 Q23A/Q23B low memory
run: |
cd tools/gluten-it \
&& GLUTEN_IT_JVM_ARGS=-Xmx6G sbin/gluten-it.sh parameterized \
--local --preset=velox --benchmark-type=ds --error-on-memleak --queries=q23a,q23b -s=30.0 --threads=12 --shuffle-partitions=72 --iterations=1 \
--skip-data-gen -m=OffHeapExecutionMemory \
-d=ISOLATION:OFF,spark.gluten.memory.isolation=false \
-d=ISOLATION:ON,spark.gluten.memory.isolation=true,spark.memory.storageFraction=0.1 \
-d=OFFHEAP_SIZE:2g,spark.memory.offHeap.size=2g \
-d=FLUSH_MODE:DISABLED,spark.gluten.sql.columnar.backend.velox.flushablePartialAggregation=false,spark.gluten.sql.columnar.backend.velox.maxPartialAggregationMemoryRatio=1.0,spark.gluten.sql.columnar.backend.velox.maxExtendedPartialAggregationMemoryRatio=1.0,spark.gluten.sql.columnar.backend.velox.abandonPartialAggregationMinPct=100,spark.gluten.sql.columnar.backend.velox.abandonPartialAggregationMinRows=0 \
-d=FLUSH_MODE:ABANDONED,spark.gluten.sql.columnar.backend.velox.maxPartialAggregationMemoryRatio=1.0,spark.gluten.sql.columnar.backend.velox.maxExtendedPartialAggregationMemoryRatio=1.0,spark.gluten.sql.columnar.backend.velox.abandonPartialAggregationMinPct=0,spark.gluten.sql.columnar.backend.velox.abandonPartialAggregationMinRows=0 \
-d=FLUSH_MODE:FLUSHED,spark.gluten.sql.columnar.backend.velox.maxPartialAggregationMemoryRatio=0.05,spark.gluten.sql.columnar.backend.velox.maxExtendedPartialAggregationMemoryRatio=0.1,spark.gluten.sql.columnar.backend.velox.abandonPartialAggregationMinPct=100,spark.gluten.sql.columnar.backend.velox.abandonPartialAggregationMinRows=0
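# Note: a rough reading of the three FLUSH_MODE dimensions above (inferred from
# the config names; not authoritative):
#   DISABLED  - flushablePartialAggregation=false: partial aggregation never
#               flushes or abandons early.
#   ABANDONED - abandonPartialAggregationMinPct=0: partial aggregation gives up
#               immediately and forwards rows to the final aggregation.
#   FLUSHED   - maxPartialAggregationMemoryRatio=0.05: partial aggregation
#               flushes its output early under a tight memory budget instead
#               of abandoning.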
- name: (To be fixed) TPC-DS SF30.0 Parquet local spark3.2 Q97 low memory # The case currently causes crash with "free: invalid size".
run: |
cd tools/gluten-it \
&& GLUTEN_IT_JVM_ARGS=-Xmx6G sbin/gluten-it.sh parameterized \
--local --preset=velox --benchmark-type=ds --error-on-memleak --queries=q97 -s=30.0 --threads=12 --shuffle-partitions=72 --iterations=1 \
--skip-data-gen -m=OffHeapExecutionMemory \
-d=ISOLATION:OFF,spark.gluten.memory.isolation=false \
-d=ISOLATION:ON,spark.gluten.memory.isolation=true,spark.memory.storageFraction=0.1 \
-d=OFFHEAP_SIZE:2g,spark.memory.offHeap.size=2g \
-d=OFFHEAP_SIZE:1g,spark.memory.offHeap.size=1g || true
run-tpc-test-ubuntu-randomkill:
needs: build-native-lib
strategy:
fail-fast: false
matrix:
spark: [ "spark-3.2" ]
runs-on: ubuntu-20.04
steps:
- name: Maximize build disk space
shell: bash
run: |
df -h
set -euo pipefail
echo "Removing unwanted software... "
sudo rm -rf /usr/share/dotnet
sudo rm -rf /usr/local/lib/android
sudo rm -rf /opt/ghc
sudo rm -rf /opt/hostedtoolcache/CodeQL
sudo docker image prune --all --force > /dev/null
df -h
- uses: actions/checkout@v2
- name: Download All Artifacts
uses: actions/download-artifact@v2
with:
name: velox-native-lib-${{github.sha}}
path: ./cpp/build/releases
- name: Setup java and maven
run: |
sudo apt-get update
sudo apt-get install -y openjdk-8-jdk maven
- name: Set environment variables
run: |
echo "JAVA_HOME=/usr/lib/jvm/java-8-openjdk-amd64" >> $GITHUB_ENV
- name: Build for Spark ${{ matrix.spark }}
run: |
cd $GITHUB_WORKSPACE/
mvn -ntp clean install -P${{ matrix.spark }} -Pbackends-velox -DskipTests
cd $GITHUB_WORKSPACE/tools/gluten-it
mvn -ntp clean install -P${{ matrix.spark }}
GLUTEN_IT_JVM_ARGS=-Xmx6G sbin/gluten-it.sh data-gen-only --local --benchmark-type=ds -s=30.0 --threads=12
- name: TPC-DS SF30.0 Parquet local spark3.2 random kill tasks
run: |
cd tools/gluten-it \
&& GLUTEN_IT_JVM_ARGS=-Xmx6G sbin/gluten-it.sh queries \
--local --preset=velox --benchmark-type=ds --error-on-memleak -s=30.0 --off-heap-size=8g --threads=12 --shuffle-partitions=72 --iterations=1 \
--skip-data-gen --random-kill-tasks
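# Note: --random-kill-tasks appears to kill tasks at random while the queries
# run, exercising Spark's task-retry path; combined with --error-on-memleak,
# the run also checks that killed-and-retried tasks do not leak native memory
# (a reading of the flag names, not authoritative).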
run-tpc-test-ubuntu-2204-celeborn:
needs: build-native-lib
@@ -357,7 +407,7 @@ jobs:
wget https://github.com/apache/spark/archive/refs/tags/v3.2.2.tar.gz && \
tar --strip-components=1 -xf v3.2.2.tar.gz spark-3.2.2/sql/core/src/test/resources/ && \
mkdir -p shims/spark32/spark_home/ && \
mv sql shims/spark32/spark_home/
- name: Build and run unit test for Spark 3.2.2 (other tests)
run: |
cd $GITHUB_WORKSPACE/
@@ -406,7 +456,7 @@ jobs:
wget https://github.com/apache/spark/archive/refs/tags/v3.2.2.tar.gz && \
tar --strip-components=1 -xf v3.2.2.tar.gz spark-3.2.2/sql/core/src/test/resources/ && \
mkdir -p shims/spark32/spark_home/ && \
mv sql shims/spark32/spark_home/
- name: Build and run unit test for Spark 3.2.2 (slow tests)
run: |
cd $GITHUB_WORKSPACE/ && \
@@ -448,7 +498,7 @@ jobs:
wget https://github.com/apache/spark/archive/refs/tags/v3.3.1.tar.gz && \
tar --strip-components=1 -xf v3.3.1.tar.gz spark-3.3.1/sql/core/src/test/resources/ && \
mkdir -p shims/spark33/spark_home/ && \
mv sql shims/spark33/spark_home/
- name: Build and Run unit test for Spark 3.3.1 (other tests)
run: |
cd $GITHUB_WORKSPACE/ && \
@@ -493,7 +543,7 @@ jobs:
wget https://github.com/apache/spark/archive/refs/tags/v3.3.1.tar.gz && \
tar --strip-components=1 -xf v3.3.1.tar.gz spark-3.3.1/sql/core/src/test/resources/ && \
mkdir -p shims/spark33/spark_home/ && \
mv sql shims/spark33/spark_home/
- name: Build and Run unit test for Spark 3.3.1 (slow tests)
run: |
cd $GITHUB_WORKSPACE/ && \
@@ -535,7 +585,7 @@ jobs:
wget https://github.com/apache/spark/archive/refs/tags/v3.4.2.tar.gz && \
tar --strip-components=1 -xf v3.4.2.tar.gz spark-3.4.2/sql/core/src/test/resources/ && \
mkdir -p shims/spark34/spark_home/ && \
mv sql shims/spark34/spark_home/
- name: Build and Run unit test for Spark 3.4.2 (other tests)
run: |
cd $GITHUB_WORKSPACE/ && \
@@ -580,7 +630,7 @@ jobs:
wget https://github.com/apache/spark/archive/refs/tags/v3.4.2.tar.gz && \
tar --strip-components=1 -xf v3.4.2.tar.gz spark-3.4.2/sql/core/src/test/resources/ && \
mkdir -p shims/spark34/spark_home/ && \
mv sql shims/spark34/spark_home/
- name: Build and Run unit test for Spark 3.4.2 (slow tests)
run: |
cd $GITHUB_WORKSPACE/