Commit
Merge branch 'master' into SPARK-31709
Showing 1,716 changed files with 48,360 additions and 20,582 deletions.
@@ -9,148 +9,265 @@ on:
     - master
 
 jobs:
+  # TODO(SPARK-32248): Recover JDK 11 builds
+  # Build: build Spark and run the tests for specified modules.
   build:
+    name: "Build modules: ${{ matrix.modules }} ${{ matrix.comment }} (JDK ${{ matrix.java }}, ${{ matrix.hadoop }}, ${{ matrix.hive }})"
     runs-on: ubuntu-latest
     strategy:
       fail-fast: false
       matrix:
-        java: [ '1.8', '11' ]
-        hadoop: [ 'hadoop-2.7', 'hadoop-3.2' ]
-        hive: [ 'hive-1.2', 'hive-2.3' ]
-        exclude:
-        - java: '11'
-          hive: 'hive-1.2'
-        - hadoop: 'hadoop-3.2'
-          hive: 'hive-1.2'
-    name: Build Spark - JDK${{ matrix.java }}/${{ matrix.hadoop }}/${{ matrix.hive }}
+        java:
+          - 1.8
+        hadoop:
+          - hadoop3.2
+        hive:
+          - hive2.3
+        # TODO(SPARK-32246): We don't test 'streaming-kinesis-asl' for now.
+        # Kinesis tests depends on external Amazon kinesis service.
+        # Note that the modules below are from sparktestsupport/modules.py.
+        modules:
+          - |-
+            core, unsafe, kvstore, avro,
+            network-common, network-shuffle, repl, launcher,
+            examples, sketch, graphx
+          - |-
+            catalyst, hive-thriftserver
+          - |-
+            streaming, sql-kafka-0-10, streaming-kafka-0-10,
+            mllib-local, mllib,
+            yarn, mesos, kubernetes, hadoop-cloud, spark-ganglia-lgpl
+          - |-
+            pyspark-sql, pyspark-mllib, pyspark-resource
+          - |-
+            pyspark-core, pyspark-streaming, pyspark-ml
+          - |-
+            sparkr
+        # Here, we split Hive and SQL tests into some of slow ones and the rest of them.
+        included-tags: [""]
+        excluded-tags: [""]
+        comment: [""]
+        include:
+          # Hive tests
+          - modules: hive
+            java: 1.8
+            hadoop: hadoop3.2
+            hive: hive2.3
+            included-tags: org.apache.spark.tags.SlowHiveTest
+            comment: "- slow tests"
+          - modules: hive
+            java: 1.8
+            hadoop: hadoop3.2
+            hive: hive2.3
+            excluded-tags: org.apache.spark.tags.SlowHiveTest
+            comment: "- other tests"
+          # SQL tests
+          - modules: sql
+            java: 1.8
+            hadoop: hadoop3.2
+            hive: hive2.3
+            included-tags: org.apache.spark.tags.ExtendedSQLTest
+            comment: "- slow tests"
+          - modules: sql
+            java: 1.8
+            hadoop: hadoop3.2
+            hive: hive2.3
+            excluded-tags: org.apache.spark.tags.ExtendedSQLTest
+            comment: "- other tests"
+    env:
+      MODULES_TO_TEST: ${{ matrix.modules }}
+      EXCLUDED_TAGS: ${{ matrix.excluded-tags }}
+      INCLUDED_TAGS: ${{ matrix.included-tags }}
+      HADOOP_PROFILE: ${{ matrix.hadoop }}
+      HIVE_PROFILE: ${{ matrix.hive }}
+      # GitHub Actions' default miniconda to use in pip packaging test.
+      CONDA_PREFIX: /usr/share/miniconda
+      GITHUB_PREV_SHA: ${{ github.event.before }}
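
This env block is the bridge between the matrix and the shell: each variable is read back by the test step at the end of the job, and GITHUB_PREV_SHA captures the push event's "before" commit, the kind of reference a changed-files check can diff against once the checkout below fetches full history. A hypothetical debugging step consuming these values (illustrative only, not part of this workflow):

    - name: Show what this combination tests
      run: |
        echo "Modules: $MODULES_TO_TEST"
        echo "Included tags: ${INCLUDED_TAGS:-<none>}"
        # Possible only because of fetch-depth: 0 in the checkout step:
        git diff --name-only "$GITHUB_PREV_SHA" HEAD | head
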
     steps:
-    - uses: actions/checkout@master
-    # We split caches because GitHub Action Cache has a 400MB-size limit.
-    - uses: actions/cache@v1
+    - name: Checkout Spark repository
+      uses: actions/checkout@v2
+      # In order to fetch changed files
+      with:
+        fetch-depth: 0
+    # Cache local repositories. Note that GitHub Actions cache has a 2G limit.
+    - name: Cache Scala, SBT, Maven and Zinc
+      uses: actions/cache@v1
       with:
         path: build
         key: build-${{ hashFiles('**/pom.xml') }}
         restore-keys: |
           build-
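
The cache steps here and below use the standard actions/cache contract: `key` embeds a hash of every pom.xml, so any dependency change writes a fresh cache entry, while `restore-keys` falls back to the newest existing cache whose key starts with the given prefix. A minimal sketch of the fallback behavior (concrete key values are illustrative):

    - uses: actions/cache@v2
      with:
        path: ~/.m2/repository
        # Exact match preferred; otherwise the most recent cache whose key
        # begins with "1.8-hadoop3.2-maven-" is restored, and an updated
        # cache is saved under the new exact key when the job succeeds.
        key: 1.8-hadoop3.2-maven-${{ hashFiles('**/pom.xml') }}
        restore-keys: |
          1.8-hadoop3.2-maven-
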
-    - uses: actions/cache@v1
-      with:
-        path: ~/.m2/repository/com
-        key: ${{ matrix.java }}-${{ matrix.hadoop }}-maven-com-${{ hashFiles('**/pom.xml') }}
-        restore-keys: |
-          ${{ matrix.java }}-${{ matrix.hadoop }}-maven-com-
-    - uses: actions/cache@v1
-      with:
-        path: ~/.m2/repository/org
-        key: ${{ matrix.java }}-${{ matrix.hadoop }}-maven-org-${{ hashFiles('**/pom.xml') }}
-        restore-keys: |
-          ${{ matrix.java }}-${{ matrix.hadoop }}-maven-org-
-    - uses: actions/cache@v1
+    - name: Cache Maven local repository
+      uses: actions/cache@v2
       with:
-        path: ~/.m2/repository/net
-        key: ${{ matrix.java }}-${{ matrix.hadoop }}-maven-net-${{ hashFiles('**/pom.xml') }}
+        path: ~/.m2/repository
+        key: ${{ matrix.java }}-${{ matrix.hadoop }}-maven-${{ hashFiles('**/pom.xml') }}
         restore-keys: |
-          ${{ matrix.java }}-${{ matrix.hadoop }}-maven-net-
-    - uses: actions/cache@v1
+          ${{ matrix.java }}-${{ matrix.hadoop }}-maven-
+    - name: Cache Ivy local repository
+      uses: actions/cache@v2
       with:
-        path: ~/.m2/repository/io
-        key: ${{ matrix.java }}-${{ matrix.hadoop }}-maven-io-${{ hashFiles('**/pom.xml') }}
+        path: ~/.ivy2/cache
+        key: ${{ matrix.java }}-${{ matrix.hadoop }}-ivy-${{ hashFiles('**/pom.xml') }}-${{ hashFiles('**/plugins.sbt') }}
         restore-keys: |
-          ${{ matrix.java }}-${{ matrix.hadoop }}-maven-io-
-    - name: Set up JDK ${{ matrix.java }}
+          ${{ matrix.java }}-${{ matrix.hadoop }}-ivy-
+    - name: Install JDK ${{ matrix.java }}
       uses: actions/setup-java@v1
       with:
         java-version: ${{ matrix.java }}
-    - name: Build with Maven
+    # PySpark
+    - name: Install PyPy3
+      # Note that order of Python installations here matters because default python3 is
+      # overridden by pypy3.
+      uses: actions/setup-python@v2
+      if: contains(matrix.modules, 'pyspark')
+      with:
+        python-version: pypy3
+        architecture: x64
+    - name: Install Python 3.6
+      uses: actions/setup-python@v2
+      if: contains(matrix.modules, 'pyspark')
+      with:
+        python-version: 3.6
+        architecture: x64
+    - name: Install Python 3.8
+      uses: actions/setup-python@v2
+      # We should install one Python that is higher then 3+ for SQL and Yarn because:
+      # - SQL component also has Python related tests, for example, IntegratedUDFTestUtils.
+      # - Yarn has a Python specific test too, for example, YarnClusterSuite.
+      if: contains(matrix.modules, 'yarn') || contains(matrix.modules, 'pyspark') || (contains(matrix.modules, 'sql') && !contains(matrix.modules, 'sql-'))
+      with:
+        python-version: 3.8
+        architecture: x64
+    - name: Install Python packages (Python 3.6 and PyPy3)
+      if: contains(matrix.modules, 'pyspark')
+      # PyArrow is not supported in PyPy yet, see ARROW-2651.
+      # TODO(SPARK-32247): scipy installation with PyPy fails for an unknown reason.
       run: |
-        export MAVEN_OPTS="-Xmx2g -XX:ReservedCodeCacheSize=1g -Dorg.slf4j.simpleLogger.defaultLogLevel=WARN"
-        export MAVEN_CLI_OPTS="--no-transfer-progress"
+        python3.6 -m pip install numpy pyarrow pandas scipy
+        python3.6 -m pip list
+        pypy3 -m pip install numpy pandas
+        pypy3 -m pip list
+    - name: Install Python packages (Python 3.8)
+      if: contains(matrix.modules, 'pyspark') || (contains(matrix.modules, 'sql') && !contains(matrix.modules, 'sql-'))
+      run: |
+        python3.8 -m pip install numpy pyarrow pandas scipy
+        python3.8 -m pip list
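
These `if:` expressions treat `matrix.modules` as one flat string, so plain substring checks can overmatch: `contains(matrix.modules, 'sql')` is also true for 'sql-kafka-0-10', and the `!contains(matrix.modules, 'sql-')` guard is what narrows Python 3.8 installation to the real sql module jobs. Worked evaluations against the module lists above (comments only, for illustration):

    # modules: "pyspark-sql, pyspark-mllib, pyspark-resource"
    #   contains(..., 'pyspark')                       -> true  (step runs)
    # modules: "sql" (from the include entries)
    #   contains(..., 'sql') && !contains(..., 'sql-') -> true  (step runs)
    # modules: "streaming, sql-kafka-0-10, ..., yarn, ..."
    #   the sql clause is false because 'sql-' also matches, but this group
    #   still gets Python 3.8 through contains(..., 'yarn')
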
+    # SparkR
+    - name: Install R 4.0
+      if: contains(matrix.modules, 'sparkr')
+      run: |
+        sudo sh -c "echo 'deb https://cloud.r-project.org/bin/linux/ubuntu bionic-cran40/' >> /etc/apt/sources.list"
+        curl -sL "https://keyserver.ubuntu.com/pks/lookup?op=get&search=0xE298A3A825C0D65DFD57CBB651716619E084DAB9" | sudo apt-key add
+        sudo apt-get update
+        sudo apt-get install -y r-base r-base-dev libcurl4-openssl-dev
+    - name: Install R packages
+      if: contains(matrix.modules, 'sparkr')
+      run: |
+        # qpdf is required to reduce the size of PDFs to make CRAN check pass. See SPARK-32497.
+        sudo apt-get install -y libcurl4-openssl-dev qpdf
+        sudo Rscript -e "install.packages(c('knitr', 'rmarkdown', 'testthat', 'devtools', 'e1071', 'survival', 'arrow', 'roxygen2'), repos='https://cloud.r-project.org/')"
+        # Show installed packages in R.
+        sudo Rscript -e 'pkg_list <- as.data.frame(installed.packages()[, c(1,3:4)]); pkg_list[is.na(pkg_list$Priority), 1:2, drop = FALSE]'
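
The qpdf comment points at why the sparkr jobs need more than a plain R install: SparkR's test run includes a CRAN-style package check, and oversized vignette PDFs fail it (SPARK-32497). If one wanted that check as its own step, something like the following should work, assuming the repository's R/check-cran.sh helper and the packages installed above (a hypothetical step, not part of this workflow):

    - name: SparkR CRAN check
      if: contains(matrix.modules, 'sparkr')
      run: |
        ./R/install-dev.sh
        ./R/check-cran.sh
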
+    # Run the tests.
+    - name: "Run tests: ${{ matrix.modules }}"
+      run: |
+        # Hive tests become flaky when running in parallel as it's too intensive.
+        if [[ "$MODULES_TO_TEST" == "hive" ]]; then export SERIAL_SBT_TESTS=1; fi
         mkdir -p ~/.m2
-        ./build/mvn $MAVEN_CLI_OPTS -DskipTests -Pyarn -Pmesos -Pkubernetes -Phive -P${{ matrix.hive }} -Phive-thriftserver -P${{ matrix.hadoop }} -Phadoop-cloud -Djava.version=${{ matrix.java }} install
+        ./dev/run-tests --parallelism 2 --modules "$MODULES_TO_TEST" --included-tags "$INCLUDED_TAGS" --excluded-tags "$EXCLUDED_TAGS"
         rm -rf ~/.m2/repository/org/apache/spark
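
End to end, then: a matrix combination becomes env vars, and dev/run-tests does the selection. For the "hive - slow tests" include entry, the step above amounts to the following, shown with the matrix values inlined (a restatement for clarity, assuming a fully built tree; the empty excluded-tags argument is dropped here):

    - name: "Run tests: hive - slow tests"
      run: |
        export SERIAL_SBT_TESTS=1  # Hive runs serially, per the comment above
        ./dev/run-tests --parallelism 2 --modules hive \
          --included-tags org.apache.spark.tags.SlowHiveTest
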
+  # Static analysis, and documentation build
   lint:
+    name: Linters, licenses, dependencies and documentation generation
     runs-on: ubuntu-latest
-    name: Linters (Java/Scala/Python), licenses, dependencies
     steps:
-    - uses: actions/checkout@master
-    - uses: actions/setup-java@v1
-      with:
-        java-version: '11'
-    - uses: actions/setup-python@v1
+    - name: Checkout Spark repository
+      uses: actions/checkout@v2
+    - name: Cache Maven local repository
+      uses: actions/cache@v2
       with:
-        python-version: '3.x'
-        architecture: 'x64'
-    - name: Scala
-      run: ./dev/lint-scala
-    - name: Java
-      run: ./dev/lint-java
-    - name: Python
-      run: |
-        pip install flake8 sphinx numpy
-        ./dev/lint-python
-    - name: License
-      run: ./dev/check-license
-    - name: Dependencies
-      run: ./dev/test-dependencies.sh
-
-  lintr:
-    runs-on: ubuntu-latest
-    name: Linter (R)
-    steps:
-    - uses: actions/checkout@master
-    - uses: actions/setup-java@v1
+        path: ~/.m2/repository
+        key: docs-maven-repo-${{ hashFiles('**/pom.xml') }}
+        restore-keys: |
+          docs-maven-
+    - name: Install JDK 1.8
+      uses: actions/setup-java@v1
       with:
-        java-version: '11'
-    - uses: r-lib/actions/setup-r@v1
+        java-version: 1.8
+    - name: Install Python 3.6
+      uses: actions/setup-python@v2
       with:
-        r-version: '3.6.2'
-    - name: Install lib
+        python-version: 3.6
+        architecture: x64
+    - name: Install Python linter dependencies
       run: |
-        sudo apt-get install -y libcurl4-openssl-dev
-    - name: install R packages
+        # TODO(SPARK-32407): Sphinx 3.1+ does not correctly index nested classes.
+        # See also https://github.com/sphinx-doc/sphinx/issues/7551.
+        pip3 install flake8 'sphinx<3.1.0' numpy pydata_sphinx_theme
+    - name: Install R 4.0
       run: |
-        sudo Rscript -e "install.packages(c('curl', 'xml2', 'httr', 'devtools', 'testthat', 'knitr', 'rmarkdown', 'roxygen2', 'e1071', 'survival'), repos='https://cloud.r-project.org/')"
+        sudo sh -c "echo 'deb https://cloud.r-project.org/bin/linux/ubuntu bionic-cran40/' >> /etc/apt/sources.list"
+        curl -sL "https://keyserver.ubuntu.com/pks/lookup?op=get&search=0xE298A3A825C0D65DFD57CBB651716619E084DAB9" | sudo apt-key add
+        sudo apt-get update
+        sudo apt-get install -y r-base r-base-dev libcurl4-openssl-dev
+    - name: Install R linter dependencies and SparkR
+      run: |
+        sudo apt-get install -y libcurl4-openssl-dev
+        sudo Rscript -e "install.packages(c('devtools'), repos='https://cloud.r-project.org/')"
         sudo Rscript -e "devtools::install_github('jimhester/[email protected]')"
-    - name: package and install SparkR
-      run: ./R/install-dev.sh
-    - name: lint-r
+        ./R/install-dev.sh
+    - name: Install Ruby 2.7 for documentation generation
+      uses: actions/setup-ruby@v1
+      with:
+        ruby-version: 2.7
+    - name: Install dependencies for documentation generation
+      run: |
+        sudo apt-get install -y libcurl4-openssl-dev pandoc
+        # TODO(SPARK-32407): Sphinx 3.1+ does not correctly index nested classes.
+        # See also https://github.com/sphinx-doc/sphinx/issues/7551.
+        pip install 'sphinx<3.1.0' mkdocs numpy pydata_sphinx_theme
+        gem install jekyll jekyll-redirect-from rouge
+        sudo Rscript -e "install.packages(c('devtools', 'testthat', 'knitr', 'rmarkdown', 'roxygen2'), repos='https://cloud.r-project.org/')"
+    - name: Scala linter
+      run: ./dev/lint-scala
+    - name: Java linter
+      run: ./dev/lint-java
+    - name: Python linter
+      run: ./dev/lint-python
+    - name: R linter
       run: ./dev/lint-r
+    - name: License test
+      run: ./dev/check-license
+    - name: Dependencies test
+      run: ./dev/test-dependencies.sh
+    - name: Run documentation build
+      run: |
+        cd docs
+        jekyll build
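
Taken together, this part of the diff folds the former lint, lintr, and docs jobs into one lint job: one checkout, one Maven cache, every toolchain (JDK 1.8, Python 3.6, R 4.0, Ruby 2.7) installed once, and all checks plus the Jekyll documentation build run in sequence. Stripped of the install steps, the resulting job shape is roughly (a condensed sketch, not the literal file):

  lint:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v2
      # ... toolchain installs elided ...
      - run: ./dev/lint-scala
      - run: ./dev/lint-java
      - run: ./dev/lint-python
      - run: ./dev/lint-r
      - run: ./dev/check-license
      - run: ./dev/test-dependencies.sh
      - run: cd docs && jekyll build
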
-  docs:
+  java11:
+    name: Java 11 build
     runs-on: ubuntu-latest
-    name: Generate documents
     steps:
-    - uses: actions/checkout@master
-    - uses: actions/cache@v1
+    - name: Checkout Spark repository
+      uses: actions/checkout@v2
+    - name: Cache Maven local repository
+      uses: actions/cache@v2
       with:
         path: ~/.m2/repository
-        key: docs-maven-repo-${{ hashFiles('**/pom.xml') }}
+        key: java11-maven-${{ hashFiles('**/pom.xml') }}
         restore-keys: |
-          docs-maven-repo-
-    - uses: actions/setup-java@v1
-      with:
-        java-version: '1.8'
-    - uses: actions/setup-python@v1
-      with:
-        python-version: '3.x'
-        architecture: 'x64'
-    - uses: actions/setup-ruby@v1
-      with:
-        ruby-version: '2.7'
-    - uses: r-lib/actions/setup-r@v1
+          java11-maven-
+    - name: Install Java 11
+      uses: actions/setup-java@v1
       with:
-        r-version: '3.6.2'
-    - name: Install lib and pandoc
-      run: |
-        sudo apt-get install -y libcurl4-openssl-dev pandoc
-    - name: Install packages
-      run: |
-        pip install sphinx mkdocs numpy
-        gem install jekyll jekyll-redirect-from rouge
-        sudo Rscript -e "install.packages(c('curl', 'xml2', 'httr', 'devtools', 'testthat', 'knitr', 'rmarkdown', 'roxygen2', 'e1071', 'survival'), repos='https://cloud.r-project.org/')"
-    - name: Run jekyll build
+        java-version: 11
+    - name: Build with Maven
       run: |
-        cd docs
-        jekyll build
+        export MAVEN_OPTS="-Xmx2g -XX:ReservedCodeCacheSize=1g -Dorg.slf4j.simpleLogger.defaultLogLevel=WARN"
+        export MAVEN_CLI_OPTS="--no-transfer-progress"
+        mkdir -p ~/.m2
+        ./build/mvn $MAVEN_CLI_OPTS -DskipTests -Pyarn -Pmesos -Pkubernetes -Phive -Phive-thriftserver -Phadoop-cloud -Djava.version=11 install
+        rm -rf ~/.m2/repository/org/apache/spark
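
The new java11 job keeps JDK 11 compiling (install only, -DskipTests) while test coverage on 11 stays parked behind the SPARK-32248 TODO at the top of the build job. If those tests come back, the natural follow-up would be a second matrix value rather than a separate job, roughly (hypothetical, not in this commit):

      matrix:
        java:
          - 1.8
          - 11  # hypothetical; blocked on SPARK-32248
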