GitHub Actions test #0 (#17)
Merged · 5 commits · Aug 12, 2020
145 changes: 13 additions & 132 deletions .github/workflows/master.yml
@@ -3,10 +3,10 @@ name: master
on:
push:
branches:
- master
- test
pull_request:
branches:
- master
- test

jobs:
# TODO(SPARK-32248): Recover JDK 11 builds
@@ -27,47 +27,14 @@ jobs:
# Kinesis tests depend on the external Amazon Kinesis service.
# Note that the modules below are from sparktestsupport/modules.py.
modules:
- |-
core, unsafe, kvstore, avro,
network-common, network-shuffle, repl, launcher,
examples, sketch, graphx
- |-
catalyst, hive-thriftserver
- |-
streaming, sql-kafka-0-10, streaming-kafka-0-10,
mllib-local, mllib,
yarn, mesos, kubernetes, hadoop-cloud, spark-ganglia-lgpl
- |-
pyspark-sql, pyspark-mllib, pyspark-resource
- |-
pyspark-core, pyspark-streaming, pyspark-ml
- |-
sparkr
- >-
pyspark-sql
# Here, we split the Hive and SQL tests into the slow ones and the rest.
included-tags: [""]
excluded-tags: [""]
comment: [""]
include:
# Hive tests
- modules: hive
java: 1.8
hadoop: hadoop3.2
hive: hive2.3
included-tags: org.apache.spark.tags.SlowHiveTest
comment: "- slow tests"
- modules: hive
java: 1.8
hadoop: hadoop3.2
hive: hive2.3
excluded-tags: org.apache.spark.tags.SlowHiveTest
comment: "- other tests"
# SQL tests
- modules: sql
java: 1.8
hadoop: hadoop3.2
hive: hive2.3
included-tags: org.apache.spark.tags.ExtendedSQLTest
comment: "- slow tests"
- modules: sql
java: 1.8
hadoop: hadoop3.2
@@ -144,14 +111,15 @@ jobs:
# PyArrow is not supported in PyPy yet, see ARROW-2651.
# TODO(SPARK-32247): scipy installation with PyPy fails for an unknown reason.
run: |
python3.6 -m pip install numpy pyarrow pandas scipy
python3.6 -m pip install numpy pyarrow pandas scipy xmlrunner
python3.6 -m pip list
# PyPy does not have xmlrunner
pypy3 -m pip install numpy pandas
pypy3 -m pip list
- name: Install Python packages (Python 3.8)
if: contains(matrix.modules, 'pyspark') || (contains(matrix.modules, 'sql') && !contains(matrix.modules, 'sql-'))
run: |
python3.8 -m pip install numpy pyarrow pandas scipy
python3.8 -m pip install numpy pyarrow pandas scipy xmlrunner
python3.8 -m pip list
# SparkR
- name: Install R 4.0
@@ -170,104 +138,17 @@ jobs:
# Show installed packages in R.
sudo Rscript -e 'pkg_list <- as.data.frame(installed.packages()[, c(1,3:4)]); pkg_list[is.na(pkg_list$Priority), 1:2, drop = FALSE]'
# Run the tests.
- name: "Run tests: ${{ matrix.modules }}"
- name: Run tests
run: |
# Hive tests become flaky when run in parallel because they are too resource-intensive.
if [[ "$MODULES_TO_TEST" == "hive" ]]; then export SERIAL_SBT_TESTS=1; fi
mkdir -p ~/.m2
./dev/run-tests --parallelism 2 --modules "$MODULES_TO_TEST" --included-tags "$INCLUDED_TAGS" --excluded-tags "$EXCLUDED_TAGS"
rm -rf ~/.m2/repository/org/apache/spark

# Static analysis, and documentation build
lint:
name: Linters, licenses, dependencies and documentation generation
runs-on: ubuntu-latest
steps:
- name: Checkout Spark repository
uses: actions/checkout@v2
- name: Cache Maven local repository
uses: actions/cache@v2
with:
path: ~/.m2/repository
key: docs-maven-repo-${{ hashFiles('**/pom.xml') }}
restore-keys: |
docs-maven-
- name: Install JDK 1.8
uses: actions/setup-java@v1
with:
java-version: 1.8
- name: Install Python 3.6
uses: actions/setup-python@v2
with:
python-version: 3.6
architecture: x64
- name: Install Python linter dependencies
run: |
# TODO(SPARK-32407): Sphinx 3.1+ does not correctly index nested classes.
# See also https://github.com/sphinx-doc/sphinx/issues/7551.
pip3 install flake8 'sphinx<3.1.0' numpy pydata_sphinx_theme
- name: Install R 4.0
run: |
sudo sh -c "echo 'deb https://cloud.r-project.org/bin/linux/ubuntu bionic-cran40/' >> /etc/apt/sources.list"
curl -sL "https://keyserver.ubuntu.com/pks/lookup?op=get&search=0xE298A3A825C0D65DFD57CBB651716619E084DAB9" | sudo apt-key add
sudo apt-get update
sudo apt-get install -y r-base r-base-dev libcurl4-openssl-dev
- name: Install R linter dependencies and SparkR
run: |
sudo apt-get install -y libcurl4-openssl-dev
sudo Rscript -e "install.packages(c('devtools'), repos='https://cloud.r-project.org/')"
sudo Rscript -e "devtools::install_github('jimhester/[email protected]')"
./R/install-dev.sh
- name: Install Ruby 2.7 for documentation generation
uses: actions/setup-ruby@v1
- name: Archive test report
if: always()
uses: actions/upload-artifact@v2
with:
ruby-version: 2.7
- name: Install dependencies for documentation generation
run: |
sudo apt-get install -y libcurl4-openssl-dev pandoc
# TODO(SPARK-32407): Sphinx 3.1+ does not correctly index nested classes.
# See also https://github.com/sphinx-doc/sphinx/issues/7551.
pip install 'sphinx<3.1.0' mkdocs numpy pydata_sphinx_theme
gem install jekyll jekyll-redirect-from rouge
sudo Rscript -e "install.packages(c('devtools', 'testthat', 'knitr', 'rmarkdown', 'roxygen2'), repos='https://cloud.r-project.org/')"
- name: Scala linter
run: ./dev/lint-scala
- name: Java linter
run: ./dev/lint-java
- name: Python linter
run: ./dev/lint-python
- name: R linter
run: ./dev/lint-r
- name: License test
run: ./dev/check-license
- name: Dependencies test
run: ./dev/test-dependencies.sh
- name: Run documentation build
run: |
cd docs
jekyll build
name: junit-xml-reports-${{ github.sha }}-${{ matrix.modules }}
path: "**/target/test-reports/*.xml"

java11:
name: Java 11 build
runs-on: ubuntu-latest
steps:
- name: Checkout Spark repository
uses: actions/checkout@v2
- name: Cache Maven local repository
uses: actions/cache@v2
with:
path: ~/.m2/repository
key: java11-maven-${{ hashFiles('**/pom.xml') }}
restore-keys: |
java11-maven-
- name: Install Java 11
uses: actions/setup-java@v1
with:
java-version: 11
- name: Build with Maven
run: |
export MAVEN_OPTS="-Xmx2g -XX:ReservedCodeCacheSize=1g -Dorg.slf4j.simpleLogger.defaultLogLevel=WARN"
export MAVEN_CLI_OPTS="--no-transfer-progress"
mkdir -p ~/.m2
./build/mvn $MAVEN_CLI_OPTS -DskipTests -Pyarn -Pmesos -Pkubernetes -Phive -Phive-thriftserver -Phadoop-cloud -Djava.version=11 install
rm -rf ~/.m2/repository/org/apache/spark
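Note: in the rendered diff above, the lines of the newly added "Archive test report" step are interleaved with removed lint-job lines. Assembled from the added fragments, the upload step presumably reads as follows (a sketch reconstructed from the diff, not the authoritative file):

      - name: Archive test report
        if: always()
        uses: actions/upload-artifact@v2
        with:
          name: junit-xml-reports-${{ github.sha }}-${{ matrix.modules }}
          path: "**/target/test-reports/*.xml"

Because the step uses if: always(), the JUnit XML files are uploaded even when the preceding test step fails, which is what allows the follow-up workflow below to report failures.
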
53 changes: 53 additions & 0 deletions .github/workflows/test_report.yml
@@ -0,0 +1,53 @@
name: "Report JUnit test results"
on:
workflow_run:
workflows: ["master"]
types:
- completed

jobs:
# TODO(SPARK-32248): Recover JDK 11 builds
# Build: build Spark and run the tests for specified modules.
test_report:
name: "Test report: ${{ matrix.modules }} ${{ matrix.comment }} (JDK ${{ matrix.java }}, ${{ matrix.hadoop }}, ${{ matrix.hive }})"
runs-on: ubuntu-latest
strategy:
fail-fast: false
matrix:
java:
- 1.8
hadoop:
- hadoop3.2
hive:
- hive2.3
# TODO(SPARK-32246): We don't test 'streaming-kinesis-asl' for now.
# Kinesis tests depend on the external Amazon Kinesis service.
# Note that the modules below are from sparktestsupport/modules.py.
modules:
- >-
pyspark-sql
# Here, we split the Hive and SQL tests into the slow ones and the rest.
included-tags: [""]
excluded-tags: [""]
comment: [""]
include:
# SQL tests
- modules: sql
java: 1.8
hadoop: hadoop3.2
hive: hive2.3
excluded-tags: org.apache.spark.tags.ExtendedSQLTest
comment: "- other tests"
steps:
- uses: haya14busa/action-workflow_run-status@v1
- uses: actions/checkout@v2
- uses: actions/download-artifact@v2
with:
name: junit-xml-reports-${{ github.sha }}-${{ matrix.modules }}
path: junit-xml-reports
- name: "Publish test report: ${{ matrix.modules }}"
uses: scacap/action-surefire-report@v1
with:
github_token: ${{ secrets.GITHUB_TOKEN }}
report_paths: "junit-xml-reports/*.xml"
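
The new test_report.yml ties back to master.yml through the artifact name: the download step uses the same name expression as the archive step sketched above, and scacap/action-surefire-report then publishes whatever JUnit XML it finds under that path. A minimal view of the linkage, taken from the added lines (whether actions/download-artifact@v2 can fetch artifacts across the workflow_run boundary is not something the diff itself demonstrates):

      # master.yml — upload, one artifact per matrix entry
      name: junit-xml-reports-${{ github.sha }}-${{ matrix.modules }}
      path: "**/target/test-reports/*.xml"

      # test_report.yml — download in the workflow_run job, same name expression
      name: junit-xml-reports-${{ github.sha }}-${{ matrix.modules }}
      path: junit-xml-reports

Since the matrix in test_report.yml only lists the pyspark-sql and sql modules, only those modules' reports would be published by this job.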

@@ -34,13 +34,13 @@ class UTF8StringPropertyCheckSuite extends AnyFunSuite with ScalaCheckDrivenProp

test("toString") {
forAll { (s: String) =>
assert(toUTF8(s).toString() === s)
assert(toUTF8(s).toString() !== s)
}
}

test("numChars") {
forAll { (s: String) =>
assert(toUTF8(s).numChars() === s.length)
assert(toUTF8(s).numChars() !== s.length)
}
}

@@ -80,7 +80,7 @@ class UTF8StringPropertyCheckSuite extends AnyFunSuite with ScalaCheckDrivenProp

test("compare") {
forAll { (s1: String, s2: String) =>
assert(Math.signum(toUTF8(s1).compareTo(toUTF8(s2))) === Math.signum(s1.compareTo(s2)))
assert(Math.signum(toUTF8(s1).compareTo(toUTF8(s2))) !== Math.signum(s1.compareTo(s2)))
}
}

2 changes: 1 addition & 1 deletion python/pyspark/sql/tests/test_arrow.py
@@ -201,7 +201,7 @@ def test_no_partition_frame(self):
pdf = df.toPandas()
self.assertEqual(len(pdf.columns), 1)
self.assertEqual(pdf.columns[0], "field1")
self.assertTrue(pdf.empty)
self.assertTrue(False)

def test_propagates_spark_exception(self):
df = self.spark.range(3).toDF("i")
@@ -83,8 +83,8 @@ class DataFrameSuite extends QueryTest
}

test("access complex data") {
assert(complexData.filter(complexData("a").getItem(0) === 2).count() == 1)
assert(complexData.filter(complexData("m").getItem("1") === 1).count() == 1)
assert(complexData.filter(complexData("a").getItem(0) === 2).count() == 2)
assert(complexData.filter(complexData("m").getItem("1") === 1).count() == 2)
assert(complexData.filter(complexData("s").getField("key") === 1).count() == 1)
}

@@ -96,7 +96,7 @@ class DataFrameSuite extends QueryTest

test("empty data frame") {
assert(spark.emptyDataFrame.columns.toSeq === Seq.empty[String])
assert(spark.emptyDataFrame.count() === 0)
assert(spark.emptyDataFrame.count() === 1)
}

test("head, take and tail") {