From 277151183fa4f6278f2dcd31253d6fba8a3880c4 Mon Sep 17 00:00:00 2001 From: HyukjinKwon Date: Mon, 3 Aug 2020 14:25:47 +0900 Subject: [PATCH 1/5] Publish failed and succeeded test reports in GitHub Actions --- .github/workflows/master.yml | 11 +++++++++-- .../unsafe/types/UTF8StringPropertyCheckSuite.scala | 6 +++--- python/pyspark/sql/tests/test_arrow.py | 2 +- .../scala/org/apache/spark/sql/DataFrameSuite.scala | 6 +++--- 4 files changed, 16 insertions(+), 9 deletions(-) diff --git a/.github/workflows/master.yml b/.github/workflows/master.yml index 009ebe90ddf51..e99194990318d 100644 --- a/.github/workflows/master.yml +++ b/.github/workflows/master.yml @@ -144,14 +144,15 @@ jobs: # PyArrow is not supported in PyPy yet, see ARROW-2651. # TODO(SPARK-32247): scipy installation with PyPy fails for an unknown reason. run: | - python3.6 -m pip install numpy pyarrow pandas scipy + python3.6 -m pip install numpy pyarrow pandas scipy xmlrunner python3.6 -m pip list + # PyPy does not have xmlrunner pypy3 -m pip install numpy pandas pypy3 -m pip list - name: Install Python packages (Python 3.8) if: contains(matrix.modules, 'pyspark') || (contains(matrix.modules, 'sql') && !contains(matrix.modules, 'sql-')) run: | - python3.8 -m pip install numpy pyarrow pandas scipy + python3.8 -m pip install numpy pyarrow pandas scipy xmlrunner python3.8 -m pip list # SparkR - name: Install R 4.0 @@ -177,6 +178,12 @@ jobs: mkdir -p ~/.m2 ./dev/run-tests --parallelism 2 --modules "$MODULES_TO_TEST" --included-tags "$INCLUDED_TAGS" --excluded-tags "$EXCLUDED_TAGS" rm -rf ~/.m2/repository/org/apache/spark + - name: "Publish test report: ${{ matrix.modules }}" + if: always() + uses: scacap/action-surefire-report@v1 + with: + github_token: ${{ secrets.TEST_REPORT_GITHUB_TOKEN }} + report_paths: "**/target/test-reports/*.xml" # Static analysis, and documentation build lint: diff --git a/common/unsafe/src/test/scala/org/apache/spark/unsafe/types/UTF8StringPropertyCheckSuite.scala b/common/unsafe/src/test/scala/org/apache/spark/unsafe/types/UTF8StringPropertyCheckSuite.scala index 69a082053aa65..51a54eb07db9f 100644 --- a/common/unsafe/src/test/scala/org/apache/spark/unsafe/types/UTF8StringPropertyCheckSuite.scala +++ b/common/unsafe/src/test/scala/org/apache/spark/unsafe/types/UTF8StringPropertyCheckSuite.scala @@ -34,13 +34,13 @@ class UTF8StringPropertyCheckSuite extends AnyFunSuite with ScalaCheckDrivenProp test("toString") { forAll { (s: String) => - assert(toUTF8(s).toString() === s) + assert(toUTF8(s).toString() !== s) } } test("numChars") { forAll { (s: String) => - assert(toUTF8(s).numChars() === s.length) + assert(toUTF8(s).numChars() !== s.length) } } @@ -80,7 +80,7 @@ class UTF8StringPropertyCheckSuite extends AnyFunSuite with ScalaCheckDrivenProp test("compare") { forAll { (s1: String, s2: String) => - assert(Math.signum(toUTF8(s1).compareTo(toUTF8(s2))) === Math.signum(s1.compareTo(s2))) + assert(Math.signum(toUTF8(s1).compareTo(toUTF8(s2))) !== Math.signum(s1.compareTo(s2))) } } diff --git a/python/pyspark/sql/tests/test_arrow.py b/python/pyspark/sql/tests/test_arrow.py index 6859084237b89..31287a24265df 100644 --- a/python/pyspark/sql/tests/test_arrow.py +++ b/python/pyspark/sql/tests/test_arrow.py @@ -201,7 +201,7 @@ def test_no_partition_frame(self): pdf = df.toPandas() self.assertEqual(len(pdf.columns), 1) self.assertEqual(pdf.columns[0], "field1") - self.assertTrue(pdf.empty) + self.assertTrue(False) def test_propagates_spark_exception(self): df = self.spark.range(3).toDF("i") diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala index bcfc77545bbd2..856af275ce534 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala @@ -83,8 +83,8 @@ class DataFrameSuite extends QueryTest } test("access complex data") { - assert(complexData.filter(complexData("a").getItem(0) === 2).count() == 1) - assert(complexData.filter(complexData("m").getItem("1") === 1).count() == 1) + assert(complexData.filter(complexData("a").getItem(0) === 2).count() == 2) + assert(complexData.filter(complexData("m").getItem("1") === 1).count() == 2) assert(complexData.filter(complexData("s").getField("key") === 1).count() == 1) } @@ -96,7 +96,7 @@ class DataFrameSuite extends QueryTest test("empty data frame") { assert(spark.emptyDataFrame.columns.toSeq === Seq.empty[String]) - assert(spark.emptyDataFrame.count() === 0) + assert(spark.emptyDataFrame.count() === 1) } test("head, take and tail") { From fa43663546fc13e1792c1e1177c4d421c2066163 Mon Sep 17 00:00:00 2001 From: HyukjinKwon Date: Wed, 5 Aug 2020 11:18:31 +0900 Subject: [PATCH 2/5] Switch back to GITHUB_TOKEN --- .github/workflows/master.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/master.yml b/.github/workflows/master.yml index e99194990318d..5aa887c15f034 100644 --- a/.github/workflows/master.yml +++ b/.github/workflows/master.yml @@ -182,7 +182,7 @@ jobs: if: always() uses: scacap/action-surefire-report@v1 with: - github_token: ${{ secrets.TEST_REPORT_GITHUB_TOKEN }} + github_token: ${{ secrets.GITHUB_TOKEN }} report_paths: "**/target/test-reports/*.xml" # Static analysis, and documentation build From dd2084982194fbe8a0f3809389a7704b52ec6a01 Mon Sep 17 00:00:00 2001 From: HyukjinKwon Date: Wed, 12 Aug 2020 15:25:26 +0900 Subject: [PATCH 3/5] Test uploading Junit test report artifact --- .github/workflows/master.yml | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/.github/workflows/master.yml b/.github/workflows/master.yml index 5aa887c15f034..799b10b18c323 100644 --- a/.github/workflows/master.yml +++ b/.github/workflows/master.yml @@ -3,10 +3,10 @@ name: master on: push: branches: - - master + - test pull_request: branches: - - master + - test jobs: # TODO(SPARK-32248): Recover JDK 11 builds @@ -178,12 +178,12 @@ jobs: mkdir -p ~/.m2 ./dev/run-tests --parallelism 2 --modules "$MODULES_TO_TEST" --included-tags "$INCLUDED_TAGS" --excluded-tags "$EXCLUDED_TAGS" rm -rf ~/.m2/repository/org/apache/spark - - name: "Publish test report: ${{ matrix.modules }}" + - name: "Archive test report: ${{ matrix.modules }}" if: always() - uses: scacap/action-surefire-report@v1 + uses: actions/upload-artifact@v2 with: - github_token: ${{ secrets.GITHUB_TOKEN }} - report_paths: "**/target/test-reports/*.xml" + name: junit-xml-reports-${{ github.sha }}-${{ matrix.modules }} + path: "**/target/test-reports/*.xml" # Static analysis, and documentation build lint: From ac047dc6ae275a53ee9a2290c92c428f45c11956 Mon Sep 17 00:00:00 2001 From: HyukjinKwon Date: Wed, 12 Aug 2020 15:48:39 +0900 Subject: [PATCH 4/5] Remove other tests to save resources --- .github/workflows/master.yml | 128 +---------------------------------- 1 file changed, 1 insertion(+), 127 deletions(-) diff --git a/.github/workflows/master.yml b/.github/workflows/master.yml index 799b10b18c323..37db98f6b2295 100644 --- a/.github/workflows/master.yml +++ b/.github/workflows/master.yml @@ -28,46 +28,13 @@ jobs: # Note that the modules below are from sparktestsupport/modules.py. modules: - |- - core, unsafe, kvstore, avro, - network-common, network-shuffle, repl, launcher, - examples, sketch, graphx - - |- - catalyst, hive-thriftserver - - |- - streaming, sql-kafka-0-10, streaming-kafka-0-10, - mllib-local, mllib, - yarn, mesos, kubernetes, hadoop-cloud, spark-ganglia-lgpl - - |- - pyspark-sql, pyspark-mllib, pyspark-resource - - |- - pyspark-core, pyspark-streaming, pyspark-ml - - |- - sparkr + pyspark-sql # Here, we split Hive and SQL tests into some of slow ones and the rest of them. included-tags: [""] excluded-tags: [""] comment: [""] include: - # Hive tests - - modules: hive - java: 1.8 - hadoop: hadoop3.2 - hive: hive2.3 - included-tags: org.apache.spark.tags.SlowHiveTest - comment: "- slow tests" - - modules: hive - java: 1.8 - hadoop: hadoop3.2 - hive: hive2.3 - excluded-tags: org.apache.spark.tags.SlowHiveTest - comment: "- other tests" # SQL tests - - modules: sql - java: 1.8 - hadoop: hadoop3.2 - hive: hive2.3 - included-tags: org.apache.spark.tags.ExtendedSQLTest - comment: "- slow tests" - modules: sql java: 1.8 hadoop: hadoop3.2 @@ -185,96 +152,3 @@ jobs: name: junit-xml-reports-${{ github.sha }}-${{ matrix.modules }} path: "**/target/test-reports/*.xml" - # Static analysis, and documentation build - lint: - name: Linters, licenses, dependencies and documentation generation - runs-on: ubuntu-latest - steps: - - name: Checkout Spark repository - uses: actions/checkout@v2 - - name: Cache Maven local repository - uses: actions/cache@v2 - with: - path: ~/.m2/repository - key: docs-maven-repo-${{ hashFiles('**/pom.xml') }} - restore-keys: | - docs-maven- - - name: Install JDK 1.8 - uses: actions/setup-java@v1 - with: - java-version: 1.8 - - name: Install Python 3.6 - uses: actions/setup-python@v2 - with: - python-version: 3.6 - architecture: x64 - - name: Install Python linter dependencies - run: | - # TODO(SPARK-32407): Sphinx 3.1+ does not correctly index nested classes. - # See also https://github.com/sphinx-doc/sphinx/issues/7551. - pip3 install flake8 'sphinx<3.1.0' numpy pydata_sphinx_theme - - name: Install R 4.0 - run: | - sudo sh -c "echo 'deb https://cloud.r-project.org/bin/linux/ubuntu bionic-cran40/' >> /etc/apt/sources.list" - curl -sL "https://keyserver.ubuntu.com/pks/lookup?op=get&search=0xE298A3A825C0D65DFD57CBB651716619E084DAB9" | sudo apt-key add - sudo apt-get update - sudo apt-get install -y r-base r-base-dev libcurl4-openssl-dev - - name: Install R linter dependencies and SparkR - run: | - sudo apt-get install -y libcurl4-openssl-dev - sudo Rscript -e "install.packages(c('devtools'), repos='https://cloud.r-project.org/')" - sudo Rscript -e "devtools::install_github('jimhester/lintr@v2.0.0')" - ./R/install-dev.sh - - name: Install Ruby 2.7 for documentation generation - uses: actions/setup-ruby@v1 - with: - ruby-version: 2.7 - - name: Install dependencies for documentation generation - run: | - sudo apt-get install -y libcurl4-openssl-dev pandoc - # TODO(SPARK-32407): Sphinx 3.1+ does not correctly index nested classes. - # See also https://github.com/sphinx-doc/sphinx/issues/7551. - pip install 'sphinx<3.1.0' mkdocs numpy pydata_sphinx_theme - gem install jekyll jekyll-redirect-from rouge - sudo Rscript -e "install.packages(c('devtools', 'testthat', 'knitr', 'rmarkdown', 'roxygen2'), repos='https://cloud.r-project.org/')" - - name: Scala linter - run: ./dev/lint-scala - - name: Java linter - run: ./dev/lint-java - - name: Python linter - run: ./dev/lint-python - - name: R linter - run: ./dev/lint-r - - name: License test - run: ./dev/check-license - - name: Dependencies test - run: ./dev/test-dependencies.sh - - name: Run documentation build - run: | - cd docs - jekyll build - - java11: - name: Java 11 build - runs-on: ubuntu-latest - steps: - - name: Checkout Spark repository - uses: actions/checkout@v2 - - name: Cache Maven local repository - uses: actions/cache@v2 - with: - path: ~/.m2/repository - key: java11-maven-${{ hashFiles('**/pom.xml') }} - restore-keys: | - java11-maven- - - name: Install Java 11 - uses: actions/setup-java@v1 - with: - java-version: 11 - - name: Build with Maven - run: | - export MAVEN_OPTS="-Xmx2g -XX:ReservedCodeCacheSize=1g -Dorg.slf4j.simpleLogger.defaultLogLevel=WARN" - export MAVEN_CLI_OPTS="--no-transfer-progress" - mkdir -p ~/.m2 - ./build/mvn $MAVEN_CLI_OPTS -DskipTests -Pyarn -Pmesos -Pkubernetes -Phive -Phive-thriftserver -Phadoop-cloud -Djava.version=11 install - rm -rf ~/.m2/repository/org/apache/spark From 74f8b897ab7f470f2ef26e5a031f4f31725e297d Mon Sep 17 00:00:00 2001 From: HyukjinKwon Date: Wed, 12 Aug 2020 17:12:51 +0900 Subject: [PATCH 5/5] test --- .github/workflows/master.yml | 6 ++-- .github/workflows/test_report.yml | 53 +++++++++++++++++++++++++++++++ 2 files changed, 56 insertions(+), 3 deletions(-) create mode 100644 .github/workflows/test_report.yml diff --git a/.github/workflows/master.yml b/.github/workflows/master.yml index 37db98f6b2295..809ce0133c57b 100644 --- a/.github/workflows/master.yml +++ b/.github/workflows/master.yml @@ -27,7 +27,7 @@ jobs: # Kinesis tests depends on external Amazon kinesis service. # Note that the modules below are from sparktestsupport/modules.py. modules: - - |- + - >- pyspark-sql # Here, we split Hive and SQL tests into some of slow ones and the rest of them. included-tags: [""] @@ -138,14 +138,14 @@ jobs: # Show installed packages in R. sudo Rscript -e 'pkg_list <- as.data.frame(installed.packages()[, c(1,3:4)]); pkg_list[is.na(pkg_list$Priority), 1:2, drop = FALSE]' # Run the tests. - - name: "Run tests: ${{ matrix.modules }}" + - name: Run tests run: | # Hive tests become flaky when running in parallel as it's too intensive. if [[ "$MODULES_TO_TEST" == "hive" ]]; then export SERIAL_SBT_TESTS=1; fi mkdir -p ~/.m2 ./dev/run-tests --parallelism 2 --modules "$MODULES_TO_TEST" --included-tags "$INCLUDED_TAGS" --excluded-tags "$EXCLUDED_TAGS" rm -rf ~/.m2/repository/org/apache/spark - - name: "Archive test report: ${{ matrix.modules }}" + - name: Archive test report if: always() uses: actions/upload-artifact@v2 with: diff --git a/.github/workflows/test_report.yml b/.github/workflows/test_report.yml new file mode 100644 index 0000000000000..13e291d3ca0e5 --- /dev/null +++ b/.github/workflows/test_report.yml @@ -0,0 +1,53 @@ +name: "Report JUnit test results" +on: + workflow_run: + workflows: ["master"] + types: + - completed + +jobs: + # TODO(SPARK-32248): Recover JDK 11 builds + # Build: build Spark and run the tests for specified modules. + test_report: + name: "Test report: ${{ matrix.modules }} ${{ matrix.comment }} (JDK ${{ matrix.java }}, ${{ matrix.hadoop }}, ${{ matrix.hive }})" + runs-on: ubuntu-latest + strategy: + fail-fast: false + matrix: + java: + - 1.8 + hadoop: + - hadoop3.2 + hive: + - hive2.3 + # TODO(SPARK-32246): We don't test 'streaming-kinesis-asl' for now. + # Kinesis tests depends on external Amazon kinesis service. + # Note that the modules below are from sparktestsupport/modules.py. + modules: + - >- + pyspark-sql + # Here, we split Hive and SQL tests into some of slow ones and the rest of them. + included-tags: [""] + excluded-tags: [""] + comment: [""] + include: + # SQL tests + - modules: sql + java: 1.8 + hadoop: hadoop3.2 + hive: hive2.3 + excluded-tags: org.apache.spark.tags.ExtendedSQLTest + comment: "- other tests" + steps: + - uses: haya14busa/action-workflow_run-status@v1 + - uses: actions/checkout@v2 + - uses: actions/download-artifact@v2 + with: + name: junit-xml-reports-${{ github.sha }}-${{ matrix.modules }} + path: junit-xml-reports + - name: "Publish test report: ${{ matrix.modules }}" + uses: scacap/action-surefire-report@v1 + with: + github_token: ${{ secrets.GITHUB_TOKEN }} + report_paths: "junit-xml-reports/*.xml" +