From a278c5eac87a0cbc577f002575e8c931bf3b8f52 Mon Sep 17 00:00:00 2001 From: HyukjinKwon Date: Fri, 31 Jul 2020 00:57:24 +0900 Subject: [PATCH] [SPARK-32497][INFRA] Installs qpdf package for CRAN check in GitHub Actions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ### What changes were proposed in this pull request? The CRAN check fails due to the size of the generated PDF docs as below: ``` ... WARNING ‘qpdf’ is needed for checks on size reduction of PDFs ... Status: 1 WARNING, 1 NOTE See ‘/home/runner/work/spark/spark/R/SparkR.Rcheck/00check.log’ for details. ``` This PR proposes to install `qpdf` in GitHub Actions. Note that I cannot reproduce this locally with the same R version, so I am not documenting it for now. Also, while I am here, I piggyback a change to install SparkR when the modules include `sparkr`. It is rather a followup of SPARK-32491. ### Why are the changes needed? To fix the SparkR CRAN check failure. ### Does this PR introduce _any_ user-facing change? No, dev-only. ### How was this patch tested? GitHub Actions will test it out. Closes #29306 from HyukjinKwon/SPARK-32497. Authored-by: HyukjinKwon Signed-off-by: HyukjinKwon --- .github/workflows/master.yml | 3 ++- dev/run-tests.py | 16 +++++++++------- 2 files changed, 11 insertions(+), 8 deletions(-) diff --git a/.github/workflows/master.yml b/.github/workflows/master.yml index 45d641cf5af58..17bcac7ebd38d 100644 --- a/.github/workflows/master.yml +++ b/.github/workflows/master.yml @@ -166,7 +166,8 @@ jobs: - name: Install R packages if: contains(matrix.modules, 'sparkr') run: | - sudo apt-get install -y libcurl4-openssl-dev + # qpdf is required to reduce the size of PDFs to make CRAN check pass. See SPARK-32497. + sudo apt-get install -y libcurl4-openssl-dev qpdf sudo Rscript -e "install.packages(c('knitr', 'rmarkdown', 'testthat', 'devtools', 'e1071', 'survival', 'arrow', 'roxygen2'), repos='https://cloud.r-project.org/')" # Show installed packages in R. 
sudo Rscript -e 'pkg_list <- as.data.frame(installed.packages()[, c(1,3:4)]); pkg_list[is.na(pkg_list$Priority), 1:2, drop = FALSE]' diff --git a/dev/run-tests.py b/dev/run-tests.py index a7a1c1caa88b4..976854dad8214 100755 --- a/dev/run-tests.py +++ b/dev/run-tests.py @@ -609,8 +609,14 @@ def main(): # Install SparkR should_only_test_modules = opts.modules is not None - if not should_only_test_modules: + test_modules = [] + if should_only_test_modules: + str_test_modules = [m.strip() for m in opts.modules.split(",")] + test_modules = [m for m in modules.all_modules if m.name in str_test_modules] + + if not should_only_test_modules or modules.sparkr in test_modules: # If tests modules are specified, we will not run R linter. + # SparkR needs the manual SparkR installation. if which("R"): run_cmd([os.path.join(SPARK_HOME, "R", "install-dev.sh")]) else: @@ -641,15 +647,11 @@ def main(): "and Hive profile", hive_version, "under environment", test_env) extra_profiles = get_hadoop_profiles(hadoop_version) + get_hive_profiles(hive_version) - changed_modules = None - test_modules = None - changed_files = None + changed_modules = [] + changed_files = [] included_tags = [] excluded_tags = [] if should_only_test_modules: - str_test_modules = [m.strip() for m in opts.modules.split(",")] - test_modules = [m for m in modules.all_modules if m.name in str_test_modules] - # If we're running the tests in Github Actions, attempt to detect and test # only the affected modules. if test_env == "github_actions":