From ee16a16ee9f991770693e5adfd43a121c4dfac9f Mon Sep 17 00:00:00 2001 From: Jacob Wujciak-Jens Date: Thu, 12 Oct 2023 05:21:09 +0200 Subject: [PATCH 01/20] use logging function --- r/tools/nixlibs.R | 73 +++++++++++++++++++++++++++-------------------- 1 file changed, 42 insertions(+), 31 deletions(-) diff --git a/r/tools/nixlibs.R b/r/tools/nixlibs.R index 3d651ec6ac9ea..9b1a21c7a8412 100644 --- a/r/tools/nixlibs.R +++ b/r/tools/nixlibs.R @@ -30,7 +30,19 @@ if (test_mode && is.na(VERSION)) { dev_version <- package_version(VERSION)[1, 4] is_release <- is.na(dev_version) || dev_version < "100" on_macos <- tolower(Sys.info()[["sysname"]]) == "darwin" -checksum_path <- Sys.getenv("ARROW_R_CHECKSUM_PATH", "tools/checksums") +env_is <- function(var, value) identical(tolower(Sys.getenv(var)), value) +# For local debugging, set ARROW_R_DEV=TRUE to make this script print more +quietly <- !env_is("ARROW_R_DEV", "true") + +# Log messages in the style of the configure script +lg <- function(..., .indent = "***") { + cat(.indent, " ", sprintf(...), "\n", sep = "") +} +# Exit the script after logging with .status=1 instead of throwing an error +exit <- function(..., .status = 1) { + lg(...) + q(save = "no", status = .status) +} # Small dev versions are added for R-only changes during CRAN submission. 
if (is_release) { @@ -43,7 +55,6 @@ if (is_release) { options(.arrow.cleanup = character()) # To collect dirs to rm on exit on.exit(unlink(getOption(".arrow.cleanup"))) -env_is <- function(var, value) identical(tolower(Sys.getenv(var)), value) try_download <- function(from_url, to_file, hush = quietly) { status <- try( @@ -94,14 +105,14 @@ download_binary <- function(lib) { binary_url <- paste0(arrow_repo, "bin/", lib, "/arrow-", VERSION, ".zip") if (try_download(binary_url, libfile)) { if (!quietly) { - cat(sprintf("*** Successfully retrieved C++ binaries (%s)\n", lib)) + lg("Successfully retrieved C++ binaries (%s)", lib) } } else { if (!quietly) { - cat(sprintf( - "*** Downloading libarrow binary failed for version %s (%s)\n at %s\n", + lg( + "Downloading libarrow binary failed for version %s (%s)\n at %s", VERSION, lib, binary_url - )) + ) } libfile <- NULL } @@ -209,13 +220,13 @@ select_binary <- function(os = tolower(Sys.info()[["sysname"]]), ifelse(is.null(openssl_version), NULL, paste0(os, arch, openssl_version)) }, error = function(e) { - cat("*** Unable to find libcurl and openssl\n") + lg("Unable to find libcurl and openssl") NULL } ) } else { # No binary available for arch - cat(sprintf("*** Building on %s %s\n", os, arch)) + lg("Building on %s %s", os, arch) binary <- NULL } return(binary) @@ -284,32 +295,32 @@ determine_binary_from_stderr <- function(errs) { if (is.null(attr(errs, "status"))) { # There was no error in compiling: so we found libcurl and OpenSSL >= 1.1, # openssl is < 3.0 - cat("*** Found libcurl and OpenSSL >= 1.1\n") + lg("Found libcurl and OpenSSL >= 1.1") return("openssl-1.1") # Else, check for dealbreakers: } else if (!on_macos && any(grepl("Using libc++", errs, fixed = TRUE))) { # Our linux binaries are all built with GNU stdlib so they fail with libc++ - cat("*** Linux binaries incompatible with libc++\n") + lg("Linux binaries incompatible with libc++") return(NULL) } else if (header_not_found("curl/curl", errs)) { - cat("*** 
libcurl not found\n") + lg("libcurl not found") return(NULL) } else if (header_not_found("openssl/opensslv", errs)) { - cat("*** OpenSSL not found\n") + lg("OpenSSL not found") return(NULL) } else if (any(grepl("OpenSSL version too old", errs))) { - cat("*** OpenSSL found but version >= 1.0.2 is required for some features\n") + lg("OpenSSL found but version >= 1.0.2 is required for some features") return(NULL) # Else, determine which other binary will work } else if (any(grepl("Using OpenSSL version 1.0", errs))) { if (on_macos) { - cat("*** OpenSSL 1.0 is not supported on macOS\n") + lg("OpenSSL 1.0 is not supported on macOS") return(NULL) } - cat("*** Found libcurl and OpenSSL < 1.1\n") + lg("Found libcurl and OpenSSL < 1.1") return("openssl-1.0") } else if (any(grepl("Using OpenSSL version 3", errs))) { - cat("*** Found libcurl and OpenSSL >= 3.0.0\n") + lg("Found libcurl and OpenSSL >= 3.0.0") return("openssl-3.0") } NULL @@ -438,7 +449,7 @@ find_local_source <- function() { ) for (cpp_dir in cpp_dir_options) { if (file.exists(file.path(cpp_dir, "src/arrow/api.h"))) { - cat(paste0("*** Found local C++ source: '", cpp_dir, "'\n")) + lg("Found local C++ source: '%s'", cpp_dir) return(cpp_dir) } } @@ -459,7 +470,7 @@ env_vars_as_string <- function(env_var_list) { if (nchar(env_var_string) > 30000) { # This could happen if the full paths in *_SOURCE_URL were *very* long. # A more formal check would look at getconf ARG_MAX, but this shouldn't matter - cat("*** Warning: Environment variables are very long. This could cause issues on some shells.\n") + lg("Warning: Environment variables are very long. 
This could cause issues on some shells.") } env_var_string } @@ -484,7 +495,7 @@ build_libarrow <- function(src_dir, dst_dir) { Sys.setenv(MAKEFLAGS = makeflags) } if (!quietly) { - cat("*** Building with MAKEFLAGS=", makeflags, "\n") + lg("Building with MAKEFLAGS=", makeflags) } # Check for libarrow build dependencies: # * cmake @@ -523,7 +534,7 @@ build_libarrow <- function(src_dir, dst_dir) { dep_source <- Sys.getenv("ARROW_DEPENDENCY_SOURCE") if (dep_source %in% c("", "AUTO") && !nzchar(Sys.which("pkg-config"))) { - cat("**** pkg-config not installed, setting ARROW_DEPENDENCY_SOURCE=BUNDLED\n") + lg("pkg-config not installed, setting ARROW_DEPENDENCY_SOURCE=BUNDLED", .indent = "****") env_var_list <- c(env_var_list, ARROW_DEPENDENCY_SOURCE = "BUNDLED") } @@ -552,7 +563,7 @@ build_libarrow <- function(src_dir, dst_dir) { } env_vars <- env_vars_as_string(env_var_list) - cat("**** arrow", ifelse(quietly, "", paste("with", env_vars)), "\n") + lg("arrow %s", ifelse(quietly, "", paste("with", env_vars)), .indent = "****") build_log_path <- tempfile(fileext = ".log") status <- suppressWarnings(system2( @@ -565,7 +576,7 @@ build_libarrow <- function(src_dir, dst_dir) { if (status != 0) { # It failed :( - cat("**** Error building Arrow C++.", "\n") + lg("Error building Arrow C++.", .indent = "****") if (quietly) { cat( "**** Printing contents of build log because the build failed", @@ -583,7 +594,7 @@ ensure_cmake <- function(cmake_minimum_required = "3.16") { if (is.null(cmake)) { # If not found, download it - cat("**** cmake\n") + lg("cmake", .indent = "****") CMAKE_VERSION <- Sys.getenv("CMAKE_VERSION", "3.26.4") if (on_macos) { postfix <- "-macos-universal.tar.gz" @@ -592,10 +603,10 @@ ensure_cmake <- function(cmake_minimum_required = "3.16") { } else if (tolower(Sys.info()[["machine"]]) == "x86_64") { postfix <- "-linux-x86_64.tar.gz" } else { - stop(paste0( + exit(paste0( "*** cmake was not found locally.\n", " Please make sure cmake >= ", cmake_minimum_required, 
- " is installed and available on your PATH.\n" + " is installed and available on your PATH." )) } cmake_binary_url <- paste0( @@ -606,7 +617,7 @@ ensure_cmake <- function(cmake_minimum_required = "3.16") { cmake_dir <- tempfile() download_successful <- try_download(cmake_binary_url, cmake_tar) if (!download_successful) { - cat(paste0( + exit(paste0( "*** cmake was not found locally and download failed.\n", " Make sure cmake >= ", cmake_minimum_required, " is installed and available on your PATH,\n", @@ -625,7 +636,7 @@ ensure_cmake <- function(cmake_minimum_required = "3.16") { } else { # Show which one we found # Full source builds will always show "cmake" in the logs - cat(sprintf("**** cmake: %s\n", cmake)) + lg("cmake: %s", cmake, .indent = "****") } cmake } @@ -821,12 +832,12 @@ if (!test_mode && !file.exists(paste0(dst_dir, "/include/arrow/api.h"))) { if (!identical(Sys.getenv("ARROW_DOWNLOADED_BINARIES"), "")) { bin_zip <- Sys.getenv("ARROW_DOWNLOADED_BINARIES") - cat(sprintf("*** Using pre-downloaded zip for libarrow binaries: %s\n", bin_zip)) + lg("Using pre-downloaded zip for libarrow binaries: %s", bin_zip) if (file.exists(bin_zip)) { bin_file <- tempfile() file.copy(bin_zip, bin_file) } else { - cat(sprintf("*** File not found: %s ($ARROW_DOWNLOADED_BINARIES)\n", bin_zip)) + lg("File not found: %s ($ARROW_DOWNLOADED_BINARIES)", bin_zip) bin_file <- NULL } } else if (download_ok) { @@ -853,9 +864,9 @@ if (!test_mode && !file.exists(paste0(dst_dir, "/include/arrow/api.h"))) { )) build_libarrow(src_dir, dst_dir) } else { - cat("*** Proceeding without libarrow (no local source)\n") + exit("Proceeding without libarrow (no local source)") } } else { - cat("*** Proceeding without libarrow (build not authorized)\n") + exit("Proceeding without libarrow (build not authorized)") } } From 9a7b8ab1fb421e3f91bdb352e4ad646e40267f9b Mon Sep 17 00:00:00 2001 From: Jacob Wujciak-Jens Date: Thu, 12 Oct 2023 05:21:48 +0200 Subject: [PATCH 02/20] add latests nightly check 
to nixlibs --- r/tools/nixlibs.R | 46 ++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 38 insertions(+), 8 deletions(-) diff --git a/r/tools/nixlibs.R b/r/tools/nixlibs.R index 9b1a21c7a8412..1fb85965adf40 100644 --- a/r/tools/nixlibs.R +++ b/r/tools/nixlibs.R @@ -16,7 +16,7 @@ # under the License. args <- commandArgs(TRUE) -VERSION <- args[1] +VERSION <- package_version(args[1]) dst_dir <- paste0("libarrow/arrow-", VERSION) # TESTING is set in test-nixlibs.R; it won't be set when called from configure @@ -24,12 +24,17 @@ test_mode <- exists("TESTING") # Prevent error with binary selection during testing. if (test_mode && is.na(VERSION)) { - VERSION <- "8.0.0.9000" + VERSION <- package_version("8.0.0.9000") } -dev_version <- package_version(VERSION)[1, 4] +dev_version <- VERSION[1, 4] +# Small dev versions are added for R-only changes during CRAN submission is_release <- is.na(dev_version) || dev_version < "100" + on_macos <- tolower(Sys.info()[["sysname"]]) == "darwin" +on_windows <- tolower(Sys.info()[["sysname"]]) == "windows" + + env_is <- function(var, value) identical(tolower(Sys.getenv(var)), value) # For local debugging, set ARROW_R_DEV=TRUE to make this script print more quietly <- !env_is("ARROW_R_DEV", "true") @@ -44,11 +49,39 @@ exit <- function(..., .status = 1) { q(save = "no", status = .status) } -# Small dev versions are added for R-only changes during CRAN submission. +find_latest_nightly <- function(description_version) { + res <- try( + { + url_file <- tempfile() + on.exit(unlink(url_file)) + # Binaries are only uploaded if all jobs pass so can just look at the source versions. 
+ download.file("https://nightlies.apache.org/arrow/r/src/contrib", url_file, quiet = TRUE) + urls <- readLines(url_file) + versions <- grep("arrow_.*\\.tar\\.gz", urls, value = TRUE) + versions <- sub(".*arrow_(.*)\\.tar\\.gz.*", "\\1", x = versions) + versions <- sapply(versions, package_version) + versions <- data.frame(do.call(rbind, versions)) + matching_major <- versions[versions$X1 == description_version[1, 1], ] + latest <- matching_major[which.max(matching_major$X4), ] + package_version(paste0(latest, collapse = ".")) + }, + silent = quietly + ) + if (inherits(res, "try-error")) { + lg("Failed to find latest nightly for %s", description_version) + latest <- description_version + } else { + lg("Found latest nightly for %s: %s", description_version, res) + latest <- res + } + latest +} + if (is_release) { - VERSION <- package_version(VERSION)[1, 1:3] + VERSION <- VERSION[1, 1:3] arrow_repo <- paste0(getOption("arrow.repo", sprintf("https://apache.jfrog.io/artifactory/arrow/r/%s", VERSION)), "/libarrow/") } else { + VERSION <- find_latest_nightly(VERSION) arrow_repo <- paste0(getOption("arrow.dev_repo", "https://nightlies.apache.org/arrow/r"), "/libarrow/") } @@ -79,8 +112,6 @@ if (not_cran || on_macos) { } } -# For local debugging, set ARROW_R_DEV=TRUE to make this script print more -quietly <- !env_is("ARROW_R_DEV", "true") # The default will build from source as a fallback if a binary is not found or shouldn't be used # Set LIBARROW_BUILD=FALSE to ensure that we use a previously built libarrow @@ -99,7 +130,6 @@ download_ok <- !test_mode && !env_is("TEST_OFFLINE_BUILD", "true") # `create_package_with_all_dependencies()` in install-arrow.R thirdparty_dependency_dir <- Sys.getenv("ARROW_THIRDPARTY_DEPENDENCY_DIR", "tools/thirdparty_dependencies") - download_binary <- function(lib) { libfile <- paste0("arrow-", VERSION, ".zip") binary_url <- paste0(arrow_repo, "bin/", lib, "/arrow-", VERSION, ".zip") From 81fa87032f150abe9fa7efc9a6dab889da21a0f0 Mon Sep 17 
00:00:00 2001 From: Jacob Wujciak-Jens Date: Thu, 12 Oct 2023 05:44:25 +0200 Subject: [PATCH 03/20] separate vars and functions --- r/tools/nixlibs.R | 131 +++++++++++++++++++++++----------------------- 1 file changed, 66 insertions(+), 65 deletions(-) diff --git a/r/tools/nixlibs.R b/r/tools/nixlibs.R index 1fb85965adf40..9e0af61b64bc6 100644 --- a/r/tools/nixlibs.R +++ b/r/tools/nixlibs.R @@ -15,40 +15,22 @@ # specific language governing permissions and limitations # under the License. -args <- commandArgs(TRUE) -VERSION <- package_version(args[1]) -dst_dir <- paste0("libarrow/arrow-", VERSION) - -# TESTING is set in test-nixlibs.R; it won't be set when called from configure -test_mode <- exists("TESTING") - -# Prevent error with binary selection during testing. -if (test_mode && is.na(VERSION)) { - VERSION <- package_version("8.0.0.9000") -} - -dev_version <- VERSION[1, 4] -# Small dev versions are added for R-only changes during CRAN submission -is_release <- is.na(dev_version) || dev_version < "100" - -on_macos <- tolower(Sys.info()[["sysname"]]) == "darwin" -on_windows <- tolower(Sys.info()[["sysname"]]) == "windows" - - +#### Fuctions #### check end of file for main logic env_is <- function(var, value) identical(tolower(Sys.getenv(var)), value) -# For local debugging, set ARROW_R_DEV=TRUE to make this script print more -quietly <- !env_is("ARROW_R_DEV", "true") # Log messages in the style of the configure script lg <- function(..., .indent = "***") { cat(.indent, " ", sprintf(...), "\n", sep = "") } + # Exit the script after logging with .status=1 instead of throwing an error exit <- function(..., .status = 1) { lg(...) 
q(save = "no", status = .status) } + +# checks the nightly repo for the latest nightly version X.Y.Z.100 find_latest_nightly <- function(description_version) { res <- try( { @@ -77,18 +59,6 @@ find_latest_nightly <- function(description_version) { latest } -if (is_release) { - VERSION <- VERSION[1, 1:3] - arrow_repo <- paste0(getOption("arrow.repo", sprintf("https://apache.jfrog.io/artifactory/arrow/r/%s", VERSION)), "/libarrow/") -} else { - VERSION <- find_latest_nightly(VERSION) - arrow_repo <- paste0(getOption("arrow.dev_repo", "https://nightlies.apache.org/arrow/r"), "/libarrow/") -} - -options(.arrow.cleanup = character()) # To collect dirs to rm on exit -on.exit(unlink(getOption(".arrow.cleanup"))) - - try_download <- function(from_url, to_file, hush = quietly) { status <- try( suppressWarnings( @@ -100,36 +70,6 @@ try_download <- function(from_url, to_file, hush = quietly) { !inherits(status, "try-error") && status == 0 } -not_cran <- env_is("NOT_CRAN", "true") -# enable full featured builds and binaries for macOS (or if the NOT_CRAN variable has been set) -if (not_cran || on_macos) { - # Set more eager defaults - if (env_is("LIBARROW_BINARY", "")) { - Sys.setenv(LIBARROW_BINARY = "true") - } - if (env_is("LIBARROW_MINIMAL", "")) { - Sys.setenv(LIBARROW_MINIMAL = "false") - } -} - - -# The default will build from source as a fallback if a binary is not found or shouldn't be used -# Set LIBARROW_BUILD=FALSE to ensure that we use a previously built libarrow -# and don't fall back to a full source build -build_ok <- !env_is("LIBARROW_BUILD", "false") - -# Check if we're authorized to download (not asked an offline build). 
-# (Note that cmake will still be downloaded if necessary -# https://arrow.apache.org/docs/developers/cpp/building.html#offline-builds) -download_ok <- !test_mode && !env_is("TEST_OFFLINE_BUILD", "true") - -# This "tools/thirdparty_dependencies" path, within the tar file, might exist if -# create_package_with_all_dependencies() was run, or if someone has created it -# manually before running make build. -# If you change this path, you also need to edit -# `create_package_with_all_dependencies()` in install-arrow.R -thirdparty_dependency_dir <- Sys.getenv("ARROW_THIRDPARTY_DEPENDENCY_DIR", "tools/thirdparty_dependencies") - download_binary <- function(lib) { libfile <- paste0("arrow-", VERSION, ".zip") binary_url <- paste0(arrow_repo, "bin/", lib, "/arrow-", VERSION, ".zip") @@ -852,7 +792,68 @@ cmake_find_package <- function(pkg, version = NULL, env_var_list) { system(cmake_cmd, ignore.stdout = TRUE, ignore.stderr = TRUE) == 0 } -##### +############### Main logic ############# +args <- commandArgs(TRUE) +VERSION <- package_version(args[1]) +dst_dir <- paste0("libarrow/arrow-", VERSION) + +# TESTING is set in test-nixlibs.R; it won't be set when called from configure +test_mode <- exists("TESTING") + +# Prevent error with binary selection during testing. 
+if (test_mode && is.na(VERSION)) { + VERSION <- package_version("8.0.0.9000") +} + +dev_version <- VERSION[1, 4] +# Small dev versions are added for R-only changes during CRAN submission +is_release <- is.na(dev_version) || dev_version < "100" + +on_macos <- tolower(Sys.info()[["sysname"]]) == "darwin" +on_windows <- tolower(Sys.info()[["sysname"]]) == "windows" + +# For local debugging, set ARROW_R_DEV=TRUE to make this script print more +quietly <- !env_is("ARROW_R_DEV", "true") + +if (is_release) { + VERSION <- VERSION[1, 1:3] + arrow_repo <- paste0(getOption("arrow.repo", sprintf("https://apache.jfrog.io/artifactory/arrow/r/%s", VERSION)), "/libarrow/") +} else { + VERSION <- find_latest_nightly(VERSION) + arrow_repo <- paste0(getOption("arrow.dev_repo", "https://nightlies.apache.org/arrow/r"), "/libarrow/") +} + +options(.arrow.cleanup = character()) # To collect dirs to rm on exit +on.exit(unlink(getOption(".arrow.cleanup"))) + +not_cran <- env_is("NOT_CRAN", "true") +# enable full featured builds for macOS in case of CRAN source builds. +if (not_cran || on_macos) { + # Set more eager defaults + if (env_is("LIBARROW_BINARY", "")) { + Sys.setenv(LIBARROW_BINARY = "true") + } + if (env_is("LIBARROW_MINIMAL", "")) { + Sys.setenv(LIBARROW_MINIMAL = "false") + } +} + +# The default will build from source as a fallback if a binary is not found or shouldn't be used +# Set LIBARROW_BUILD=FALSE to ensure that we use a previously built libarrow +# and don't fall back to a full source build +build_ok <- !env_is("LIBARROW_BUILD", "false") + +# Check if we're authorized to download (not asked an offline build). 
+# (Note that cmake will still be downloaded if necessary +# https://arrow.apache.org/docs/developers/cpp/building.html#offline-builds) +download_ok <- !test_mode && !env_is("TEST_OFFLINE_BUILD", "true") + +# This "tools/thirdparty_dependencies" path, within the tar file, might exist if +# create_package_with_all_dependencies() was run, or if someone has created it +# manually before running make build. +# If you change this path, you also need to edit +# `create_package_with_all_dependencies()` in install-arrow.R +thirdparty_dependency_dir <- Sys.getenv("ARROW_THIRDPARTY_DEPENDENCY_DIR", "tools/thirdparty_dependencies") if (!test_mode && !file.exists(paste0(dst_dir, "/include/arrow/api.h"))) { # If we're working in a local checkout and have already built the libs, we From 13e269883f16cea6a8634dedbeb9a2b38d300f2b Mon Sep 17 00:00:00 2001 From: Jacob Wujciak-Jens Date: Thu, 12 Oct 2023 05:48:58 +0200 Subject: [PATCH 04/20] add early exit on nixlib error --- r/configure | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/r/configure b/r/configure index addf7b59c7fbf..5a720b110a671 100755 --- a/r/configure +++ b/r/configure @@ -203,6 +203,15 @@ find_arrow () { do_bundled_build () { ${R_HOME}/bin/Rscript tools/nixlibs.R $VERSION + if [ $? -ne 0 ]; then + # If the nixlibs.R script failed, we can't continue + echo "------------------------- NOTE ---------------------------" + echo "There was an issue building the Arrow C++ libraries." + echo "See https://arrow.apache.org/docs/r/articles/install.html" + echo "---------------------------------------------------------" + exit 1 + fi + # Handle a few special cases, using what we know about the bundled build # and our ability to make edits to it since we "own" it. 
_LIBARROW_FOUND="`pwd`/libarrow/arrow-${VERSION}" From 45e90c13ea4047330c66fbf48db56c184da41163 Mon Sep 17 00:00:00 2001 From: Jacob Wujciak-Jens Date: Thu, 12 Oct 2023 06:12:16 +0200 Subject: [PATCH 05/20] integrate windows into nixlibs.R --- r/configure.win | 3 +-- r/tools/nixlibs.R | 22 +++++++++++++++------- 2 files changed, 16 insertions(+), 9 deletions(-) diff --git a/r/configure.win b/r/configure.win index eed0c632dea13..2d9e5cdf54e44 100755 --- a/r/configure.win +++ b/r/configure.win @@ -38,8 +38,7 @@ GCS_LIBS="-lcurl -lnormaliz -lssh2 -lgdi32 -lssl -lcrypto -lcrypt32 -lwldap32 \ function configure_release() { VERSION=$(grep ^Version DESCRIPTION | sed s/Version:\ //) # Try to find/download a C++ Arrow binary, - # including possibly a local .zip file if RWINLIB_LOCAL is set - "${R_HOME}/bin${R_ARCH_BIN}/Rscript.exe" "tools/winlibs.R" $VERSION $RWINLIB_LOCAL + "${R_HOME}/bin${R_ARCH_BIN}/Rscript.exe" "tools/nixlibs.R" $VERSION # If binary not found, script exits nonzero if [ $? -ne 0 ]; then echo "Arrow C++ library was not found" diff --git a/r/tools/nixlibs.R b/r/tools/nixlibs.R index 9e0af61b64bc6..e45fbed863419 100644 --- a/r/tools/nixlibs.R +++ b/r/tools/nixlibs.R @@ -144,6 +144,10 @@ download_binary <- function(lib) { # These string values, along with `NULL`, are the potential return values of # this function. identify_binary <- function(lib = Sys.getenv("LIBARROW_BINARY"), info = distro()) { + if (on_windows) { + return("windows") + } + lib <- tolower(lib) if (identical(lib, "")) { # Not specified. Check the allowlist. @@ -152,16 +156,16 @@ identify_binary <- function(lib = Sys.getenv("LIBARROW_BINARY"), info = distro() if (identical(lib, "false")) { # Do not download a binary - NULL + lib <- NULL } else if (!identical(lib, "true")) { # Env var provided an os-version to use, to override our logic. # We don't validate that this exists. 
If it doesn't, the download will fail # and the build will fall back to building from source - lib } else { # See if we can find a suitable binary - select_binary() + lib <- select_binary() } + return(lib) } check_allowlist <- function(os, allowed = "https://raw.githubusercontent.com/apache/arrow/main/r/tools/nixlibs-allowlist.txt") { @@ -795,7 +799,6 @@ cmake_find_package <- function(pkg, version = NULL, env_var_list) { ############### Main logic ############# args <- commandArgs(TRUE) VERSION <- package_version(args[1]) -dst_dir <- paste0("libarrow/arrow-", VERSION) # TESTING is set in test-nixlibs.R; it won't be set when called from configure test_mode <- exists("TESTING") @@ -855,14 +858,19 @@ download_ok <- !test_mode && !env_is("TEST_OFFLINE_BUILD", "true") # `create_package_with_all_dependencies()` in install-arrow.R thirdparty_dependency_dir <- Sys.getenv("ARROW_THIRDPARTY_DEPENDENCY_DIR", "tools/thirdparty_dependencies") +# configure.win uses a different libarrow dir +dst_dir <- paste0(ifelse(on_windows, "windows", "libarrow"), "/arrow-", VERSION) + if (!test_mode && !file.exists(paste0(dst_dir, "/include/arrow/api.h"))) { # If we're working in a local checkout and have already built the libs, we # don't need to do anything. 
Otherwise, # (1) Look for a prebuilt binary for this version bin_file <- src_dir <- NULL - if (!identical(Sys.getenv("ARROW_DOWNLOADED_BINARIES"), "")) { - bin_zip <- Sys.getenv("ARROW_DOWNLOADED_BINARIES") + # Keep backwards compatibility with winlibs.R + bin_zip <- Sys.getenv("ARROW_DOWNLOADED_BINARIES", Sys.getenv("RWINLIB_LOCAL", NA)) + + if (!is.na(bin_zip)) { lg("Using pre-downloaded zip for libarrow binaries: %s", bin_zip) if (file.exists(bin_zip)) { bin_file <- tempfile() @@ -884,7 +892,7 @@ if (!test_mode && !file.exists(paste0(dst_dir, "/include/arrow/api.h"))) { dir.create(dst_dir, showWarnings = !quietly, recursive = TRUE) unzip(bin_file, exdir = dst_dir) unlink(bin_file) - } else if (build_ok) { + } else if (build_ok && !on_windows) { # (2) Find source and build it src_dir <- find_local_source() if (!is.null(src_dir)) { From 7a904982ec9e2cd5d561b1d72aff4d9958d62976 Mon Sep 17 00:00:00 2001 From: Jacob Wujciak-Jens Date: Thu, 12 Oct 2023 06:21:55 +0200 Subject: [PATCH 06/20] fix version for testing --- r/tools/nixlibs.R | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/r/tools/nixlibs.R b/r/tools/nixlibs.R index e45fbed863419..4a8ad33a98c8e 100644 --- a/r/tools/nixlibs.R +++ b/r/tools/nixlibs.R @@ -798,16 +798,17 @@ cmake_find_package <- function(pkg, version = NULL, env_var_list) { ############### Main logic ############# args <- commandArgs(TRUE) -VERSION <- package_version(args[1]) +VERSION <- args[1] # TESTING is set in test-nixlibs.R; it won't be set when called from configure test_mode <- exists("TESTING") # Prevent error with binary selection during testing. 
if (test_mode && is.na(VERSION)) { - VERSION <- package_version("8.0.0.9000") + VERSION <- "8.0.0.9000" } +VERSION <- package_version(VERSION) dev_version <- VERSION[1, 4] # Small dev versions are added for R-only changes during CRAN submission is_release <- is.na(dev_version) || dev_version < "100" From 8d9ed40735f14bec9aa9c941fa5f18fb75f054c3 Mon Sep 17 00:00:00 2001 From: Jacob Wujciak-Jens Date: Thu, 12 Oct 2023 06:32:32 +0200 Subject: [PATCH 07/20] skip search if dev repo is not nightlies.a.o --- r/tools/nixlibs.R | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/r/tools/nixlibs.R b/r/tools/nixlibs.R index 4a8ad33a98c8e..8e243315b7b8c 100644 --- a/r/tools/nixlibs.R +++ b/r/tools/nixlibs.R @@ -32,6 +32,11 @@ exit <- function(..., .status = 1) { # checks the nightly repo for the latest nightly version X.Y.Z.100 find_latest_nightly <- function(description_version) { + if (!startsWith(arrow_repo, "https://nightlies.apache.org/arrow/r")) { + lg("Detected non standard dev repo: %s, not checking latest nightly version.", arrow_repo) + return(description_version) + } + res <- try( { url_file <- tempfile() From f8bbfa411368932b11cda917378fd61ab5e37fa8 Mon Sep 17 00:00:00 2001 From: Jacob Wujciak-Jens Date: Thu, 12 Oct 2023 06:53:03 +0200 Subject: [PATCH 08/20] fix arrow repo --- r/tools/nixlibs.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/r/tools/nixlibs.R b/r/tools/nixlibs.R index 8e243315b7b8c..185e4d3bc415e 100644 --- a/r/tools/nixlibs.R +++ b/r/tools/nixlibs.R @@ -828,8 +828,8 @@ if (is_release) { VERSION <- VERSION[1, 1:3] arrow_repo <- paste0(getOption("arrow.repo", sprintf("https://apache.jfrog.io/artifactory/arrow/r/%s", VERSION)), "/libarrow/") } else { - VERSION <- find_latest_nightly(VERSION) arrow_repo <- paste0(getOption("arrow.dev_repo", "https://nightlies.apache.org/arrow/r"), "/libarrow/") + VERSION <- find_latest_nightly(VERSION) } options(.arrow.cleanup = character()) # To collect dirs to rm on exit From 
a759f70ee9b917a1605a33ea267201bdc8d1febf Mon Sep 17 00:00:00 2001 From: Jacob Wujciak-Jens Date: Thu, 12 Oct 2023 07:41:51 +0200 Subject: [PATCH 09/20] set not_cran when dev version --- r/tools/nixlibs.R | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/r/tools/nixlibs.R b/r/tools/nixlibs.R index 185e4d3bc415e..ac1c78bad081d 100644 --- a/r/tools/nixlibs.R +++ b/r/tools/nixlibs.R @@ -824,10 +824,13 @@ on_windows <- tolower(Sys.info()[["sysname"]]) == "windows" # For local debugging, set ARROW_R_DEV=TRUE to make this script print more quietly <- !env_is("ARROW_R_DEV", "true") +not_cran <- env_is("NOT_CRAN", "true") + if (is_release) { VERSION <- VERSION[1, 1:3] arrow_repo <- paste0(getOption("arrow.repo", sprintf("https://apache.jfrog.io/artifactory/arrow/r/%s", VERSION)), "/libarrow/") } else { + not_cran <- TRUE arrow_repo <- paste0(getOption("arrow.dev_repo", "https://nightlies.apache.org/arrow/r"), "/libarrow/") VERSION <- find_latest_nightly(VERSION) } @@ -835,7 +838,6 @@ if (is_release) { options(.arrow.cleanup = character()) # To collect dirs to rm on exit on.exit(unlink(getOption(".arrow.cleanup"))) -not_cran <- env_is("NOT_CRAN", "true") # enable full featured builds for macOS in case of CRAN source builds. 
if (not_cran || on_macos) { # Set more eager defaults From 2d5d66ee4003cc76eaf8ce9101c7b78cab247c91 Mon Sep 17 00:00:00 2001 From: Jacob Wujciak-Jens Date: Thu, 12 Oct 2023 07:43:45 +0200 Subject: [PATCH 10/20] automatically handle version mismatch between package and libarrow main use case automatic download of nightly versions --- r/configure | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/r/configure b/r/configure index 5a720b110a671..0bb207381c185 100755 --- a/r/configure +++ b/r/configure @@ -212,9 +212,18 @@ do_bundled_build () { exit 1 fi + if [ -d "libarrow/arrow-$VERSION" ]; then + _LIBARROW_FOUND="`pwd`/libarrow/arrow-${VERSION}" + else + # It's possible that the version of the libarrow binary is not identical to the + # R version, e.g. if the R build is a patch release, so find what the dir is + # actually called. If there is more than one version present, use the one + # with the highest version: + _LIBARROW_FOUND="`pwd`/libarrow/$(ls libarrow/ | grep ^arrow- | tail -n 1)" + fi + # Handle a few special cases, using what we know about the bundled build # and our ability to make edits to it since we "own" it. 
- _LIBARROW_FOUND="`pwd`/libarrow/arrow-${VERSION}" LIB_DIR="${_LIBARROW_FOUND}/lib" if [ -d "$LIB_DIR" ]; then if [ "${PKG_CONFIG_AVAILABLE}" = "true" ]; then From 32a06e42b002cb5694b6fd76ebc248bd3ebb997b Mon Sep 17 00:00:00 2001 From: Jacob Wujciak-Jens Date: Thu, 12 Oct 2023 08:02:34 +0200 Subject: [PATCH 11/20] fix libarrow dir on windows --- r/tools/nixlibs.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/r/tools/nixlibs.R b/r/tools/nixlibs.R index ac1c78bad081d..6a2d5d671114b 100644 --- a/r/tools/nixlibs.R +++ b/r/tools/nixlibs.R @@ -866,8 +866,8 @@ download_ok <- !test_mode && !env_is("TEST_OFFLINE_BUILD", "true") # `create_package_with_all_dependencies()` in install-arrow.R thirdparty_dependency_dir <- Sys.getenv("ARROW_THIRDPARTY_DEPENDENCY_DIR", "tools/thirdparty_dependencies") -# configure.win uses a different libarrow dir -dst_dir <- paste0(ifelse(on_windows, "windows", "libarrow"), "/arrow-", VERSION) +# configure.win uses a different libarrow dir and and the zip is already nested +dst_dir <- ifelse(on_windows, "windows", paste0("libarrow/arrow-", VERSION)) if (!test_mode && !file.exists(paste0(dst_dir, "/include/arrow/api.h"))) { # If we're working in a local checkout and have already built the libs, we From 680748bbc1ac5db072280580cff9ea810cedfff7 Mon Sep 17 00:00:00 2001 From: Jacob Wujciak-Jens Date: Fri, 13 Oct 2023 07:30:11 +0200 Subject: [PATCH 12/20] remove winlibs --- r/tools/winlibs.R | 100 --------------------- r/vignettes/developers/install_details.Rmd | 59 ++++++------ 2 files changed, 29 insertions(+), 130 deletions(-) delete mode 100644 r/tools/winlibs.R diff --git a/r/tools/winlibs.R b/r/tools/winlibs.R deleted file mode 100644 index 314062044dcf2..0000000000000 --- a/r/tools/winlibs.R +++ /dev/null @@ -1,100 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. 
See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -args <- commandArgs(TRUE) -VERSION <- args[1] -dev_version <- package_version(VERSION)[1, 4] -# Small dev versions are added for R-only changes during CRAN submission -is_release <- is.na(dev_version) || dev_version < "100" -env_is <- function(var, value) identical(tolower(Sys.getenv(var)), value) -# We want to log the message in the style of the configure script -# not as an R error. Use `return` to exit the script after logging. -lg <- function(...) 
{ - cat("*** ", sprintf(...), "\n") -} - -if (is_release) { - # This is a release version, so we need to use the major.minor.patch version without - # the CRAN suffix/dev_version - VERSION <- package_version(VERSION)[1, 1:3] - # %1$s uses the first variable for both substitutions - url_template <- paste0( - getOption("arrow.repo", "https://apache.jfrog.io/artifactory/arrow/r/%1$s"), - "/libarrow/bin/windows/arrow-%1$s.zip" - ) -} else { - url_template <- paste0( - getOption("arrow.dev_repo", "https://nightlies.apache.org/arrow/r"), - "/libarrow/bin/windows/arrow-%s.zip" - ) -} - -if (file.exists(sprintf("windows/arrow-%s/include/arrow/api.h", VERSION))) { - lg("Found local Arrow %s!", VERSION) - return() -} - -zip_file <- sprintf("arrow-%s.zip", VERSION) - -if (length(args) > 1) { - # Arg 2 would be the path/to/lib.zip - localfile <- args[2] - if (!file.exists(localfile)) { - lg("RWINLIB_LOCAL '%s' does not exist. Build will fail.", localfile) - return() - } else { - lg("Using RWINLIB_LOCAL %s", localfile) - } - file.copy(localfile, zip_file) -} else { - quietly <- !identical(tolower(Sys.getenv("ARROW_R_DEV")), "true") - binary_url <- sprintf(url_template, VERSION) - try( - suppressWarnings( - download.file(binary_url, zip_file, quiet = quietly) - ), - silent = quietly - ) - - if (!file.exists(zip_file) || file.size(zip_file) == 0) { - lg("Failed to download libarrow binary from %s. Build will fail.", binary_url) - return() - } - - checksum_path <- Sys.getenv("ARROW_R_CHECKSUM_PATH", "tools/checksums") - # Explicitly setting the env var to "false" will skip checksum validation - # e.g. in case the included checksums are stale. 
- skip_checksum <- env_is("ARROW_R_ENFORCE_CHECKSUM", "false") - enforce_checksum <- env_is("ARROW_R_ENFORCE_CHECKSUM", "true") - # validate binary checksum for CRAN release only - if (!skip_checksum && dir.exists(checksum_path) && is_release || - enforce_checksum) { - checksum_file <- sprintf("%s/windows/arrow-%s.zip.sha512", checksum_path, VERSION) - # rtools does not have shasum with default config - checksum_ok <- system2("sha512sum", args = c("--status", "-c", checksum_file)) - - if (checksum_ok != 0) { - lg("Checksum validation failed for libarrow binary: %s", zip_file) - return() - } - lg("Checksum validated successfully for libarrow binary: %s", zip_file) - } -} - -dir.create("windows", showWarnings = FALSE) -unzip(zip_file, exdir = "windows") -unlink(zip_file) diff --git a/r/vignettes/developers/install_details.Rmd b/r/vignettes/developers/install_details.Rmd index 2f2f126b61b38..83700b196a642 100644 --- a/r/vignettes/developers/install_details.Rmd +++ b/r/vignettes/developers/install_details.Rmd @@ -5,12 +5,12 @@ description: > output: rmarkdown::html_vignette --- -This document is intended specifically for arrow _developers_ who wish to know -more about these scripts. If you are an arrow _user_ looking for help with +This document is intended specifically for arrow _developers_ who wish to know +more about these scripts. If you are an arrow _user_ looking for help with installing arrow, please see [the installation guide](../install.html) -The arrow R package requires that Arrow C++ library (also known as libarrow) to -be installed in order to work properly. There are a number of different ways +The arrow R package requires that Arrow C++ library (also known as libarrow) to +be installed in order to work properly. There are a number of different ways in which libarrow could be installed: * as part of the R package installation process @@ -21,9 +21,9 @@ Below, we discuss each of these setups in turn. 
# Installing libarrow during R package installation -There are a number of scripts that are triggered -when `R CMD INSTALL .` is run and for Arrow users, these should all just work -without configuration and pull in the most complete pieces (e.g. official +There are a number of scripts that are triggered +when `R CMD INSTALL .` is run and for Arrow users, these should all just work +without configuration and pull in the most complete pieces (e.g. official binaries that we host). One of the jobs of these scripts is to work out if libarrow is installed, and if not, install it. @@ -35,23 +35,22 @@ handle finding the libarrow, setting up the build variables necessary, and writing the package Makevars file that is used to compile the C++ code in the R package. -* `tools/nixlibs.R` - this script is called by `configure` on Linux +* `tools/nixlibs.R` - this script is called by `configure` on Linux and macOS (or on any non-windows OS with the environment variable -`FORCE_BUNDLED_BUILD=true`) if an existing libarrow installation cannot be found. -This sets up the build process for our bundled builds (which is the default on -linux) and checks for binaries or downloads libarrow from source depending on -dependency availability and build configuration. - -* `tools/winlibs.R` - this script is called by `configure.win` on Windows -when environment variable `ARROW_HOME` is not set. It looks for an existing libarrow -installation, and if it can't find one downloads an appropriate libarrow binary. - -* `inst/build_arrow_static.sh` - called by `tools/nixlibs.R` when libarrow +`FORCE_BUNDLED_BUILD=true`). On windows this script is called by +`configure.win` when environment variable `ARROW_HOME` is not set. +It looks for an existing libarrow installation, and if it can't find one +downloads an appropriate libarrow binary. 
+On non-windows if no binary could be found, the script sets up the build +process for our bundled builds (which is the default on linux) and checks +for dependencies. + +* `inst/build_arrow_static.sh` - called by `tools/nixlibs.R` when libarrow needs to be built. It builds libarrow for a bundled, static build, and mirrors the steps described in the [Arrow R developer guide](./setup.html) This build script is also what is used to generate our prebuilt binaries. -The actions taken by these scripts to resolve dependencies and install the +The actions taken by these scripts to resolve dependencies and install the correct components are described below. ## How the R package finds libarrow @@ -80,7 +79,7 @@ On Linux and macOS, the core logic is: 2. Find libarrow on the system. If it is present, make sure that its version is compatible with the R package. 3. If no suitable libarrow is found, download it (where allowed) or build it from source. -4. Determine what features this libarrow has and what other flags it requires, +4. Determine what features this libarrow has and what other flags it requires, and set them in `src/Makevars` for use when compiling the bindings. #### Finding libarrow on the system @@ -93,7 +92,7 @@ The `configure` script will look for libarrow in three places: If a libarrow build is found, it will then check that the version of that C++ library matches that of the R package. If the versions do not match, like when you've installed -a system package for a release version but you have a development version of the +a system package for a release version but you have a development version of the R package, that libarrow will not be used. If both the C++ library and R package are on development versions, you will see a warning message advising you that if you do have trouble, you should ensure that the C++ library was built from the same commit as the R @@ -101,10 +100,10 @@ package, as development version numbers do not change with every commit. 
#### Prebuilt binaries -If libarrow is not found on the system, the R package installation +If libarrow is not found on the system, the R package installation script will next attempt to download prebuilt libarrow binaries -that match your both your local operating system, required -dependencies (e.g. openssl version) and arrow R package version. +that match your both your local operating system, required +dependencies (e.g. openssl version) and arrow R package version. These are used automatically on many Linux distributions (x86_64 architecture only), according to the [allowlist](https://github.com/apache/arrow/blob/main/r/tools/nixlibs-allowlist.txt). @@ -115,18 +114,18 @@ downloaded and bundled when your R package compiles. #### Building from source If no suitable libarrow binary is found, it will attempt to build it locally. -First, it will also look to see if you are in a checkout of the `apache/arrow` +First, it will also look to see if you are in a checkout of the `apache/arrow` git repository and thus have the libarrow source files there. Otherwise, it builds from the source files included in the package. -Depending on your system, building libarrow from source may be slow. If +Depending on your system, building libarrow from source may be slow. If libarrow is built from source, `inst/build_arrow_static.sh` is executed. 
# Using the R package with libarrow installed as a system package If you are authorized to install system packages and you're installing a CRAN release, -you may want to use the official Apache Arrow release packages corresponding to -the R package version via software distribution tools such as `apt` or `yum` -(though there are some drawbacks: see the +you may want to use the official Apache Arrow release packages corresponding to +the R package version via software distribution tools such as `apt` or `yum` +(though there are some drawbacks: see the ["Troubleshooting" section in the main installation docs](../install.html#troubleshooting)). See the [Arrow project installation page](https://arrow.apache.org/install/) to find pre-compiled binary packages for some common Linux distributions, @@ -138,5 +137,5 @@ be useful because the versions will not match. # Using the R package with an existing libarrow build This setup is much more common for arrow developers, who may be needing to make -changes to both the R package and libarrow source code. See +changes to both the R package and libarrow source code. See the [developer setup docs](./setup.html) for more information. From 1b33ce462b4961d18af15e846ee65eafc0876912 Mon Sep 17 00:00:00 2001 From: Jacob Wujciak-Jens Date: Fri, 13 Oct 2023 07:52:49 +0200 Subject: [PATCH 13/20] fix checksum_path --- r/tools/nixlibs.R | 1 + 1 file changed, 1 insertion(+) diff --git a/r/tools/nixlibs.R b/r/tools/nixlibs.R index 6a2d5d671114b..d4cd03ec1d616 100644 --- a/r/tools/nixlibs.R +++ b/r/tools/nixlibs.R @@ -95,6 +95,7 @@ download_binary <- function(lib) { # e.g. in case the included checksums are stale. 
skip_checksum <- env_is("ARROW_R_ENFORCE_CHECKSUM", "false") enforce_checksum <- env_is("ARROW_R_ENFORCE_CHECKSUM", "true") + checksum_path <- Sys.getenv("ARROW_R_CHECKSUM_PATH", "tools/checksums") # validate binary checksum for CRAN release only if (!skip_checksum && dir.exists(checksum_path) && is_release || enforce_checksum) { From 86d8f82681d4f3fc326269f2fa9ee4d2f31b601b Mon Sep 17 00:00:00 2001 From: Jacob Wujciak-Jens Date: Wed, 18 Oct 2023 05:32:16 +0200 Subject: [PATCH 14/20] Apply suggestions from code review Co-authored-by: Dewey Dunnington --- r/tools/nixlibs.R | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/r/tools/nixlibs.R b/r/tools/nixlibs.R index d4cd03ec1d616..22c53910856e3 100644 --- a/r/tools/nixlibs.R +++ b/r/tools/nixlibs.R @@ -46,10 +46,10 @@ find_latest_nightly <- function(description_version) { urls <- readLines(url_file) versions <- grep("arrow_.*\\.tar\\.gz", urls, value = TRUE) versions <- sub(".*arrow_(.*)\\.tar\\.gz.*", "\\1", x = versions) - versions <- sapply(versions, package_version) - versions <- data.frame(do.call(rbind, versions)) - matching_major <- versions[versions$X1 == description_version[1, 1], ] - latest <- matching_major[which.max(matching_major$X4), ] + versions <- lapply(versions, package_version) + versions <- as.data.frame(do.call(rbind, versions)) + matching_major <- versions[versions$X1 == description_version[1, 1], , drop = FALSE] + latest <- matching_major[which.max(matching_major$X4)[1], , drop = TRUE] package_version(paste0(latest, collapse = ".")) }, silent = quietly @@ -171,7 +171,7 @@ identify_binary <- function(lib = Sys.getenv("LIBARROW_BINARY"), info = distro() # See if we can find a suitable binary lib <- select_binary() } - return(lib) + lib } check_allowlist <- function(os, allowed = "https://raw.githubusercontent.com/apache/arrow/main/r/tools/nixlibs-allowlist.txt") { From e0a121a37249fe70754a1d2ee12c793046e20237 Mon Sep 17 00:00:00 2001 From: Jacob Wujciak-Jens Date: Wed, 
18 Oct 2023 05:33:15 +0200 Subject: [PATCH 15/20] use readLines directly --- r/tools/nixlibs.R | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/r/tools/nixlibs.R b/r/tools/nixlibs.R index 22c53910856e3..55c8173f3ddff 100644 --- a/r/tools/nixlibs.R +++ b/r/tools/nixlibs.R @@ -39,11 +39,8 @@ find_latest_nightly <- function(description_version) { res <- try( { - url_file <- tempfile() - on.exit(unlink(url_file)) # Binaries are only uploaded if all jobs pass so can just look at the source versions. - download.file("https://nightlies.apache.org/arrow/r/src/contrib", url_file, quiet = TRUE) - urls <- readLines(url_file) + urls <- readLines("https://nightlies.apache.org/arrow/r/src/contrib") versions <- grep("arrow_.*\\.tar\\.gz", urls, value = TRUE) versions <- sub(".*arrow_(.*)\\.tar\\.gz.*", "\\1", x = versions) versions <- lapply(versions, package_version) From 56219939908f68aa96a826c40e9b6058c07b77e6 Mon Sep 17 00:00:00 2001 From: Jacob Wujciak-Jens Date: Wed, 18 Oct 2023 05:49:48 +0200 Subject: [PATCH 16/20] fix check for existing libarrow on windows --- r/tools/nixlibs.R | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/r/tools/nixlibs.R b/r/tools/nixlibs.R index 55c8173f3ddff..604e0b3d1842d 100644 --- a/r/tools/nixlibs.R +++ b/r/tools/nixlibs.R @@ -864,10 +864,19 @@ download_ok <- !test_mode && !env_is("TEST_OFFLINE_BUILD", "true") # `create_package_with_all_dependencies()` in install-arrow.R thirdparty_dependency_dir <- Sys.getenv("ARROW_THIRDPARTY_DEPENDENCY_DIR", "tools/thirdparty_dependencies") +arrow_versioned <- paste0("arrow-", VERSION) # configure.win uses a different libarrow dir and and the zip is already nested -dst_dir <- ifelse(on_windows, "windows", paste0("libarrow/arrow-", VERSION)) +if (on_windows) { + lib_dir <- "windows" + dst_dir <- lib_dir +} else { + lib_dir <- "libarrow" + dst_dir <- file.path(lib_dir, arrow_versioned) +} + +api_h <- file.path(lib_dir, arrow_versioned, 
"include/arrow/api.h") -if (!test_mode && !file.exists(paste0(dst_dir, "/include/arrow/api.h"))) { +if (!test_mode && !file.exists(api_h)) { # If we're working in a local checkout and have already built the libs, we # don't need to do anything. Otherwise, # (1) Look for a prebuilt binary for this version From 12339834a07711571054b2a58df4610df38b1ecd Mon Sep 17 00:00:00 2001 From: Jacob Wujciak-Jens Date: Wed, 18 Oct 2023 06:20:27 +0200 Subject: [PATCH 17/20] add wrapper function to add things to the clean up list --- r/tools/nixlibs.R | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/r/tools/nixlibs.R b/r/tools/nixlibs.R index 604e0b3d1842d..dcae1cce2c99c 100644 --- a/r/tools/nixlibs.R +++ b/r/tools/nixlibs.R @@ -23,6 +23,10 @@ lg <- function(..., .indent = "***") { cat(.indent, " ", sprintf(...), "\n", sep = "") } +del <- function(path) { + options(.arrow.cleanup = c(getOption(".arrow.cleanup"), path)) +} + # Exit the script after logging with .status=1 instead of throwing an error exit <- function(..., .status = 1) { lg(...) 
@@ -486,7 +490,7 @@ build_libarrow <- function(src_dir, dst_dir) { # But normally we'll just build in a tmp dir build_dir <- tempfile() } - options(.arrow.cleanup = c(getOption(".arrow.cleanup"), build_dir)) + del(build_dir) env_var_list <- c( SOURCE_DIR = src_dir, @@ -604,7 +608,7 @@ ensure_cmake <- function(cmake_minimum_required = "3.16") { } untar(cmake_tar, exdir = cmake_dir) unlink(cmake_tar) - options(.arrow.cleanup = c(getOption(".arrow.cleanup"), cmake_dir)) + del(cmake_dir) cmake <- paste0( cmake_dir, "/cmake-", CMAKE_VERSION, sub(".tar.gz", "", postfix, fixed = TRUE), @@ -784,7 +788,7 @@ with_cloud_support <- function(env_var_list) { cmake_find_package <- function(pkg, version = NULL, env_var_list) { td <- tempfile() dir.create(td) - options(.arrow.cleanup = c(getOption(".arrow.cleanup"), td)) + del(td) find_package <- paste0("find_package(", pkg, " ", version, " REQUIRED)") writeLines(find_package, file.path(td, "CMakeLists.txt")) env_vars <- env_vars_as_string(env_var_list) @@ -833,8 +837,10 @@ if (is_release) { VERSION <- find_latest_nightly(VERSION) } -options(.arrow.cleanup = character()) # To collect dirs to rm on exit -on.exit(unlink(getOption(".arrow.cleanup"))) +# To collect dirs to rm on exit, use del() to add dirs +# we reset it to avoid errors on reruns in the same session. +options(.arrow.cleanup = character()) +on.exit(unlink(getOption(".arrow.cleanup"), recursive = TRUE), add = TRUE) # enable full featured builds for macOS in case of CRAN source builds. if (not_cran || on_macos) { From bbf77873da3676327c32a7adb9ec6a70da969abb Mon Sep 17 00:00:00 2001 From: Jacob Wujciak-Jens Date: Wed, 18 Oct 2023 06:35:24 +0200 Subject: [PATCH 18/20] Revert "Apply suggestions from code review" This reverts commit 86d8f82681d4f3fc326269f2fa9ee4d2f31b601b. 
--- r/tools/nixlibs.R | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/r/tools/nixlibs.R b/r/tools/nixlibs.R index dcae1cce2c99c..385ce6c621058 100644 --- a/r/tools/nixlibs.R +++ b/r/tools/nixlibs.R @@ -47,10 +47,10 @@ find_latest_nightly <- function(description_version) { urls <- readLines("https://nightlies.apache.org/arrow/r/src/contrib") versions <- grep("arrow_.*\\.tar\\.gz", urls, value = TRUE) versions <- sub(".*arrow_(.*)\\.tar\\.gz.*", "\\1", x = versions) - versions <- lapply(versions, package_version) - versions <- as.data.frame(do.call(rbind, versions)) - matching_major <- versions[versions$X1 == description_version[1, 1], , drop = FALSE] - latest <- matching_major[which.max(matching_major$X4)[1], , drop = TRUE] + versions <- sapply(versions, package_version) + versions <- data.frame(do.call(rbind, versions)) + matching_major <- versions[versions$X1 == description_version[1, 1], ] + latest <- matching_major[which.max(matching_major$X4), ] package_version(paste0(latest, collapse = ".")) }, silent = quietly @@ -172,7 +172,7 @@ identify_binary <- function(lib = Sys.getenv("LIBARROW_BINARY"), info = distro() # See if we can find a suitable binary lib <- select_binary() } - lib + return(lib) } check_allowlist <- function(os, allowed = "https://raw.githubusercontent.com/apache/arrow/main/r/tools/nixlibs-allowlist.txt") { From 4b482be830ef285f3d48c84aa8202a45a3c8508d Mon Sep 17 00:00:00 2001 From: Jacob Wujciak-Jens Date: Wed, 18 Oct 2023 06:35:48 +0200 Subject: [PATCH 19/20] use implicit return --- r/tools/nixlibs.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/r/tools/nixlibs.R b/r/tools/nixlibs.R index 385ce6c621058..bb5b74a367f70 100644 --- a/r/tools/nixlibs.R +++ b/r/tools/nixlibs.R @@ -172,7 +172,7 @@ identify_binary <- function(lib = Sys.getenv("LIBARROW_BINARY"), info = distro() # See if we can find a suitable binary lib <- select_binary() } - return(lib) + lib } check_allowlist <- function(os, allowed = 
"https://raw.githubusercontent.com/apache/arrow/main/r/tools/nixlibs-allowlist.txt") { From d219ba4e69efd142397580a37ae1eeb1d99207ea Mon Sep 17 00:00:00 2001 From: Jacob Wujciak-Jens Date: Wed, 25 Oct 2023 02:52:41 +0200 Subject: [PATCH 20/20] rename del to cleanup --- r/tools/nixlibs.R | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/r/tools/nixlibs.R b/r/tools/nixlibs.R index bb5b74a367f70..96f2b858cf180 100644 --- a/r/tools/nixlibs.R +++ b/r/tools/nixlibs.R @@ -23,7 +23,7 @@ lg <- function(..., .indent = "***") { cat(.indent, " ", sprintf(...), "\n", sep = "") } -del <- function(path) { +cleanup <- function(path) { options(.arrow.cleanup = c(getOption(".arrow.cleanup"), path)) } @@ -490,7 +490,7 @@ build_libarrow <- function(src_dir, dst_dir) { # But normally we'll just build in a tmp dir build_dir <- tempfile() } - del(build_dir) + cleanup(build_dir) env_var_list <- c( SOURCE_DIR = src_dir, @@ -608,7 +608,7 @@ ensure_cmake <- function(cmake_minimum_required = "3.16") { } untar(cmake_tar, exdir = cmake_dir) unlink(cmake_tar) - del(cmake_dir) + cleanup(cmake_dir) cmake <- paste0( cmake_dir, "/cmake-", CMAKE_VERSION, sub(".tar.gz", "", postfix, fixed = TRUE), @@ -788,7 +788,7 @@ with_cloud_support <- function(env_var_list) { cmake_find_package <- function(pkg, version = NULL, env_var_list) { td <- tempfile() dir.create(td) - del(td) + cleanup(td) find_package <- paste0("find_package(", pkg, " ", version, " REQUIRED)") writeLines(find_package, file.path(td, "CMakeLists.txt")) env_vars <- env_vars_as_string(env_var_list)