Skip to content

Commit

Permalink
Merge pull request #88 from chainsawriot/alwaysbioc
Browse files Browse the repository at this point in the history
Fix #85
  • Loading branch information
chainsawriot authored Feb 25, 2023
2 parents ce6889a + ef1b307 commit ea087fd
Show file tree
Hide file tree
Showing 7 changed files with 98 additions and 99 deletions.
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
# Generated by roxygen2: do not edit by hand

S3method(as_pkgrefs,character)
S3method(as_pkgrefs,default)
S3method(as_pkgrefs,sessionInfo)
S3method(convert_edgelist,default)
Expand Down
22 changes: 16 additions & 6 deletions R/as_pkgrefs.R
Original file line number Diff line number Diff line change
Expand Up @@ -4,15 +4,19 @@
#' can be used as the first argument of the function [resolve()]. This function guessimates the possible sources of the
#' packages. But we strongly recommend manually reviewing the detected packages before using them for [resolve()].
#' @param x, currently supported data structure(s) are: output from [sessionInfo()], a character vector of package names
#' @param bioc_version character. When x is a character vector, version of Bioconductor to search for package names. NULL indicates not
#' search for Bioconductor.
#' @param ..., not used
#' @return a vector of package references
#' @export
#' @examples
#' as_pkgrefs(sessionInfo())
#' if (interactive()) {
#' require(rang)
#' require(pkgsearch)
#' graph <- resolve(as_pkgrefs(sessionInfo()))
#' as_pkgrefs(c("rtoot"))
#' as_pkgrefs(c("rtoot", "S4Vectors")) ## this gives cran::S4Vectors and is not correct.
#' as_pkgrefs(c("rtoot", "S4Vectors"), bioc_version = "3.3") ## This gives bioc::S4Vectors
#' }
as_pkgrefs <- function(x, ...) {
UseMethod("as_pkgrefs", x)
Expand All @@ -22,10 +26,16 @@ as_pkgrefs <- function(x, ...) {
#' @export
as_pkgrefs.default <- function(x, ...) {
## an exported version of .normalize_pkgs
if (is.numeric(x) || is.logical(x) || is.integer(x)) {
stop("Don't know how to convert this to package references.", call. = FALSE)
}
return(.normalize_pkgs(x))
## if (is.numeric(x) || is.logical(x) || is.integer(x)) {
stop("Don't know how to convert this to package references.", call. = FALSE)
## }
## return(.normalize_pkgs(x))
}

#' @rdname as_pkgrefs
#' @export
as_pkgrefs.character <- function(x, bioc_version = NULL, ...) {
return(.normalize_pkgs(pkgs = x, bioc_version = bioc_version))
}

#' @rdname as_pkgrefs
Expand All @@ -39,7 +49,7 @@ as_pkgrefs.sessionInfo <- function(x, ...) {
if ("GithubRepo" %in% names(packageDescription)) {
return(paste0("github::", packageDescription[["GithubUsername"]], "/", packageDescription[["GithubRepo"]]))
}
if(grepl("bioconductor",packageDescription[["URL"]])){
if(grepl("bioconductor",packageDescription[["URL"]])) {
return(paste0("bioc::",handle))
}
## uncomment this when #57 is implemented
Expand Down
146 changes: 58 additions & 88 deletions R/resolve.R
Original file line number Diff line number Diff line change
Expand Up @@ -18,32 +18,26 @@
utils::tail(allvers[allvers$date < snapshot_date,], 1)[,1:2]
}

## .msysreps <- memoise::memoise(.raw_sysreqs, cache = cachem::cache_mem(max_age = 60 * 60))

## .sysreps <- function(pkg, verbose = FALSE) {
## if (isTRUE(verbose)) {
## cat("Querying SystemRequirements of", pkg, "\n")
## }
## .msysreps(pkg)
## }

## get the latest version as of date
## let's call this output dep_df; basically is a rough version of edgelist
.query_snapshot_dependencies <- function(pkgref = "cran::rtoot", snapshot_date = "2022-12-10") {
.query_snapshot_dependencies <- function(pkgref = "cran::rtoot", snapshot_date = "2022-12-10", bioc_version) {
source <- .parse_pkgref(pkgref, return_handle = FALSE)
switch(source,
"cran" = {
return(.query_snapshot_dependencies_cran(handle = .parse_pkgref(pkgref) ,snapshot_date = snapshot_date))
return(.query_snapshot_dependencies_cran(handle = .parse_pkgref(pkgref), snapshot_date = snapshot_date,
bioc_version = bioc_version))
},
"github" = {
return(.query_snapshot_dependencies_github(handle = .parse_pkgref(pkgref), snapshot_date = snapshot_date))
return(.query_snapshot_dependencies_github(handle = .parse_pkgref(pkgref), snapshot_date = snapshot_date,
bioc_version = bioc_version))
},
"bioc" = {
return(.query_snapshot_dependencies_bioc(handle = .parse_pkgref(pkgref), snapshot_date = snapshot_date))
## no need to have bioc_version because it will get queried once again
return(.query_snapshot_dependencies_bioc(handle = .parse_pkgref(pkgref), snapshot_date = snapshot_date))
})
}

.query_snapshot_dependencies_cran <- function(handle = "rtoot", snapshot_date = "2022-12-10") {
.query_snapshot_dependencies_cran <- function(handle = "rtoot", snapshot_date = "2022-12-10", bioc_version = NULL) {
snapshot_date <- anytime::anytime(snapshot_date, tz = "UTC", asUTC = TRUE)
search_res <- .memo_search(handle)
search_res$pubdate <- anytime::anytime(search_res$crandb_file_date, tz = "UTC", asUTC = TRUE)
Expand All @@ -53,55 +47,58 @@
}
latest_version <- utils::tail(snapshot_versions[order(snapshot_versions$pubdate),], n = 1)
dependencies <- latest_version$dependencies[[1]]
if (nrow(dependencies != 0)) {
return(data.frame(snapshot_date = snapshot_date, x = handle, x_version = latest_version$Version, x_pubdate = latest_version$pubdate, x_pkgref = .normalize_pkgs(handle), y = dependencies$package, type = dependencies$type, y_raw_version = dependencies$version, y_pkgref = .normalize_pkgs(dependencies$package)))
} else {
## no y
return(data.frame(snapshot_date = snapshot_date, x = handle, x_version = latest_version$Version, x_pubdate = latest_version$pubdate, x_pkgref = .normalize_pkgs(handle)))
}
if (nrow(dependencies) == 0) {
return(data.frame(snapshot_date = snapshot_date, x = handle, x_version = latest_version$Version,
x_pubdate = latest_version$pubdate, x_pkgref = .normalize_pkgs(handle, bioc_version = bioc_version)))
}
data.frame(snapshot_date = snapshot_date, x = handle,
x_version = latest_version$Version,
x_pubdate = latest_version$pubdate,
x_pkgref = .normalize_pkgs(handle, bioc_version = bioc_version),
y = dependencies$package, type = dependencies$type,
y_raw_version = dependencies$version,
y_pkgref = .normalize_pkgs(dependencies$package, bioc_version = bioc_version))
}

.query_snapshot_dependencies_github <- function(handle = "schochastics/rtoot", snapshot_date = "2022-12-10") {
.query_snapshot_dependencies_github <- function(handle = "schochastics/rtoot", snapshot_date = "2022-12-10", bioc_version = NULL) {
snapshot_date <- anytime::anytime(snapshot_date, tz = "UTC", asUTC = TRUE)
sha <- .query_sha(handle, snapshot_date)
repo_descr <- gh::gh(paste0("GET /repos/", handle,"/contents/DESCRIPTION"), ref = sha$sha)
con <- url(repo_descr$download_url)
descr_df <- as.data.frame(read.dcf(con))
close(con)
pkg_dep_df <- .parse_desc(descr_df,snapshot_date)
pkg_dep_df$x_pkgref <- .normalize_pkgs(handle)
pkg_dep_df$x_pkgref <- .normalize_pkgs(pkgs = handle, bioc_version = bioc_version)
pkg_dep_df$x_uid <- sha$sha
pkg_dep_df$x_pubdate <- sha$x_pubdate
if("y"%in% names(pkg_dep_df)) {
pkg_dep_df$y_pkgref <- .normalize_pkgs(pkg_dep_df$y)
return(pkg_dep_df[,c("snapshot_date", "x", "x_version", "x_pubdate", "x_pkgref", "x_uid", "y", "type", "y_raw_version", "y_pkgref")])
} else {
if (isFALSE("y" %in% names(pkg_dep_df))) {
return(pkg_dep_df[,c("snapshot_date", "x", "x_version", "x_pubdate", "x_pkgref", "x_uid")])
}
pkg_dep_df$y_pkgref <- .normalize_pkgs(pkg_dep_df$y, bioc_version = bioc_version)
pkg_dep_df[,c("snapshot_date", "x", "x_version", "x_pubdate", "x_pkgref", "x_uid", "y", "type", "y_raw_version", "y_pkgref")]
}

.query_snapshot_dependencies_bioc <- function(handle = "BiocGenerics", snapshot_date = "2022-01-10") {
snapshot_date <- anytime::anytime(snapshot_date, tz = "UTC", asUTC = TRUE)
bioc_version <- .query_biocver(snapshot_date)
search_res <- .memo_search_bioc(bioc_version$version)
search_res$pubdate <- anytime::anytime(bioc_version$date, tz = "UTC", asUTC = TRUE)
bioc_version_df <- .query_biocver(snapshot_date) ## a dataframe!
search_res <- .memo_search_bioc(bioc_version_df$version)
search_res$pubdate <- anytime::anytime(bioc_version_df$date, tz = "UTC", asUTC = TRUE)
latest_version <- search_res[search_res$Package==handle,]
if (nrow(latest_version) == 0) {
stop("No snapshot version exists for ", handle, ".", call. = FALSE)
}
pkg_dep_df <- .parse_desc(descr_df = latest_version,snapshot_date = snapshot_date)
pkg_dep_df$x_bioc_ver <- bioc_version$version
pkg_dep_df$x_bioc_ver <- bioc_version_df$version
if ("y" %in% colnames(pkg_dep_df)) {
pkg_dep_df <- pkg_dep_df[!is.na(pkg_dep_df$y),]
}
pkg_dep_df$x_pubdate <- bioc_version$date
pkg_dep_df$x_pkgref <- .normalize_pkgs(handle,bioc_version = bioc_version$version)
if("y"%in% names(pkg_dep_df)) {
pkg_dep_df$y_pkgref <- .normalize_pkgs(pkg_dep_df$y,bioc_version = bioc_version$version)
return(pkg_dep_df[,c("snapshot_date", "x", "x_version", "x_pubdate", "x_pkgref", "x_bioc_ver", "y", "type", "y_raw_version", "y_pkgref")])
} else {
pkg_dep_df$x_pubdate <- bioc_version_df$date
pkg_dep_df$x_pkgref <- .normalize_pkgs(handle, bioc_version = bioc_version_df$version)
if (isFALSE("y" %in% names(pkg_dep_df))) {
return(pkg_dep_df[,c("snapshot_date", "x", "x_version", "x_pubdate", "x_pkgref", "x_bioc_ver")])
}
pkg_dep_df$y_pkgref <- .normalize_pkgs(pkg_dep_df$y, bioc_version = bioc_version_df$version)
pkg_dep_df[,c("snapshot_date", "x", "x_version", "x_pubdate", "x_pkgref", "x_bioc_ver", "y", "type", "y_raw_version", "y_pkgref")]
}

# get the commit sha for the commit closest to date
Expand Down Expand Up @@ -185,15 +182,31 @@
res <- dep_df[!dep_df$type %in% disabled_types & dep_df$y != "R" & !(dep_df$y %in% c("datasets", "utils", "grDevices", "graphics", "stats", "methods", "tools", "grid", "splines", "Rgraphviz", "parallel", "stats4", "tcltk", "MASS", "nnet", "class", "spatial")),]
if (nrow(res) == 0) {
return(NULL)
} else {
return(unique(res$y_pkgref))
}
unique(res$y_pkgref)
}

.is_terminal_node <- function(dep_df, no_enhances = TRUE, no_suggests = TRUE) {
length(.extract_queryable_dependencies(dep_df, no_enhances, no_suggests)) == 0
}

## The checking function for below
.generate_bioc_version <- function(snapshot_date, pkgs) {
bioc_version <- .query_biocver(snapshot_date)$version
if (length(bioc_version) == 0) {
return(NULL)
}
## explicit bioc case and too old
if(any(grepl("^bioc::", pkgs)) && utils::compareVersion(bioc_version, "2.0") == -1) {
stop("Bioconductor versions < 2.0 are not supported.", call. = FALSE)
}
## old bioc, but not explicit
if (utils::compareVersion(bioc_version, "2.0") == -1) {
return(NULL)
}
bioc_version
}

#' Resolve Dependencies Of R Packages
#'
#' This function recursively queries dependencies of R packages at a specific snapshot time. The dependency graph can then be used to recreate the computational environment. The data on dependencies are provided by R-hub.
Expand All @@ -204,7 +217,6 @@
#' @param no_suggests logical, whether to ignore packages in the "Suggests" field
#' @param query_sysreqs logical, whether to query for System Requirements. Important: Archived CRAN can't be queried for system requirements. Those
#' packages are assumed to have no system requirement.
#' @param query_bioc Logical, whether to query for packages from Bioconductor
#' @param os character, which OS to query for system requirements
#' @param verbose logical, whether to display messages
#' @return a `rang` S3 object with the following items
Expand Down Expand Up @@ -233,7 +245,7 @@
#' gh_graph
#' }
#' }
resolve <- function(pkgs, snapshot_date, no_enhances = TRUE, no_suggests = TRUE, query_sysreqs = TRUE, query_bioc = FALSE, os = "ubuntu-20.04", verbose = FALSE) {
resolve <- function(pkgs, snapshot_date, no_enhances = TRUE, no_suggests = TRUE, query_sysreqs = TRUE, os = "ubuntu-20.04", verbose = FALSE) {
if (!os %in% supported_os) {
stop("Don't know how to resolve ", os, ". Supported OSes are: ", paste(supported_os, collapse = ", "))
}
Expand All @@ -247,24 +259,8 @@ resolve <- function(pkgs, snapshot_date, no_enhances = TRUE, no_suggests = TRUE,
if (snapshot_date >= anytime::anytime(Sys.Date())) {
stop("We don't know the future.", call. = FALSE)
}
if (class(pkgs) %in% c("sessionInfo")) {
pkgrefs <- as_pkgrefs(pkgs)
if(any(grepl("^bioc::",pkgrefs))){
bioc_version <- .query_biocver(snapshot_date)$version
} else{
bioc_version <- NULL
}
} else {
if(any(grepl("^bioc::",pkgs))){
query_bioc <- TRUE
}
if(isTRUE(query_bioc)){
bioc_version <- .query_biocver(snapshot_date)$version
} else{
bioc_version <- NULL
}
pkgrefs <- .normalize_pkgs(pkgs, bioc_version = bioc_version)
}
bioc_version <- .generate_bioc_version(snapshot_date = snapshot_date, pkgs = pkgs)
pkgrefs <- as_pkgrefs(pkgs, bioc_version = bioc_version)
output <- list()
output$call <- match.call()
output$ranglets <- list()
Expand All @@ -279,7 +275,7 @@ resolve <- function(pkgs, snapshot_date, no_enhances = TRUE, no_suggests = TRUE,
for (pkgref in pkgrefs) {
tryCatch({
res <- .resolve_pkgref(pkgref = pkgref, snapshot_date = snapshot_date, no_enhances = no_enhances,
no_suggests = no_suggests, verbose = verbose)
no_suggests = no_suggests, verbose = verbose, bioc_version = bioc_version)
output$ranglets[[pkgref]] <- res
}, error = function(err) {
if (isTRUE(verbose)) {
Expand All @@ -300,8 +296,8 @@ resolve <- function(pkgs, snapshot_date, no_enhances = TRUE, no_suggests = TRUE,
return(output)
}

.resolve_pkgref <- function(pkgref, snapshot_date, no_enhances = TRUE, no_suggests = TRUE, verbose = FALSE) {
pkg_dep_df <- .query_snapshot_dependencies(pkgref = pkgref, snapshot_date = snapshot_date)
.resolve_pkgref <- function(pkgref, snapshot_date, no_enhances = TRUE, no_suggests = TRUE, verbose = FALSE, bioc_version = NULL) {
pkg_dep_df <- .query_snapshot_dependencies(pkgref = pkgref, snapshot_date = snapshot_date, bioc_version = bioc_version)
output <- list()
output$pkgref <- pkgref
output$no_enhances <- no_enhances
Expand All @@ -321,7 +317,7 @@ resolve <- function(pkgs, snapshot_date, no_enhances = TRUE, no_suggests = TRUE,
cat("Querying: ", current_pkgref, "\n")
}
tryCatch({
pkg_dep_df <- .query_snapshot_dependencies(pkgref = current_pkgref, snapshot_date = snapshot_date)
pkg_dep_df <- .query_snapshot_dependencies(pkgref = current_pkgref, snapshot_date = snapshot_date, bioc_version = bioc_version)
output$deps[[current_pkgref]] <- pkg_dep_df
pkgs_to_query <- unique(setdiff(.extract_queryable_dependencies(pkg_dep_df, no_enhances, no_suggests), c(names(output$deps), seen_deps)))
seen_deps <- union(seen_deps, pkgs_to_query)
Expand Down Expand Up @@ -484,22 +480,6 @@ query_sysreqs <- function(rang, os = "ubuntu-20.04") {
vapply(jsonlite::read_json(url), .extract_sys_package, character(1), arch = arch)
}

## .write_pony_description_file <- function(raw_sys_reqs) {
## description_file_path <- tempfile()
## x <- data.frame(SystemRequirements =
## paste(raw_sys_reqs, collapse = ","))
## write.dcf(x, file = description_file_path)
## return(description_file_path)
## }

## .query_sysreqs_bioc <- function(handle, os) {
## sys_reqs_all <- .memo_query_sysreqs_rhub()
## pkgs <- .memo_search_bioc(bioc_version = "release")
## raw_sys_reqs <- pkgs$SystemRequirements[pkgs$Package %in% handle]
## .query_sysreqs_posit(.write_pony_description_file(raw_sys_reqs),
## os = os)
## }

.extract_sys_package <- function(item, arch = "DEB") {
output <- item[[names(item)]]$platforms[[arch]]
if (isFALSE(is.list((output)))) {
Expand All @@ -515,16 +495,6 @@ query_sysreqs <- function(rang, os = "ubuntu-20.04") {
}
}

## .clean_sys_reqs_bioc <- function(sys_reqs){
## sys_reqs <- unlist(strsplit(sys_reqs,split = ",\\s*|\\n"),use.names = FALSE)
## sys_reqs <- tolower(sys_reqs)
## sys_reqs <- gsub("\\s*\\(.*\\)","",sys_reqs)
## sys_reqs <- gsub("GNU make","gnumake",sys_reqs)
## sys_reqs <- gsub("^gsl$","libgsl",sys_reqs)
## sys_reqs <- gsub("^pandoc.*","pandoc",sys_reqs)
## sys_reqs <- gsub("^xml2$","libxml2",sys_reqs)
## }

.query_sysreqs_posit <- function(description_file, os, remove_description = TRUE) {
os_info <- strsplit(os, "-")[[1]]
DEFAULT_RSPM <- "https://packagemanager.rstudio.com"
Expand Down
10 changes: 9 additions & 1 deletion man/as_pkgrefs.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 0 additions & 3 deletions man/resolve.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions tests/testthat/test_pkgref.R
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,7 @@ test_that("as_pkgrefs dispatch", {
expect_error(as_pkgrefs(1L))
expect_equal(as_pkgrefs("rtoot"), "cran::rtoot")
expect_equal(as_pkgrefs(c("rtoot", "sna")), c("cran::rtoot", "cran::sna"))
expect_equal(as_pkgrefs(c("rtoot", "S4Vectors")), c("cran::rtoot", "cran::S4Vectors")) ## the bioc version is in test_resolve
})

test_that("as_pkgrefs_packageDescription", {
Expand Down
Loading

0 comments on commit ea087fd

Please sign in to comment.