From 4ccc147fe3377073ef6cf749c297ec89225ad5fa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?G=C3=A1bor=20Cs=C3=A1rdi?= Date: Mon, 11 Oct 2021 12:15:43 +0200 Subject: [PATCH] First session_diff() implementation --- DESCRIPTION | 2 + NAMESPACE | 3 + R/clipboard.R | 33 ++++ R/compare.R | 364 ++++++++++++++++++++++++++++++++++++++++++++ R/rematch2.R | 34 +++++ R/session-info.R | 3 +- man/session_diff.Rd | 37 +++++ 7 files changed, 475 insertions(+), 1 deletion(-) create mode 100644 R/clipboard.R create mode 100644 R/compare.R create mode 100644 R/rematch2.R create mode 100644 man/session_diff.Rd diff --git a/DESCRIPTION b/DESCRIPTION index 75a3f84..0826507 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -44,6 +44,8 @@ Suggests: rmarkdown, testthat, withr +Remotes: + r-lib/cli Encoding: UTF-8 Roxygen: list(markdown = TRUE) RoxygenNote: 7.1.2 diff --git a/NAMESPACE b/NAMESPACE index 225a4be..6dd73fb 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -10,17 +10,20 @@ S3method(format,external_info) S3method(format,packages_info) S3method(format,platform_info) S3method(format,python_info) +S3method(format,session_diff) S3method(format,session_info) S3method(print,external_info) S3method(print,packages_info) S3method(print,platform_info) S3method(print,python_info) +S3method(print,session_diff) S3method(print,session_info) export(external_info) export(os_name) export(package_info) export(platform_info) export(python_info) +export(session_diff) export(session_info) importFrom(cli,symbol) importFrom(utils,packageVersion) diff --git a/R/clipboard.R b/R/clipboard.R new file mode 100644 index 0000000..56db0a8 --- /dev/null +++ b/R/clipboard.R @@ -0,0 +1,33 @@ + +get_os <- function() { + if (.Platform$OS.type == "windows") { + "win" + } else if (Sys.info()["sysname"] == "Darwin") { + "mac" + } else { + "unix" + } +} + +clipboard_read <- function() { + os <- get_os() + + switch( + os, + win = utils::readClipboard(), + mac = clipboard_read_mac(), + clipboard_read_x11() + ) +} + +clipboard_read_mac <- function() { + on.exit(try(close(con), silent = TRUE), add = TRUE) + con <- pipe("pbpaste") + scan(con, what = "", sep = "\n", blank.lines.skip = FALSE, quiet = TRUE) +} + +clipboard_read_x11 <- function() { + on.exit(try(close(con), silent = TRUE), add = TRUE) + con <- file("clipboard") + scan(con, what = "", sep = "\n", blank.lines.skip = FALSE, quiet = TRUE) +} diff --git a/R/compare.R b/R/compare.R new file mode 100644 index 0000000..4acf3e8 --- /dev/null +++ b/R/compare.R @@ -0,0 +1,364 @@ + +#' Compare session information from two sources +#' +#' @param old,new A `session_info` object (the return value of +#' [session_info()]), or a pointer to [session_info()] output. See details +#' below. +#' @param ... Passed to any new [session_info()] calls. +#' +#' @details +#' Various way to specify `old` and `new`: +#' * A `session_info` object. +#' * `"local"` runs [session_info()] in the current +#' session, and uses its output. +#' * `"clipboard"` takes the session info from the system clipboard. +#' If the clipboard contains a URL, it is followed to download the +#' session info. +#' * A URL starting with `http://` or `https://`. `session_diff` searches +#' the HTML (or text) page for the session info header to find the session +#' info. +#' +#' @export +#' @examplesIf FALSE +#' session_diff() + +session_diff <- function(old = "local", new = "clipboard", ...) { + + oldname <- get_symbol_name(substitute(old)) + newname <- get_symbol_name(substitute(new)) + + old <- get_session_info(old, oldname %||% "old", ...) + new <- get_session_info(new, newname %||% "new", ...) + + ret <- list( + old = old, + new = new, + diff = session_diff_text(old$text, new$text) + ) + + class(ret) <- c("session_diff", "list") + ret +} + +#' @export + +format.session_diff <- function(x, ...) { + c( + cli::style_bold(paste0("--- ", substr(x$old$name, 1, 78))), + cli::style_bold(paste0("+++ ", substr(x$new$name, 1, 78))), + format(x$diff, context = Inf) + ) +} + +#' @export + +print.session_diff <- function(x, ...) { + writeLines(format(x, ...)) +} + +get_session_info <- function(src, name = NULL, ...) { + si <- if (is_string(src) == 1 && src == "local") { + get_session_info_local(...) + } else if (is_string(src) == 1 && src == "clipboard") { + get_session_info_clipboard() + } else if (is_string(src) && grepl("https?://", src)) { + get_session_info_url(src) + } else { + get_session_info_literal(src) + } + if (is.null(si$name)) si$name <- name + si +} + +get_session_info_local <- function() { + si <- session_info() + old <- options(cli.num_colors = 1) + on.exit(options(old), add = TRUE) + list(arg = "local", si = si, text = format(si)) +} + +get_session_info_clipboard <- function() { + cnt <- clipboard_read() + if (is_string(cnt) && cnt == "clipboard") { + si <- cnt + } else { + si <- get_session_info(cnt) + } + si$arg <- "" + si +} + +get_session_info_url <- function(url) { + tmp <- tempfile("session-diff-") + on.exit(unlink(tmp), add = TRUE) + suppressWarnings(download.file(url, tmp, quiet = TRUE, mode = "wb")) + html <- readLines(url, warn = FALSE) + find_session_info_in_html(url, html) +} + +find_session_info_in_html <- function(url, lines) { + purl <- parse_url(url) + re_start <- "[-=\u2500\u2550][ ]Session info[ ]" + cand <- grep(re_start, lines) + if (length(cand) == 0) stop("Cannot find session info at '", url, "'.") + + # check if the URL has an anchor and that the anchor exists in HTML + # if yes, then we "skip" there + if (purl$anchor != "") { + anch <- which( + grepl(paste0(" id=\"", purl$anchor, "\""), lines, fixed = TRUE) | + grepl(paste0(" id='", purl$anchor, "'"), lines, fixed = TRUE) | + grepl(paste0(" id = \"", purl$anchor, "\""), lines, fixed = TRUE) | + grepl(paste0(" id = '", purl$anchor, "'"), lines, fixed = TRUE) + )[1] + if (!is.na(anch) && any(anch < cand)) { + lines <- lines[anch:length(lines)] + cand <- grep(re_start, lines) + } else { + url <- sub(paste0("#", purl$anchor), "", url) + } + } + + lines <- lines[cand[1]:length(lines)] + lines[1] <- sub(paste0("^.*(", re_start, ")"), "\\1", lines[1]) + + grepl_end <- function(lines) { + grepl("^(#>)?[ ]*\\[[0-9]\\] ", lines) | + grepl("^(#>)?[ ]*[-\u2500]+$", lines) + } + + end <- which(grepl_end(lines))[1] + + if (is.na(end)) stop("Cannot parse session info from '", url, "'.") + while (end < length(lines) && grepl_end(lines[end + 1])) { + end <- end + 1 + } + + si <- get_session_info_literal(lines[1:end]) + si$arg <- url + si$name <- url + si +} + +parse_url <- function (url) { + re_url <- paste0( + "^(?[a-zA-Z0-9]+)://", + "(?:(?[^@/:]+)(?::(?[^@/]+))?@)?", + "(?[^/]+)", + "(?[^#]*)", + "#?(?.*)$" + ) + re_match(url, re_url)$groups +} + +get_session_info_literal <- function(si) { + if (inherits(si, "session_info")) { + old <- options(cli.num_colors = 1) + on.exit(options(old), add = TRUE) + list(arg = si, si = si, text = format(si)) + + } else if (is.character(si)) { + # in case it has ANSI sequences + text <- cli::ansi_strip(si) + + # Might be a single string + text <- unlist(strsplitx(text, "\n", fixed = TRUE)) + + # reprex has the knitr output prefix, remove it + # order is important here + text <- sub("^#>[ ]?", "", text) + text <- sub("^#[>#]?[ ]?", "", text) + + check_session_info(text) + list(arg = si, si = si, text = text) + + } else { + stop("Could not interpret a `", class(si), "` as a session info.") + } +} + +# strsplit("", "\n") -> character(), but it should be the empty string, +# so we fix this. + +strsplitx <- function(...) { + lapply(strsplit(...), paste0, "") +} + +check_session_info <- function(x) { + if (!any(grepl("[-=\u2500\u2550] Session info", x))) { + warning("This does not look like a session info: '", beginning(x), "'.") + } +} + +beginning <- function(x) { + x123 <- utils::head(unlist(strsplit(x, "\n", fixed = TRUE)), 3) + trimws(substr(paste0(x123, sep = "\n"), 1, 100)) +} + +session_diff_text <- function(old, new) { + old <- enc2utf8(old) + new <- enc2utf8(new) + + old <- diff_drop_empty(old) + new <- diff_drop_empty(new) + + old <- diff_no_date(old) + new <- diff_no_date(new) + + min <- diff_min_line(c(old, new)) + old <- diff_fix_lines(old, min) + new <- diff_fix_lines(new, min) + + tryCatch <- function(x, ...) x + + # expand thinner package info to match the wider one + # do not error, in case we cannot parse sessioninfo output + suppressWarnings(tryCatch({ + exp <- expand_diff_text(old, new) + old <- exp$old + new <- exp$new + }, error = function(e) NULL)) + + old2 <- gsub("\\s+", " ", old) + new2 <- gsub("\\s+", " ", new) + + diff <- cli::ediff_chr(old2, new2) + diff$old <- old + diff$new <- new + diff +} + +diff_drop_empty <- function(x) { + len <- length(x) + if (len == 0) return(x) + + empty <- rle(grepl("^\\s*$", x)) + pre <- if (empty$values[1]) { + 1:empty$lengths[1] + } + post <- if (utils::tail(empty$values, 1)) { + (len - tail(empty$lengths, 1) + 1):len + } + del <- as.integer(c(pre, post)) + if (length(del)) x <- x[-del] + + x +} + +diff_no_date <- function(x) { + date <- grep("^[ ]*date[ ]+[0-9][0-9][0-9][0-9]-", x) + if (length(date) > 0) { + x <- x[-date[1]] + } + x +} + +diff_min_line <- function(x) { + lines <- c( + grep("[-\u2500][-\u2500][-\u2500]$", x), + grep("[=\u2550][=\u2550][=\u2550]$", x) + ) + min(c(80, cli:::utf8_nchar(x[lines], "width"))) +} + +diff_fix_lines <- function(x, w) { + slines <- grepl("[-\u2500]+$", x) + dlines <- grepl("[=\u2550]+$", x) + x[slines] <- gsub("[-\u2500]", cli::symbol$line, x[slines]) + x[dlines] <- gsub("[=\u2550]", cli::symbol$double_line, x[dlines]) + x[slines] <- substr(x[slines], 1, w) + x[dlines] <- substr(x[dlines], 1, w) + x +} + +expand_diff_text <- function(old, new) { + opkgs <- parse_pkgs(old) + npkgs <- parse_pkgs(new) + + if (is.null(opkgs) || is.null(opkgs)) return(list(old = old, new = new)) + + # Add the "!" column if needed + if ("!" %in% names(opkgs$pkgs) || "!" %in% names(opkgs$pkgs)) { + if (! "!" %in% names(opkgs$pkgs)) { + opkgs$pkgs <- cbind("!" = "", opkgs$pkgs) + } + if (! "!" %in% names(npkgs$pkgs)) { + npkgs$pkgs <- cbind("!" = "", npkgs$pkgs) + } + } + + # If the column names differ, we keep it as is + if (any(names(opkgs$pkgs) != names(npkgs$pkgs))) { + return(list(old = old, new = new)) + } + + cmn <- rbind(opkgs$pkgs, npkgs$pkgs) + oldopts <- options(cli.num_colors = 1) + on.exit(options(oldopts), add = TRUE) + fmt <- format_df(cmn) + + oend <- opkgs$end - opkgs$begin + 1L + nend <- oend + npkgs$end - npkgs$begin + fmt_old <- c(fmt[1], fmt[2:oend]) + fmt_new <- c(fmt[1], fmt[(oend+1):(nend)]) + + old <- insert_instead(old, opkgs$begin, opkgs$end, fmt_old) + new <- insert_instead(new, npkgs$begin, npkgs$end, fmt_new) + browser() + + list(old = old, new = new) +} + +insert_instead <- function(orig, from, to, new) { + pre <- if (from > 1) orig[1:(from-1)] + pst <- if (to < length(orig)) orig[(to+1):length(orig)] + c(pre, new, pst) +} + +parse_pkgs <- function(lines) { + begin <- grep("^[-\u2500] Packages ", lines) + 1 + + # back out if no Packages header + if (length(begin) != 1 || length(begin) > length(lines)) return(NULL) + + # now find the end + end <- begin + grep( + "^\\s*[a-zA-Z]", + lines[begin:length(lines)], + invert = TRUE, + perl = TRUE + )[1] - 2 + if (is.na(end)) end <- length(lines) + + pkgs <- parse_pkgs_section(lines[begin:end]) + + list(begin = begin, end = end, pkgs = pkgs) +} + +parse_pkgs_section <- function(lines) { + lines[1] <- sub(" date ", " date (UTC) ", fixed = TRUE, lines[1]) + hdr <- sub("date (UTC)", "date-(UTC)", fixed = TRUE, lines[1]) + wth <- find_words(hdr) + wth[length(wth)] <- max(nchar(lines)) + df <- read.fwf(textConnection(lines), widths = wth) + df[] <- lapply(df, trimws) + names(df) <- as.character(df[1,]) + df <- df[-1, , drop = FALSE] + df +} + +find_words <- function(x) { + tmp <- paste0(gsub("[^\\s]", "X", x, perl = TRUE), " ") + ltr <- strsplit(tmp, "")[[1]] + rl <- rle(ltr) + pos <- cumsum(rl$length) + diff(pos[which(rl$values == " ")]) +} + +get_symbol_name <- function(x) { + if (is.symbol(x)) { + as.character(x) + } else if (is_string(x)) { + x + } +} diff --git a/R/rematch2.R b/R/rematch2.R new file mode 100644 index 0000000..a38c6a9 --- /dev/null +++ b/R/rematch2.R @@ -0,0 +1,34 @@ + +re_match <- function(text, pattern, perl = TRUE, ...) { + + stopifnot(is.character(pattern), length(pattern) == 1, !is.na(pattern)) + text <- as.character(text) + + match <- regexpr(pattern, text, perl = perl, ...) + + start <- as.vector(match) + length <- attr(match, "match.length") + end <- start + length - 1L + + matchstr <- substring(text, start, end) + matchstr[ start == -1 ] <- NA_character_ + + empty <- data.frame(stringsAsFactors = FALSE, .text = text)[, numeric()] + res <- list(match = !is.na(matchstr), groups = empty) + + if (!is.null(attr(match, "capture.start"))) { + + gstart <- attr(match, "capture.start") + glength <- attr(match, "capture.length") + gend <- gstart + glength - 1L + + groupstr <- substring(text, gstart, gend) + groupstr[ gstart == -1 ] <- NA_character_ + dim(groupstr) <- dim(gstart) + + res$groups <- cbind(groupstr, res$groups, stringsAsFactors = FALSE) + names(res$groups) <- attr(match, "capture.names") + } + + res +} diff --git a/R/session-info.R b/R/session-info.R index a84ded0..e45f8f8 100644 --- a/R/session-info.R +++ b/R/session-info.R @@ -126,7 +126,8 @@ format.session_info <- function(x, ...) { }, if ("python" %in% names(x)) { c(rule("Python configuration"), format(x$python), "") - } + }, + rule() ) } diff --git a/man/session_diff.Rd b/man/session_diff.Rd new file mode 100644 index 0000000..0265aac --- /dev/null +++ b/man/session_diff.Rd @@ -0,0 +1,37 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/compare.R +\name{session_diff} +\alias{session_diff} +\title{Compare session information from two sources} +\usage{ +session_diff(old = "local", new = "clipboard", ...) +} +\arguments{ +\item{old, new}{A \code{session_info} object (the return value of +\code{\link[=session_info]{session_info()}}), or a pointer to \code{\link[=session_info]{session_info()}} output. See details +below.} + +\item{...}{Passed to any new \code{\link[=session_info]{session_info()}} calls.} +} +\description{ +Compare session information from two sources +} +\details{ +Various way to specify \code{old} and \code{new}: +\itemize{ +\item A \code{session_info} object. +\item \code{"local"} runs \code{\link[=session_info]{session_info()}} in the current +session, and uses its output. +\item \code{"clipboard"} takes the session info from the system clipboard. +If the clipboard contains a URL, it is followed to download the +session info. +\item A URL starting with \verb{http://} or \verb{https://}. \code{session_diff} searches +the HTML (or text) page for the session info header to find the session +info. +} +} +\examples{ +\dontshow{if (FALSE) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} +session_diff() +\dontshow{\}) # examplesIf} +}