From 005e75aa56a0cdf87a43124345957f207202d272 Mon Sep 17 00:00:00 2001 From: Johan Larsson Date: Sun, 25 Feb 2018 00:02:17 +0100 Subject: [PATCH] allow empty sets. closes #23 --- NEWS.md | 1 + R/euler.R | 320 ++++++++++++++++++----------------- R/geometry.R | 16 +- R/layout.R | 20 ++- R/plot.euler.R | 17 +- tests/testthat/test_inputs.R | 2 - 6 files changed, 205 insertions(+), 171 deletions(-) diff --git a/NEWS.md b/NEWS.md index 3abba671..af23e821 100644 --- a/NEWS.md +++ b/NEWS.md @@ -8,6 +8,7 @@ are now also found in this function and `plot.eulergram()` and of `plot.euler()`. This change means that functions such as `gridExtra::grid.arrange()` now work as one would intuit on the objects produced by `plot.euler()`. +* Fitting and plotting euler diagrams with empty sets is now allowed (#23). ## Bug fixes diff --git a/R/euler.R b/R/euler.R index d172c599..ca49457e 100644 --- a/R/euler.R +++ b/R/euler.R @@ -178,182 +178,196 @@ euler.default <- function( stop("Check your set configuration. Some disjoint areas are negative.") } - id_sums <- rowSums(id) - ones <- id_sums == 1L - twos <- id_sums == 2L - two <- choose_two(1L:n) - r <- sqrt(areas[ones]/pi) - - # Establish identities of disjoint and subset sets - subset <- disjoint <- matrix(FALSE, ncol = n, nrow = n) - distances <- area_mat <- matrix(0, ncol = n, nrow = n) - - lwrtri <- lower.tri(subset) - - tmp <- matrix(areas[ones][two], ncol = 2L) - - subset[lwrtri] <- areas[twos] == tmp[, 1L] | areas[twos] == tmp[, 2L] - disjoint[lwrtri] <- areas[twos] == 0 - distances[lwrtri] <- mapply(separate_two_discs, - r1 = r[two[, 1L]], - r2 = r[two[, 2L]], - overlap = areas[twos], - USE.NAMES = FALSE) - - # Starting layout - loss <- Inf - initial_layouts <- vector("list", n_restarts) - bnd <- sqrt(sum(r^2*pi)) - - i <- 1L - while (loss > small && i <= n_restarts) { - initial_layouts[[i]] <- stats::nlm( - f = optim_init, - p = stats::runif(n*2, 0, bnd), - d = distances, - disjoint = disjoint, - subset = subset, - iterlim = 1000L - ) - loss <- initial_layouts[[i]]$minimum - i <- i + 1L - } + if (all(areas == 0)) { + # all sets are zero + fpar <- matrix(data = rep.int(0, 5L*n), + ncol = 5L, + dimnames = list(setnames, c("h", "k", "a", "b", "phi"))) + regionError <- diagError <- stress <- 0 + orig <- fit <- areas + names(orig) <- names(fit) <- + apply(id, 1L, function(x) paste0(setnames[x], collapse = "&")) + } else { + id_sums <- rowSums(id) + ones <- id_sums == 1L + twos <- id_sums == 2L + two <- choose_two(1L:n) + r <- sqrt(areas[ones]/pi) + + # Establish identities of disjoint and subset sets + subset <- disjoint <- matrix(FALSE, ncol = n, nrow = n) + distances <- area_mat <- matrix(0, ncol = n, nrow = n) + + lwrtri <- lower.tri(subset) + + tmp <- matrix(areas[ones][two], ncol = 2L) + + subset[lwrtri] <- areas[twos] == tmp[, 1L] | areas[twos] == tmp[, 2L] + disjoint[lwrtri] <- areas[twos] == 0 + distances[lwrtri] <- mapply(separate_two_discs, + r1 = r[two[, 1L]], + r2 = r[two[, 2L]], + overlap = areas[twos], + USE.NAMES = FALSE) + + # Starting layout + loss <- Inf + initial_layouts <- vector("list", n_restarts) + bnd <- sqrt(sum(r^2*pi)) + + i <- 1L + while (loss > small && i <= n_restarts) { + initial_layouts[[i]] <- stats::nlm( + f = optim_init, + p = stats::runif(n*2, 0, bnd), + d = distances, + disjoint = disjoint, + subset = subset, + iterlim = 1000L + ) + loss <- initial_layouts[[i]]$minimum + i <- i + 1L + } - # Find the best initial layout - best_init <- which.min(lapply(initial_layouts[1L:(i - 1L)], - "[[", - "minimum")) - initial_layout <- initial_layouts[[best_init]] + # Find the best initial layout + best_init <- which.min(lapply(initial_layouts[1L:(i - 1L)], + "[[", + "minimum")) + initial_layout <- initial_layouts[[best_init]] - # Final layout - circle <- match.arg(shape) == "circle" + # Final layout + circle <- match.arg(shape) == "circle" - if (circle) { - pars <- as.vector(matrix(c(initial_layout$estimate, r), 3L, byrow = TRUE)) - lwr <- rep.int(0, 3L) - upr <- rep.int(bnd, 3L) - } else { - pars <- as.vector(rbind(matrix(initial_layout$estimate, 2L, byrow = TRUE), - r, r, 0, deparse.level = 0L)) - lwr <- c(rep.int(0, 4L), -2*pi) - upr <- c(rep.int(bnd, 4L), 2*pi) - } + if (circle) { + pars <- as.vector(matrix(c(initial_layout$estimate, r), 3L, + byrow = TRUE)) + lwr <- rep.int(0, 3L) + upr <- rep.int(bnd, 3L) + } else { + pars <- as.vector(rbind(matrix(initial_layout$estimate, 2L, + byrow = TRUE), + r, r, 0, deparse.level = 0L)) + lwr <- c(rep.int(0, 4L), -2*pi) + upr <- c(rep.int(bnd, 4L), 2*pi) + } - orig <- areas_disjoint - - # Try to find a solution using nlm() first (faster) - # TODO: Allow user options here? - nlm_solution <- stats::nlm( - f = optim_final_loss, - p = pars, - areas = areas_disjoint, - circle = circle, - iterlim = 1e4L - )$estimate - - tpar <- as.data.frame(matrix( - data = nlm_solution, - ncol = if (circle) 3L else 5L, - dimnames = list( - setnames, - if (circle) c("h", "k", "r") else c("h", "k", "a", "b", "phi") - ), - byrow = TRUE - )) - if (circle) - tpar <- cbind(tpar, tpar[, 3L], 0) - - # Normalize layout - nlm_fit <- as.vector(intersect_ellipses(nlm_solution, circle)) - - nlm_pars <- compress_layout(normalize_pars(tpar), id, nlm_fit) - - nlm_diagError <- diagError(nlm_fit, orig) - - # If inadequate solution, try with a second optimizer (slower, better) - if (!circle && control$extraopt && - nlm_diagError > control$extraopt_threshold) { - # Set bounds for the parameters - newpars <- matrix( - data = as.vector(t(nlm_pars)), - ncol = 5L, - dimnames = list(setnames, c("h", "k", "a", "b", "phi")), - byrow = TRUE - ) - - constraints <- get_constraints(compress_layout(newpars, id, nlm_fit)) - - # TODO: Set up initial population in some clever fashion. - - deoptim <- RcppDE::DEoptim( - fn = optim_final_loss, - lower = constraints$lwr, - upper = constraints$upr, - control = do.call( - RcppDE::DEoptim.control, - utils::modifyList( - list(VTR = -Inf, - NP = length(newpars)*10, - CR = 0.6, - F = 0.2, - itermax = 1000L, - trace = FALSE), - control$extraopt_control - ) - ), - areas = areas_disjoint, - circle = circle - ) + orig <- areas_disjoint - # Fine tune the fit from DEoptim - last_ditch_effort <- stats::nlm( + # Try to find a solution using nlm() first (faster) + # TODO: Allow user options here? + nlm_solution <- stats::nlm( f = optim_final_loss, - p = deoptim$optim$bestmem, + p = pars, areas = areas_disjoint, circle = circle, iterlim = 1e4L )$estimate - last_ditch_fit <- as.vector(intersect_ellipses(last_ditch_effort, circle)) - last_ditch_diagError <- diagError(last_ditch_fit, orig) - - # Check for the best solution - if (last_ditch_diagError < nlm_diagError) { - final_par <- last_ditch_effort - fit <- last_ditch_fit + tpar <- as.data.frame(matrix( + data = nlm_solution, + ncol = if (circle) 3L else 5L, + dimnames = list( + setnames, + if (circle) c("h", "k", "r") else c("h", "k", "a", "b", "phi") + ), + byrow = TRUE + )) + if (circle) + tpar <- cbind(tpar, tpar[, 3L], 0) + + # Normalize layout + nlm_fit <- as.vector(intersect_ellipses(nlm_solution, circle)) + + nlm_pars <- compress_layout(normalize_pars(tpar), id, nlm_fit) + + nlm_diagError <- diagError(nlm_fit, orig) + + # If inadequate solution, try with a second optimizer (slower, better) + if (!circle && control$extraopt && + nlm_diagError > control$extraopt_threshold) { + # Set bounds for the parameters + newpars <- matrix( + data = as.vector(t(nlm_pars)), + ncol = 5L, + dimnames = list(setnames, c("h", "k", "a", "b", "phi")), + byrow = TRUE + ) + + constraints <- get_constraints(compress_layout(newpars, id, nlm_fit)) + + # TODO: Set up initial population in some clever fashion. + + deoptim <- RcppDE::DEoptim( + fn = optim_final_loss, + lower = constraints$lwr, + upper = constraints$upr, + control = do.call( + RcppDE::DEoptim.control, + utils::modifyList( + list(VTR = -Inf, + NP = length(newpars)*10, + CR = 0.6, + F = 0.2, + itermax = 1000L, + trace = FALSE), + control$extraopt_control + ) + ), + areas = areas_disjoint, + circle = circle + ) + + # Fine tune the fit from DEoptim + last_ditch_effort <- stats::nlm( + f = optim_final_loss, + p = deoptim$optim$bestmem, + areas = areas_disjoint, + circle = circle, + iterlim = 1e4L + )$estimate + + last_ditch_fit <- as.vector(intersect_ellipses(last_ditch_effort, + circle)) + last_ditch_diagError <- diagError(last_ditch_fit, orig) + + # Check for the best solution + if (last_ditch_diagError < nlm_diagError) { + final_par <- last_ditch_effort + fit <- last_ditch_fit + } else { + final_par <- nlm_solution + fit <- nlm_fit + } } else { final_par <- nlm_solution fit <- nlm_fit } - } else { - final_par <- nlm_solution - fit <- nlm_fit - } - names(orig) <- names(fit) <- - apply(id, 1L, function(x) paste0(setnames[x], collapse = "&")) + names(orig) <- names(fit) <- + apply(id, 1L, function(x) paste0(setnames[x], collapse = "&")) - regionError <- regionError(fit, orig) - diagError <- diagError(regionError = regionError) - stress <- stress(orig, fit) + regionError <- regionError(fit, orig) + diagError <- diagError(regionError = regionError) + stress <- stress(orig, fit) - fpar <- matrix(data = final_par, - ncol = if (circle) 3L else 5L, - byrow = TRUE) + fpar <- matrix(data = final_par, + ncol = if (circle) 3L else 5L, + byrow = TRUE) - if (circle) - fpar <- cbind(fpar, fpar[, 3L], 0) + if (circle) + fpar <- cbind(fpar, fpar[, 3L], 0) - dimnames(fpar) <- list(setnames, c("h", "k", "a", "b", "phi")) + dimnames(fpar) <- list(setnames, c("h", "k", "a", "b", "phi")) - # Normalize semiaxes and rotation - fpar <- normalize_pars(fpar) + # Normalize semiaxes and rotation + fpar <- normalize_pars(fpar) - # Find disjoint clusters and compress the layout - fpar <- compress_layout(fpar, id, fit) + # Find disjoint clusters and compress the layout + fpar <- compress_layout(fpar, id, fit) - # Center the solution on the coordinate plane - fpar <- center_layout(fpar) + # Center the solution on the coordinate plane + fpar <- center_layout(fpar) + } } else { # One set fpar <- matrix(data = c(0, 0, sqrt(areas/pi), sqrt(areas/pi), 0), diff --git a/R/geometry.R b/R/geometry.R index 826251a1..8e9b7a15 100644 --- a/R/geometry.R +++ b/R/geometry.R @@ -24,12 +24,16 @@ #' area. #' @keywords internal separate_two_discs <- function(r1, r2, overlap) { - stats::optimize(discdisc, - interval = c(abs(r1 - r2), sum(r1, r2)), - r1 = r1, - r2 = r2, - overlap = overlap, - tol = sqrt(.Machine$double.eps))$minimum + if (r1 > 0 && r2 > 0) { + stats::optimize(discdisc, + interval = c(abs(r1 - r2), sum(r1, r2)), + r1 = r1, + r2 = r2, + overlap = overlap, + tol = sqrt(.Machine$double.eps))$minimum + } else { + 0 + } } #' Get the bounding box of an ellipse diff --git a/R/layout.R b/R/layout.R index 91e620ef..0de7deec 100644 --- a/R/layout.R +++ b/R/layout.R @@ -227,19 +227,23 @@ compress_layout <- function(fpar, id, fit) { #' @return A centered version of `pars`. #' @keywords internal center_layout <- function(pars) { - h <- pars[, 1L] - k <- pars[, 2L] - a <- pars[, 3L] - b <- pars[, 4L] - phi <- pars[, 5L] + void <- pars[, 3L] == 0 | pars[, 4L] == 0 + h <- pars[!void, 1L] + k <- pars[!void, 2L] + a <- pars[!void, 3L] + b <- pars[!void, 4L] + phi <- pars[!void, 5L] cphi <- cos(phi) sphi <- sin(phi) xlim <- range(c(h + a*cphi, h + b*cphi, h - a*cphi, h - b*cphi)) ylim <- range(c(k + a*sphi, k + b*sphi, k - a*sphi, k - b*sphi)) - pars[, 1L] <- h + abs(xlim[1L] - xlim[2L])/2 - xlim[2L] - pars[, 2L] <- k + abs(ylim[1L] - ylim[2L])/2 - ylim[2L] + pars[!void, 1L] <- h + abs(xlim[1L] - xlim[2L])/2 - xlim[2L] + pars[!void, 2L] <- k + abs(ylim[1L] - ylim[2L])/2 - ylim[2L] + if (any(!void)) { + pars[void, 1L] <- pars[!void, 1L]/sum(!void) + pars[void, 2L] <- pars[!void, 2L]/sum(!void) + } pars } - diff --git a/R/plot.euler.R b/R/plot.euler.R index 96317009..4c7dda09 100644 --- a/R/plot.euler.R +++ b/R/plot.euler.R @@ -408,6 +408,12 @@ plot.euler <- function(x, ylim <- grDevices::extendrange(ylim, f = 0.01) xrng <- abs(xlim[1L] - xlim[2L]) yrng <- abs(ylim[1L] - ylim[2L]) + + if (xrng == 0 || yrng == 0) { + xrng <- yrng <- 1 + ylim <- xlim <- c(0, 1) + } + ar <- xrng/yrng adjust <- layout[1L]/layout[2] @@ -691,6 +697,7 @@ setup_geometry <- function(x, } if (do_labels || do_quantities) { + void_sets <- apply(id, 2, function(i) all(fitted[i] == 0)) singles <- rowSums(id) == 1 empty <- abs(fitted) < sqrt(.Machine$double.eps) @@ -723,12 +730,16 @@ setup_geometry <- function(x, quantities <- list(labels = orig[quantities_centers[, 3L]]) quantities$x <- quantities_centers[, 1L] quantities$y <- quantities_centers[, 2L] + quantities$x[void_sets] <- NA + quantities$y[void_sets] <- NA } if (do_labels) { labels$x <- labels_centers[, 1L] labels$y <- labels_centers[, 2L] labels$labels <- center_labels + labels$x[void_sets] <- NA + labels$y[void_sets] <- NA } } @@ -800,9 +811,11 @@ setup_grobs <- function(x, } else { fills_grob <- vector("list", n_id) fill_id <- seq_len(n_id) + empty_cols <- colSums(id & fitted > 0) == 0 + # skip <- rowSums(id[, empty_cols, drop = FALSE]) > 0 empty <- fitted < sqrt(.Machine$double.eps) - for (i in 1:n_e) { - if (empty[i]) { + for (i in seq_len(n_e)) { + if (empty[i] && !empty_cols[i]) { idx <- id[i, ] n_idx <- sum(idx) sub_id <- rowSums(id[, idx, drop = FALSE]) == n_idx diff --git a/tests/testthat/test_inputs.R b/tests/testthat/test_inputs.R index 85cc962e..e5b14022 100644 --- a/tests/testthat/test_inputs.R +++ b/tests/testthat/test_inputs.R @@ -4,7 +4,6 @@ test_that("erroneous named numeric vectors returns errors", { expect_error(euler(c(1, 2, 3))) expect_error(euler(c())) expect_error(euler(c(A = FALSE, B = TRUE, C = FALSE))) - expect_error(euler(c(A = 0, B = 2))) expect_error(euler(c(A = 10, A = 5))) expect_error(euler(c(A = 10, 4))) }) @@ -25,7 +24,6 @@ test_that("erroneous input using by argument return errors", { expect_error(euler(dat, by = list(x, y, z))) expect_error(euler(dat, by = dat[1:50, 3])) expect_error(euler(dat, by = list(dat[, 2]))) - expect_error(euler(dat, by = 1:100)) }) test_that("arguments to print.euler are specified correctly", {