From 005e75aa56a0cdf87a43124345957f207202d272 Mon Sep 17 00:00:00 2001
From: Johan Larsson <johan.larsson@outlook.com>
Date: Sun, 25 Feb 2018 00:02:17 +0100
Subject: [PATCH] allow empty sets. closes #23

---
 NEWS.md                      |   1 +
 R/euler.R                    | 320 ++++++++++++++++++-----------------
 R/geometry.R                 |  16 +-
 R/layout.R                   |  20 ++-
 R/plot.euler.R               |  17 +-
 tests/testthat/test_inputs.R |   2 -
 6 files changed, 205 insertions(+), 171 deletions(-)

diff --git a/NEWS.md b/NEWS.md
index 3abba671..af23e821 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -8,6 +8,7 @@ are now also found in this function and `plot.eulergram()` and
 of `plot.euler()`. This change means that functions such as
 `gridExtra::grid.arrange()` now work as one would intuit on the objects
 produced by `plot.euler()`. 
+* Fitting and plotting euler diagrams with empty sets is now allowed (#23).
 
 ## Bug fixes
 
diff --git a/R/euler.R b/R/euler.R
index d172c599..ca49457e 100644
--- a/R/euler.R
+++ b/R/euler.R
@@ -178,182 +178,196 @@ euler.default <- function(
         stop("Check your set configuration. Some disjoint areas are negative.")
     }
 
-    id_sums <- rowSums(id)
-    ones <- id_sums == 1L
-    twos <- id_sums == 2L
-    two <- choose_two(1L:n)
-    r <- sqrt(areas[ones]/pi)
-
-    # Establish identities of disjoint and subset sets
-    subset <- disjoint <- matrix(FALSE, ncol = n, nrow = n)
-    distances <- area_mat <- matrix(0, ncol = n, nrow = n)
-
-    lwrtri <- lower.tri(subset)
-
-    tmp <- matrix(areas[ones][two], ncol = 2L)
-
-    subset[lwrtri] <- areas[twos] == tmp[, 1L] | areas[twos] == tmp[, 2L]
-    disjoint[lwrtri] <- areas[twos] == 0
-    distances[lwrtri] <- mapply(separate_two_discs,
-                                r1 = r[two[, 1L]],
-                                r2 = r[two[, 2L]],
-                                overlap = areas[twos],
-                                USE.NAMES = FALSE)
-
-    # Starting layout
-    loss <- Inf
-    initial_layouts <- vector("list", n_restarts)
-    bnd <- sqrt(sum(r^2*pi))
-
-    i <- 1L
-    while (loss > small && i <= n_restarts) {
-      initial_layouts[[i]] <- stats::nlm(
-        f = optim_init,
-        p = stats::runif(n*2, 0, bnd),
-        d = distances,
-        disjoint = disjoint,
-        subset = subset,
-        iterlim = 1000L
-      )
-      loss <- initial_layouts[[i]]$minimum
-      i <- i + 1L
-    }
+    if (all(areas == 0)) {
+      # all sets are zero
+      fpar <- matrix(data = rep.int(0, 5L*n),
+                     ncol = 5L,
+                     dimnames = list(setnames, c("h", "k", "a", "b", "phi")))
+      regionError <- diagError <- stress <- 0
+      orig <- fit <- areas
+      names(orig) <- names(fit) <-
+        apply(id, 1L, function(x) paste0(setnames[x], collapse = "&"))
+    } else {
+      id_sums <- rowSums(id)
+      ones <- id_sums == 1L
+      twos <- id_sums == 2L
+      two <- choose_two(1L:n)
+      r <- sqrt(areas[ones]/pi)
+
+      # Establish identities of disjoint and subset sets
+      subset <- disjoint <- matrix(FALSE, ncol = n, nrow = n)
+      distances <- area_mat <- matrix(0, ncol = n, nrow = n)
+
+      lwrtri <- lower.tri(subset)
+
+      tmp <- matrix(areas[ones][two], ncol = 2L)
+
+      subset[lwrtri] <- areas[twos] == tmp[, 1L] | areas[twos] == tmp[, 2L]
+      disjoint[lwrtri] <- areas[twos] == 0
+      distances[lwrtri] <- mapply(separate_two_discs,
+                                  r1 = r[two[, 1L]],
+                                  r2 = r[two[, 2L]],
+                                  overlap = areas[twos],
+                                  USE.NAMES = FALSE)
+
+      # Starting layout
+      loss <- Inf
+      initial_layouts <- vector("list", n_restarts)
+      bnd <- sqrt(sum(r^2*pi))
+
+      i <- 1L
+      while (loss > small && i <= n_restarts) {
+        initial_layouts[[i]] <- stats::nlm(
+          f = optim_init,
+          p = stats::runif(n*2, 0, bnd),
+          d = distances,
+          disjoint = disjoint,
+          subset = subset,
+          iterlim = 1000L
+        )
+        loss <- initial_layouts[[i]]$minimum
+        i <- i + 1L
+      }
 
-    # Find the best initial layout
-    best_init <- which.min(lapply(initial_layouts[1L:(i - 1L)],
-                                  "[[",
-                                  "minimum"))
-    initial_layout <- initial_layouts[[best_init]]
+      # Find the best initial layout
+      best_init <- which.min(lapply(initial_layouts[1L:(i - 1L)],
+                                    "[[",
+                                    "minimum"))
+      initial_layout <- initial_layouts[[best_init]]
 
-    # Final layout
-    circle <- match.arg(shape) == "circle"
+      # Final layout
+      circle <- match.arg(shape) == "circle"
 
-    if (circle) {
-      pars <- as.vector(matrix(c(initial_layout$estimate, r), 3L, byrow = TRUE))
-      lwr <- rep.int(0, 3L)
-      upr <- rep.int(bnd, 3L)
-    } else {
-      pars <- as.vector(rbind(matrix(initial_layout$estimate, 2L, byrow = TRUE),
-                              r, r, 0, deparse.level = 0L))
-      lwr <- c(rep.int(0, 4L), -2*pi)
-      upr <- c(rep.int(bnd, 4L), 2*pi)
-    }
+      if (circle) {
+        pars <- as.vector(matrix(c(initial_layout$estimate, r), 3L,
+                                 byrow = TRUE))
+        lwr <- rep.int(0, 3L)
+        upr <- rep.int(bnd, 3L)
+      } else {
+        pars <- as.vector(rbind(matrix(initial_layout$estimate, 2L,
+                                       byrow = TRUE),
+                                r, r, 0, deparse.level = 0L))
+        lwr <- c(rep.int(0, 4L), -2*pi)
+        upr <- c(rep.int(bnd, 4L), 2*pi)
+      }
 
-    orig <- areas_disjoint
-
-    # Try to find a solution using nlm() first (faster)
-    # TODO: Allow user options here?
-    nlm_solution <- stats::nlm(
-      f = optim_final_loss,
-      p = pars,
-      areas = areas_disjoint,
-      circle = circle,
-      iterlim = 1e4L
-    )$estimate
-
-    tpar <- as.data.frame(matrix(
-      data = nlm_solution,
-      ncol = if (circle) 3L else 5L,
-      dimnames = list(
-        setnames,
-        if (circle) c("h", "k", "r") else c("h", "k", "a", "b", "phi")
-      ),
-      byrow = TRUE
-    ))
-    if (circle)
-      tpar <- cbind(tpar, tpar[, 3L], 0)
-
-    # Normalize layout
-    nlm_fit <- as.vector(intersect_ellipses(nlm_solution, circle))
-
-    nlm_pars <- compress_layout(normalize_pars(tpar), id, nlm_fit)
-
-    nlm_diagError <- diagError(nlm_fit, orig)
-
-    # If inadequate solution, try with a second optimizer (slower, better)
-    if (!circle && control$extraopt &&
-        nlm_diagError > control$extraopt_threshold) {
-      # Set bounds for the parameters
-      newpars <- matrix(
-        data = as.vector(t(nlm_pars)),
-        ncol = 5L,
-        dimnames = list(setnames, c("h", "k", "a", "b", "phi")),
-        byrow = TRUE
-      )
-
-      constraints <- get_constraints(compress_layout(newpars, id, nlm_fit))
-
-      # TODO: Set up initial population in some clever fashion.
-
-      deoptim <- RcppDE::DEoptim(
-        fn = optim_final_loss,
-        lower = constraints$lwr,
-        upper = constraints$upr,
-        control = do.call(
-          RcppDE::DEoptim.control,
-          utils::modifyList(
-            list(VTR = -Inf,
-                 NP = length(newpars)*10,
-                 CR = 0.6,
-                 F = 0.2,
-                 itermax = 1000L,
-                 trace = FALSE),
-            control$extraopt_control
-          )
-        ),
-        areas = areas_disjoint,
-        circle = circle
-      )
+      orig <- areas_disjoint
 
-      # Fine tune the fit from DEoptim
-      last_ditch_effort <- stats::nlm(
+      # Try to find a solution using nlm() first (faster)
+      # TODO: Allow user options here?
+      nlm_solution <- stats::nlm(
         f = optim_final_loss,
-        p = deoptim$optim$bestmem,
+        p = pars,
         areas = areas_disjoint,
         circle = circle,
         iterlim = 1e4L
       )$estimate
 
-      last_ditch_fit <- as.vector(intersect_ellipses(last_ditch_effort, circle))
-      last_ditch_diagError <- diagError(last_ditch_fit, orig)
-
-      # Check for the best solution
-      if (last_ditch_diagError < nlm_diagError) {
-        final_par <- last_ditch_effort
-        fit <- last_ditch_fit
+      tpar <- as.data.frame(matrix(
+        data = nlm_solution,
+        ncol = if (circle) 3L else 5L,
+        dimnames = list(
+          setnames,
+          if (circle) c("h", "k", "r") else c("h", "k", "a", "b", "phi")
+        ),
+        byrow = TRUE
+      ))
+      if (circle)
+        tpar <- cbind(tpar, tpar[, 3L], 0)
+
+      # Normalize layout
+      nlm_fit <- as.vector(intersect_ellipses(nlm_solution, circle))
+
+      nlm_pars <- compress_layout(normalize_pars(tpar), id, nlm_fit)
+
+      nlm_diagError <- diagError(nlm_fit, orig)
+
+      # If inadequate solution, try with a second optimizer (slower, better)
+      if (!circle && control$extraopt &&
+          nlm_diagError > control$extraopt_threshold) {
+        # Set bounds for the parameters
+        newpars <- matrix(
+          data = as.vector(t(nlm_pars)),
+          ncol = 5L,
+          dimnames = list(setnames, c("h", "k", "a", "b", "phi")),
+          byrow = TRUE
+        )
+
+        constraints <- get_constraints(compress_layout(newpars, id, nlm_fit))
+
+        # TODO: Set up initial population in some clever fashion.
+
+        deoptim <- RcppDE::DEoptim(
+          fn = optim_final_loss,
+          lower = constraints$lwr,
+          upper = constraints$upr,
+          control = do.call(
+            RcppDE::DEoptim.control,
+            utils::modifyList(
+              list(VTR = -Inf,
+                   NP = length(newpars)*10,
+                   CR = 0.6,
+                   F = 0.2,
+                   itermax = 1000L,
+                   trace = FALSE),
+              control$extraopt_control
+            )
+          ),
+          areas = areas_disjoint,
+          circle = circle
+        )
+
+        # Fine tune the fit from DEoptim
+        last_ditch_effort <- stats::nlm(
+          f = optim_final_loss,
+          p = deoptim$optim$bestmem,
+          areas = areas_disjoint,
+          circle = circle,
+          iterlim = 1e4L
+        )$estimate
+
+        last_ditch_fit <- as.vector(intersect_ellipses(last_ditch_effort,
+                                                       circle))
+        last_ditch_diagError <- diagError(last_ditch_fit, orig)
+
+        # Check for the best solution
+        if (last_ditch_diagError < nlm_diagError) {
+          final_par <- last_ditch_effort
+          fit <- last_ditch_fit
+        } else {
+          final_par <- nlm_solution
+          fit <- nlm_fit
+        }
       } else {
         final_par <- nlm_solution
         fit <- nlm_fit
       }
-    } else {
-      final_par <- nlm_solution
-      fit <- nlm_fit
-    }
 
-    names(orig) <- names(fit) <-
-      apply(id, 1L, function(x) paste0(setnames[x], collapse = "&"))
+      names(orig) <- names(fit) <-
+        apply(id, 1L, function(x) paste0(setnames[x], collapse = "&"))
 
-    regionError <- regionError(fit, orig)
-    diagError <- diagError(regionError = regionError)
-    stress <- stress(orig, fit)
+      regionError <- regionError(fit, orig)
+      diagError <- diagError(regionError = regionError)
+      stress <- stress(orig, fit)
 
-    fpar <- matrix(data = final_par,
-                   ncol = if (circle) 3L else 5L,
-                   byrow = TRUE)
+      fpar <- matrix(data = final_par,
+                     ncol = if (circle) 3L else 5L,
+                     byrow = TRUE)
 
-    if (circle)
-      fpar <- cbind(fpar, fpar[, 3L], 0)
+      if (circle)
+        fpar <- cbind(fpar, fpar[, 3L], 0)
 
-    dimnames(fpar) <- list(setnames, c("h", "k", "a", "b", "phi"))
+      dimnames(fpar) <- list(setnames, c("h", "k", "a", "b", "phi"))
 
-    # Normalize semiaxes and rotation
-    fpar <- normalize_pars(fpar)
+      # Normalize semiaxes and rotation
+      fpar <- normalize_pars(fpar)
 
-    # Find disjoint clusters and compress the layout
-    fpar <- compress_layout(fpar, id, fit)
+      # Find disjoint clusters and compress the layout
+      fpar <- compress_layout(fpar, id, fit)
 
-    # Center the solution on the coordinate plane
-    fpar <- center_layout(fpar)
+      # Center the solution on the coordinate plane
+      fpar <- center_layout(fpar)
+    }
   } else {
     # One set
     fpar <- matrix(data = c(0, 0, sqrt(areas/pi), sqrt(areas/pi), 0),
diff --git a/R/geometry.R b/R/geometry.R
index 826251a1..8e9b7a15 100644
--- a/R/geometry.R
+++ b/R/geometry.R
@@ -24,12 +24,16 @@
 #'   area.
 #' @keywords internal
 separate_two_discs <- function(r1, r2, overlap) {
-  stats::optimize(discdisc,
-                  interval = c(abs(r1 - r2), sum(r1, r2)),
-                  r1 = r1,
-                  r2 = r2,
-                  overlap = overlap,
-                  tol = sqrt(.Machine$double.eps))$minimum
+  if (r1 > 0 && r2 > 0) {
+    stats::optimize(discdisc,
+                    interval = c(abs(r1 - r2), sum(r1, r2)),
+                    r1 = r1,
+                    r2 = r2,
+                    overlap = overlap,
+                    tol = sqrt(.Machine$double.eps))$minimum
+  } else {
+    0
+  }
 }
 
 #' Get the bounding box of an ellipse
diff --git a/R/layout.R b/R/layout.R
index 91e620ef..0de7deec 100644
--- a/R/layout.R
+++ b/R/layout.R
@@ -227,19 +227,23 @@ compress_layout <- function(fpar, id, fit) {
 #' @return A centered version of `pars`.
 #' @keywords internal
 center_layout <- function(pars) {
-  h <- pars[, 1L]
-  k <- pars[, 2L]
-  a <- pars[, 3L]
-  b <- pars[, 4L]
-  phi <- pars[, 5L]
+  void <- pars[, 3L] == 0 | pars[, 4L] == 0
+  h <- pars[!void, 1L]
+  k <- pars[!void, 2L]
+  a <- pars[!void, 3L]
+  b <- pars[!void, 4L]
+  phi <- pars[!void, 5L]
 
   cphi <- cos(phi)
   sphi <- sin(phi)
   xlim <- range(c(h + a*cphi, h + b*cphi, h - a*cphi, h - b*cphi))
   ylim <- range(c(k + a*sphi, k + b*sphi, k - a*sphi, k - b*sphi))
 
-  pars[, 1L] <- h + abs(xlim[1L] - xlim[2L])/2 - xlim[2L]
-  pars[, 2L] <- k + abs(ylim[1L] - ylim[2L])/2 - ylim[2L]
+  pars[!void, 1L] <- h + abs(xlim[1L] - xlim[2L])/2 - xlim[2L]
+  pars[!void, 2L] <- k + abs(ylim[1L] - ylim[2L])/2 - ylim[2L]
+  if (any(!void)) {
+    pars[void, 1L] <- pars[!void, 1L]/sum(!void)
+    pars[void, 2L] <- pars[!void, 2L]/sum(!void)
+  }
   pars
 }
-
diff --git a/R/plot.euler.R b/R/plot.euler.R
index 96317009..4c7dda09 100644
--- a/R/plot.euler.R
+++ b/R/plot.euler.R
@@ -408,6 +408,12 @@ plot.euler <- function(x,
   ylim <- grDevices::extendrange(ylim, f = 0.01)
   xrng <- abs(xlim[1L] - xlim[2L])
   yrng <- abs(ylim[1L] - ylim[2L])
+
+  if (xrng == 0 || yrng == 0) {
+    xrng <- yrng <- 1
+    ylim <- xlim <- c(0, 1)
+  }
+
   ar <- xrng/yrng
   adjust <- layout[1L]/layout[2]
 
@@ -691,6 +697,7 @@ setup_geometry <- function(x,
   }
 
   if (do_labels || do_quantities) {
+    void_sets <- apply(id, 2, function(i) all(fitted[i] == 0))
     singles <- rowSums(id) == 1
     empty <- abs(fitted) < sqrt(.Machine$double.eps)
 
@@ -723,12 +730,16 @@ setup_geometry <- function(x,
         quantities <- list(labels = orig[quantities_centers[, 3L]])
       quantities$x <- quantities_centers[, 1L]
       quantities$y <- quantities_centers[, 2L]
+      quantities$x[void_sets] <- NA
+      quantities$y[void_sets] <- NA
     }
 
     if (do_labels) {
       labels$x <- labels_centers[, 1L]
       labels$y <- labels_centers[, 2L]
       labels$labels <- center_labels
+      labels$x[void_sets] <- NA
+      labels$y[void_sets] <- NA
     }
   }
 
@@ -800,9 +811,11 @@ setup_grobs <- function(x,
     } else {
       fills_grob <- vector("list", n_id)
       fill_id <- seq_len(n_id)
+      empty_cols <- colSums(id & fitted > 0) == 0
+      # skip <- rowSums(id[, empty_cols, drop = FALSE]) > 0
       empty <- fitted < sqrt(.Machine$double.eps)
-      for (i in 1:n_e) {
-        if (empty[i]) {
+      for (i in seq_len(n_e)) {
+        if (empty[i] && !empty_cols[i]) {
           idx <- id[i, ]
           n_idx <- sum(idx)
           sub_id <- rowSums(id[, idx, drop = FALSE]) == n_idx
diff --git a/tests/testthat/test_inputs.R b/tests/testthat/test_inputs.R
index 85cc962e..e5b14022 100644
--- a/tests/testthat/test_inputs.R
+++ b/tests/testthat/test_inputs.R
@@ -4,7 +4,6 @@ test_that("erroneous named numeric vectors returns errors", {
   expect_error(euler(c(1, 2, 3)))
   expect_error(euler(c()))
   expect_error(euler(c(A = FALSE, B = TRUE, C = FALSE)))
-  expect_error(euler(c(A = 0, B = 2)))
   expect_error(euler(c(A = 10, A = 5)))
   expect_error(euler(c(A = 10, 4)))
 })
@@ -25,7 +24,6 @@ test_that("erroneous input using by argument return errors", {
   expect_error(euler(dat, by = list(x, y, z)))
   expect_error(euler(dat, by = dat[1:50, 3]))
   expect_error(euler(dat, by = list(dat[, 2])))
-  expect_error(euler(dat, by = 1:100))
 })
 
 test_that("arguments to print.euler are specified correctly", {