diff --git a/R/group_utils.R b/R/group_utils.R index 4859841..8209dff 100644 --- a/R/group_utils.R +++ b/R/group_utils.R @@ -63,11 +63,18 @@ calculate_groups <- function(data, groups, drop = group_by_drop_default(data)) { unique_groups <- unique(data[, groups, drop = FALSE]) is_factor <- do.call(c, lapply(unique_groups, function(x) is.factor(x))) n_comb <- nrow(unique_groups) + + temp_id <- paste(sample(letters), collapse = "") + + # Concatenate group variables + pasted_groups <- do.call(paste, c(unique_groups, sep = ".")) + data[[temp_id]] <- do.call(paste, c(data[, groups, drop = FALSE], sep = ".")) + rows <- rep(list(NA), n_comb) - data_groups <- interaction(data[, groups, drop = TRUE]) for (i in seq_len(n_comb)) { - rows[[i]] <- which(data_groups %in% interaction(unique_groups[i, groups])) + rows[[i]] <- which(data[[temp_id]] %in% pasted_groups[i]) } + data[[temp_id]] <- NULL if (!isTRUE(drop) && any(is_factor)) { na_lvls <- do.call( diff --git a/inst/tinytest/test_group_by.R b/inst/tinytest/test_group_by.R index eeb56b2..888ac18 100644 --- a/inst/tinytest/test_group_by.R +++ b/inst/tinytest/test_group_by.R @@ -154,8 +154,54 @@ df <- data.frame(x = 1:2, y = 1:2) %>% structure(class = c("grouped_df", "data.frame")) expect_true(group_by_drop_default(df), info = "group_by_drop_default() is forgiving about corrupt grouped df") + +# with NA in groups --------------------------------------------------- + +# One group res <- data.frame(x = c("apple", NA, "banana"), y = 1:3, stringsAsFactors = FALSE) %>% group_by(x) %>% group_data() -expect_identical(res$x, c("apple", "banana", NA_character_), info = "group_by() puts NA groups last in STRSXP") -expect_identical(res$.rows, list(1L, 3L, 2L), info = "group_by() puts NA groups last in STRSXP") + +expect_identical( + res$x, + c("apple", "banana", NA_character_), + info = "group_by() puts NA groups last in STRSXP" +) +expect_identical( + res$.rows, + list(1L, 3L, 2L), + info = "group_by() puts NA groups last in STRSXP" +) + +# Several groups +d <- data.frame( + orig = rep(c("France", "UK"), each = 4), + dest = rep(c("Spain", "Germany"), times = 4), + year = rep(rep(c(2010, 2011), each = 2), 2), + value = 1:8 +) +d[2, 1] <- NA +d[7, 2] <- NA + +res <- d %>% + group_by(orig, dest) %>% + group_data() + +expect_identical(nrow(res), 6L) +expect_identical( + res[5:6, 1:3], + structure( + list2DF( + list( + orig = c("UK", NA), + dest = c(NA, "Germany"), + .rows = list(7L, 2L) + ) + ), + row.names = 5:6 + ) +) +expect_identical( + vapply(res$.rows, length, FUN.VALUE = numeric(1L)), + c(1, 2, 2, 1, 1, 1) +)