From c5fc0d27212ac9d07cda8dc170881ff3fe3f587a Mon Sep 17 00:00:00 2001 From: Romain Francois Date: Mon, 28 May 2018 14:20:28 +0200 Subject: [PATCH 1/7] Add 1 to indices at collection time, so that the rest of the group_by algorithm can still use 0-based. --- src/group_indices.cpp | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/group_indices.cpp b/src/group_indices.cpp index 41b8d0e7da..843789ca74 100644 --- a/src/group_indices.cpp +++ b/src/group_indices.cpp @@ -67,12 +67,14 @@ class IntRange { int size; }; +inline int plus_one(int i) { return i + 1; } + class ListCollecter { public: ListCollecter(List& data_): data(data_), index(0) {} int collect(const std::vector& indices) { - data[index] = indices; + data[index] = IntegerVector( indices.begin(), indices.end(), plus_one ); return index++; } @@ -455,6 +457,7 @@ SEXP build_index_cpp(const DataFrame& data, const SymbolVector& vars) { vec_groups[i] = Rf_allocVector(TYPEOF(visited_data[i]), ncases); copy_most_attributes(vec_groups[i], visited_data[i]); } + vec_groups[nvars] = indices; groups_names[nvars] = ".rows"; s->make(vec_groups, indices_collecter); From 27b968074c82e524edab8d2e11c913f6fa162ad4 Mon Sep 17 00:00:00 2001 From: Romain Francois Date: Mon, 28 May 2018 14:28:43 +0200 Subject: [PATCH 2/7] From 1-based (what's in the data) to 0-based --- inst/include/tools/SlicingIndex.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/inst/include/tools/SlicingIndex.h b/inst/include/tools/SlicingIndex.h index 66effb2311..2d55bcd32d 100644 --- a/inst/include/tools/SlicingIndex.h +++ b/inst/include/tools/SlicingIndex.h @@ -29,7 +29,7 @@ class GroupedSlicingIndex : public SlicingIndex { } virtual int operator[](int i) const { - return data[i]; + return data[i] - 1; } virtual int group() const { From 2ba8f5474ec2e0f79a9c3ee98f1006d9ae9393d0 Mon Sep 17 00:00:00 2001 From: Romain Francois Date: Mon, 28 May 2018 14:29:39 +0200 Subject: [PATCH 3/7] do.grouped_df does not need to +1 indices anymore --- R/grouped-df.r | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/grouped-df.r b/R/grouped-df.r index fbb48ad0fd..13f943b691 100644 --- a/R/grouped-df.r +++ b/R/grouped-df.r @@ -206,9 +206,9 @@ do.grouped_df <- function(.data, ...) { # usual scoping rules. group_slice <- function(value) { if (missing(value)) { - group_data[index[[`_i`]] + 1L, , drop = FALSE] + group_data[index[[`_i`]], , drop = FALSE] } else { - group_data[index[[`_i`]] + 1L, ] <<- value + group_data[index[[`_i`]], ] <<- value } } env_bind_fns(.env = env, . = group_slice, .data = group_slice) From 26b04f0d37f024a892efed92289c6aec1db8f0ae Mon Sep 17 00:00:00 2001 From: Romain Francois Date: Mon, 28 May 2018 16:49:52 +0200 Subject: [PATCH 4/7] group_data() also uses 1-based indices for natural and rowwise data frames --- R/group_data.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/group_data.R b/R/group_data.R index 0dc70bde8e..a02e305ed6 100644 --- a/R/group_data.R +++ b/R/group_data.R @@ -31,13 +31,13 @@ group_data <- function(.data) { #' @export group_data.data.frame <- function(.data) { - rows <- list(seq2(0, nrow(.data) - 1)) + rows <- list(seq_len(nrow(.data))) tibble(.rows=rows) } #' @export group_data.rowwise_df <- function(.data) { - rows <- as.list(seq2(0, nrow(.data) - 1)) + rows <- as.list(seq_len(nrow(.data))) tibble(.rows=rows) } From d06199dd63afa1f33891cdb4bbf6d91556b5e537 Mon Sep 17 00:00:00 2001 From: Romain Francois Date: Mon, 28 May 2018 16:50:26 +0200 Subject: [PATCH 5/7] 0-based -> 1-based indices --- src/filter.cpp | 8 ++++---- src/group_indices.cpp | 6 ++++-- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/src/filter.cpp b/src/filter.cpp index 34ba1870cc..228779fb87 100644 --- a/src/filter.cpp +++ b/src/filter.cpp @@ -155,7 +155,7 @@ class GroupFilterIndices { void add_group(int i, const Index& old_idx, int n) { old_indices[i] = old_idx; - new_indices[i] = Rcpp::seq(k, k + n - 1); + new_indices[i] = Rcpp::seq(k + 1, k + n); k += n ; } @@ -236,7 +236,7 @@ class FilterVisitor { if (idx.is_dense(i)) { // in that case we can just all the data for (int j = 0; j < group_size; j++) { - out.copy(new_idx[j], old_idx[j]); + out.copy(new_idx[j] - 1, old_idx[j]); } } else { SEXP test = idx.tests[i]; @@ -257,7 +257,7 @@ class FilterVisitor { void copy_all_lgl(LogicalVector test, Data& out, int group_size, const IntegerVector& new_idx, const Index& old_idx) { for (int j = 0, k = 0; j < group_size; j++, k++) { while (test[k] != TRUE) k++ ; - out.copy(new_idx[j], old_idx[k]); + out.copy(new_idx[j] - 1, old_idx[k]); } } @@ -265,7 +265,7 @@ class FilterVisitor { SlicePositivePredicate pred(old_idx.size()); for (int j = 0, k = 0; j < group_size; j++, k++) { while (!pred(test[k])) k++ ; - out.copy(new_idx[j], old_idx[test[k] - 1]); + out.copy(new_idx[j] - 1, old_idx[test[k] - 1]); } } diff --git a/src/group_indices.cpp b/src/group_indices.cpp index 843789ca74..f8f6157470 100644 --- a/src/group_indices.cpp +++ b/src/group_indices.cpp @@ -67,14 +67,16 @@ class IntRange { int size; }; -inline int plus_one(int i) { return i + 1; } +inline int plus_one(int i) { + return i + 1; +} class ListCollecter { public: ListCollecter(List& data_): data(data_), index(0) {} int collect(const std::vector& indices) { - data[index] = IntegerVector( indices.begin(), indices.end(), plus_one ); + data[index] = IntegerVector(indices.begin(), indices.end(), plus_one); return index++; } From a5929e477c3ba1bda5c939e4389e2b3e6ad3590b Mon Sep 17 00:00:00 2001 From: Romain Francois Date: Mon, 28 May 2018 16:50:51 +0200 Subject: [PATCH 6/7] Update tests for 1-based indices --- tests/testthat/test-arrange.r | 2 +- tests/testthat/test-group_data.R | 12 ++++++------ tests/testthat/test-sets.R | 6 +++--- tests/testthat/test-slice.r | 2 +- 4 files changed, 11 insertions(+), 11 deletions(-) diff --git a/tests/testthat/test-arrange.r b/tests/testthat/test-arrange.r index ac553d879e..b10b085c65 100644 --- a/tests/testthat/test-arrange.r +++ b/tests/testthat/test-arrange.r @@ -84,7 +84,7 @@ test_that("arrange keeps the grouping structure (#605)", { res <- dat %>% group_by(g) %>% arrange(x) expect_is(res, "grouped_df") expect_equal(res$x, 1:4) - expect_equal(group_rows(res), list(c(1, 3), c(0, 2))) + expect_equal(group_rows(res), list(c(2, 4), c(1, 3))) }) test_that("arrange handles complex vectors", { diff --git a/tests/testthat/test-group_data.R b/tests/testthat/test-group_data.R index 0ca575f1d1..e6660e1d17 100644 --- a/tests/testthat/test-group_data.R +++ b/tests/testthat/test-group_data.R @@ -2,9 +2,9 @@ context("group_data") test_that("group_rows works for 3 most important subclasses (#3489)", { df <- data.frame(x=c(1,1,2,2)) - expect_equal(group_rows(df), list(0:3)) - expect_equal(group_rows(group_by(df,x)), list(0:1, 2:3)) - expect_equal(group_rows(rowwise(df)), as.list(0:3)) + expect_equal(group_rows(df), list(1:4)) + expect_equal(group_rows(group_by(df,x)), list(1:2, 3:4)) + expect_equal(group_rows(rowwise(df)), as.list(1:4)) }) test_that("group_data returns a tidy tibble (#3489)", { @@ -12,17 +12,17 @@ test_that("group_data returns a tidy tibble (#3489)", { expect_identical( group_data(df), - tibble(.rows=list(0:3)) + tibble(.rows=list(1:4)) ) expect_identical( group_by(df,x) %>% group_data(), - tibble(x = c(1,2), .rows = list(0:1, 2:3)) + tibble(x = c(1,2), .rows = list(1:2, 3:4)) ) expect_identical( rowwise(df) %>% group_data(), - tibble(.rows = as.list(0:3)) + tibble(.rows = as.list(1:4)) ) }) diff --git a/tests/testthat/test-sets.R b/tests/testthat/test-sets.R index cb7ddfb56e..fcc5c92123 100644 --- a/tests/testthat/test-sets.R +++ b/tests/testthat/test-sets.R @@ -73,7 +73,7 @@ test_that("set operations reconstruct grouping metadata (#3587)", { df1 <- tibble(x = 1:4, g = rep(1:2, each = 2)) %>% group_by(g) df2 <- tibble(x = 3:6, g = rep(2:3, each = 2)) - expect_equal( setdiff(df1, df2) %>% group_rows(), list(0:1)) - expect_equal( intersect(df1, df2) %>% group_rows(), list(0:1)) - expect_equal( union(df1, df2) %>% group_rows(), list(4:5, 2:3, 0:1)) + expect_equal( setdiff(df1, df2) %>% group_rows(), list(1:2)) + expect_equal( intersect(df1, df2) %>% group_rows(), list(1:2)) + expect_equal( union(df1, df2) %>% group_rows(), list(5:6, 3:4, 1:2)) }) diff --git a/tests/testthat/test-slice.r b/tests/testthat/test-slice.r index a6ef59c177..98e0a2fe0c 100644 --- a/tests/testthat/test-slice.r +++ b/tests/testthat/test-slice.r @@ -100,7 +100,7 @@ test_that("slice works fine if n > nrow(df) (#1269)", { test_that("slice strips grouped indices (#1405)", { res <- mtcars %>% group_by(cyl) %>% slice(1) %>% mutate(mpgplus = mpg + 1) expect_equal(nrow(res), 3L) - expect_equal(group_rows(res), as.list(0:2)) + expect_equal(group_rows(res), as.list(1:3)) }) test_that("slice works with zero-column data frames (#2490)", { From 04ff3979e44904f6c936a5b94fea7495955216ce Mon Sep 17 00:00:00 2001 From: Romain Francois Date: Mon, 28 May 2018 19:19:35 +0200 Subject: [PATCH 7/7] no longer 0-based --- R/group_data.R | 2 +- man/group_data.Rd | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/R/group_data.R b/R/group_data.R index a02e305ed6..9ae2a60aef 100644 --- a/R/group_data.R +++ b/R/group_data.R @@ -9,7 +9,7 @@ group_rows <- function(.data) { #' @param .data a tibble #' #' @return `group_data()` return a tibble with one row per group. The last column, always called `.rows` -#' is a list of (0-based) integer vectors indicating the rows for each group. +#' is a list of integer vectors indicating the rows for each group. #' If `.data` is a grouped data frame the first columns are the grouping variables. #' `group_rows()` just returns the list of indices. #' diff --git a/man/group_data.Rd b/man/group_data.Rd index 9997518c13..7c91120007 100644 --- a/man/group_data.Rd +++ b/man/group_data.Rd @@ -14,7 +14,7 @@ group_data(.data) } \value{ \code{group_data()} return a tibble with one row per group. The last column, always called \code{.rows} -is a list of (0-based) integer vectors indicating the rows for each group. +is a list of integer vectors indicating the rows for each group. If \code{.data} is a grouped data frame the first columns are the grouping variables. \code{group_rows()} just returns the list of indices. }