Merge pull request #3610 from tidyverse/f-3605-1based-indices

Using 1-based indices in grouping metadata
tidyverse · May 29, 2018 · c454a8f · c454a8f
2 parents fde94cc + 04ff397
commit c454a8f
Show file tree

Hide file tree

Showing 10 changed files with 28 additions and 23 deletions.
diff --git a/R/group_data.R b/R/group_data.R
@@ -9,7 +9,7 @@ group_rows <- function(.data) {
 #' @param .data a tibble
 #'
 #' @return `group_data()` return a tibble with one row per group. The last column, always called `.rows`
-#' is a list of (0-based) integer vectors indicating the rows for each group.
+#' is a list of integer vectors indicating the rows for each group.
 #' If `.data` is a grouped data frame the first columns are the grouping variables.
 #' `group_rows()` just returns the list of indices.
 #'
@@ -31,13 +31,13 @@ group_data <- function(.data) {
 
 #' @export
 group_data.data.frame <- function(.data) {
-  rows <- list(seq2(0, nrow(.data) - 1))
+  rows <- list(seq_len(nrow(.data)))
   tibble(.rows=rows)
 }
 
 #' @export
 group_data.rowwise_df <- function(.data) {
-  rows <- as.list(seq2(0, nrow(.data) - 1))
+  rows <- as.list(seq_len(nrow(.data)))
   tibble(.rows=rows)
 }
 

diff --git a/R/grouped-df.r b/R/grouped-df.r
@@ -206,9 +206,9 @@ do.grouped_df <- function(.data, ...) {
   # usual scoping rules.
   group_slice <- function(value) {
     if (missing(value)) {
-      group_data[index[[`_i`]] + 1L, , drop = FALSE]
+      group_data[index[[`_i`]], , drop = FALSE]
     } else {
-      group_data[index[[`_i`]] + 1L, ] <<- value
+      group_data[index[[`_i`]], ] <<- value
     }
   }
   env_bind_fns(.env = env, . = group_slice, .data = group_slice)

diff --git a/inst/include/tools/SlicingIndex.h b/inst/include/tools/SlicingIndex.h
@@ -29,7 +29,7 @@ class GroupedSlicingIndex : public SlicingIndex {
   }
 
   virtual int operator[](int i) const {
-    return data[i];
+    return data[i] - 1;
   }
 
   virtual int group() const {

diff --git a/man/group_data.Rd b/man/group_data.Rd
diff --git a/src/filter.cpp b/src/filter.cpp
@@ -155,7 +155,7 @@ class GroupFilterIndices {
 
   void add_group(int i, const Index& old_idx, int n) {
     old_indices[i] = old_idx;
-    new_indices[i] = Rcpp::seq(k, k + n - 1);
+    new_indices[i] = Rcpp::seq(k + 1, k + n);
     k += n ;
   }
 
@@ -236,7 +236,7 @@ class FilterVisitor {
         if (idx.is_dense(i)) {
           // in that case we can just all the data
           for (int j = 0; j < group_size; j++) {
-            out.copy(new_idx[j], old_idx[j]);
+            out.copy(new_idx[j] - 1, old_idx[j]);
           }
         } else {
           SEXP test = idx.tests[i];
@@ -257,15 +257,15 @@ class FilterVisitor {
   void copy_all_lgl(LogicalVector test, Data<RTYPE>& out, int group_size, const IntegerVector& new_idx, const Index& old_idx) {
     for (int j = 0, k = 0; j < group_size; j++, k++) {
       while (test[k] != TRUE) k++ ;
-      out.copy(new_idx[j], old_idx[k]);
+      out.copy(new_idx[j] - 1, old_idx[k]);
     }
   }
 
   void copy_all_int(IntegerVector test, Data<RTYPE>& out, int group_size, const IntegerVector& new_idx, const Index& old_idx) {
     SlicePositivePredicate pred(old_idx.size());
     for (int j = 0, k = 0; j < group_size; j++, k++) {
       while (!pred(test[k])) k++ ;
-      out.copy(new_idx[j], old_idx[test[k] - 1]);
+      out.copy(new_idx[j] - 1, old_idx[test[k] - 1]);
     }
   }
 

diff --git a/src/group_indices.cpp b/src/group_indices.cpp
@@ -67,12 +67,16 @@ class IntRange {
   int size;
 };
 
+inline int plus_one(int i) {
+  return i + 1;
+}
+
 class ListCollecter {
 public:
   ListCollecter(List& data_): data(data_), index(0) {}
 
   int collect(const std::vector<int>& indices) {
-    data[index] = indices;
+    data[index] = IntegerVector(indices.begin(), indices.end(), plus_one);
     return index++;
   }
 
@@ -455,6 +459,7 @@ SEXP build_index_cpp(const DataFrame& data, const SymbolVector& vars) {
     vec_groups[i] = Rf_allocVector(TYPEOF(visited_data[i]), ncases);
     copy_most_attributes(vec_groups[i], visited_data[i]);
   }
+
   vec_groups[nvars] = indices;
   groups_names[nvars] = ".rows";
   s->make(vec_groups, indices_collecter);

diff --git a/tests/testthat/test-arrange.r b/tests/testthat/test-arrange.r
@@ -84,7 +84,7 @@ test_that("arrange keeps the grouping structure (#605)", {
   res <- dat %>% group_by(g) %>% arrange(x)
   expect_is(res, "grouped_df")
   expect_equal(res$x, 1:4)
-  expect_equal(group_rows(res), list(c(1, 3), c(0, 2)))
+  expect_equal(group_rows(res), list(c(2, 4), c(1, 3)))
 })
 
 test_that("arrange handles complex vectors", {

diff --git a/tests/testthat/test-group_data.R b/tests/testthat/test-group_data.R
@@ -2,27 +2,27 @@ context("group_data")
 
 test_that("group_rows works for 3 most important subclasses (#3489)", {
   df <- data.frame(x=c(1,1,2,2))
-  expect_equal(group_rows(df), list(0:3))
-  expect_equal(group_rows(group_by(df,x)), list(0:1, 2:3))
-  expect_equal(group_rows(rowwise(df)), as.list(0:3))
+  expect_equal(group_rows(df), list(1:4))
+  expect_equal(group_rows(group_by(df,x)), list(1:2, 3:4))
+  expect_equal(group_rows(rowwise(df)), as.list(1:4))
 })
 
 test_that("group_data returns a tidy tibble (#3489)", {
   df <- tibble(x = c(1,1,2,2))
 
   expect_identical(
     group_data(df),
-    tibble(.rows=list(0:3))
+    tibble(.rows=list(1:4))
   )
 
   expect_identical(
     group_by(df,x) %>% group_data(),
-    tibble(x = c(1,2), .rows = list(0:1, 2:3))
+    tibble(x = c(1,2), .rows = list(1:2, 3:4))
   )
 
   expect_identical(
     rowwise(df) %>% group_data(),
-    tibble(.rows = as.list(0:3))
+    tibble(.rows = as.list(1:4))
   )
 })
 

diff --git a/tests/testthat/test-sets.R b/tests/testthat/test-sets.R
@@ -73,7 +73,7 @@ test_that("set operations reconstruct grouping metadata (#3587)", {
   df1 <- tibble(x = 1:4, g = rep(1:2, each = 2)) %>% group_by(g)
   df2 <- tibble(x = 3:6, g = rep(2:3, each = 2))
 
-  expect_equal( setdiff(df1, df2) %>% group_rows(), list(0:1))
-  expect_equal( intersect(df1, df2) %>% group_rows(), list(0:1))
-  expect_equal( union(df1, df2) %>% group_rows(), list(4:5, 2:3, 0:1))
+  expect_equal( setdiff(df1, df2) %>% group_rows(), list(1:2))
+  expect_equal( intersect(df1, df2) %>% group_rows(), list(1:2))
+  expect_equal( union(df1, df2) %>% group_rows(), list(5:6, 3:4, 1:2))
 })
diff --git a/tests/testthat/test-slice.r b/tests/testthat/test-slice.r
@@ -100,7 +100,7 @@ test_that("slice works fine if n > nrow(df) (#1269)", {
 test_that("slice strips grouped indices (#1405)", {
   res <- mtcars %>% group_by(cyl) %>% slice(1) %>% mutate(mpgplus = mpg + 1)
   expect_equal(nrow(res), 3L)
-  expect_equal(group_rows(res), as.list(0:2))
+  expect_equal(group_rows(res), as.list(1:3))
 })
 
 test_that("slice works with zero-column data frames (#2490)", {