Skip to content

Commit

Permalink
Merge pull request #3610 from tidyverse/f-3605-1based-indices
Browse files Browse the repository at this point in the history
Using 1-based indices in grouping metadata
  • Loading branch information
romainfrancois authored May 29, 2018
2 parents fde94cc + 04ff397 commit c454a8f
Show file tree
Hide file tree
Showing 10 changed files with 28 additions and 23 deletions.
6 changes: 3 additions & 3 deletions R/group_data.R
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ group_rows <- function(.data) {
#' @param .data a tibble
#'
#' @return `group_data()` return a tibble with one row per group. The last column, always called `.rows`
#' is a list of (0-based) integer vectors indicating the rows for each group.
#' is a list of integer vectors indicating the rows for each group.
#' If `.data` is a grouped data frame the first columns are the grouping variables.
#' `group_rows()` just returns the list of indices.
#'
Expand All @@ -31,13 +31,13 @@ group_data <- function(.data) {

#' @export
group_data.data.frame <- function(.data) {
rows <- list(seq2(0, nrow(.data) - 1))
rows <- list(seq_len(nrow(.data)))
tibble(.rows=rows)
}

#' @export
group_data.rowwise_df <- function(.data) {
rows <- as.list(seq2(0, nrow(.data) - 1))
rows <- as.list(seq_len(nrow(.data)))
tibble(.rows=rows)
}

Expand Down
4 changes: 2 additions & 2 deletions R/grouped-df.r
Original file line number Diff line number Diff line change
Expand Up @@ -206,9 +206,9 @@ do.grouped_df <- function(.data, ...) {
# usual scoping rules.
group_slice <- function(value) {
if (missing(value)) {
group_data[index[[`_i`]] + 1L, , drop = FALSE]
group_data[index[[`_i`]], , drop = FALSE]
} else {
group_data[index[[`_i`]] + 1L, ] <<- value
group_data[index[[`_i`]], ] <<- value
}
}
env_bind_fns(.env = env, . = group_slice, .data = group_slice)
Expand Down
2 changes: 1 addition & 1 deletion inst/include/tools/SlicingIndex.h
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ class GroupedSlicingIndex : public SlicingIndex {
}

virtual int operator[](int i) const {
return data[i];
return data[i] - 1;
}

virtual int group() const {
Expand Down
2 changes: 1 addition & 1 deletion man/group_data.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

8 changes: 4 additions & 4 deletions src/filter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -155,7 +155,7 @@ class GroupFilterIndices {

void add_group(int i, const Index& old_idx, int n) {
old_indices[i] = old_idx;
new_indices[i] = Rcpp::seq(k, k + n - 1);
new_indices[i] = Rcpp::seq(k + 1, k + n);
k += n ;
}

Expand Down Expand Up @@ -236,7 +236,7 @@ class FilterVisitor {
if (idx.is_dense(i)) {
// in that case we can just all the data
for (int j = 0; j < group_size; j++) {
out.copy(new_idx[j], old_idx[j]);
out.copy(new_idx[j] - 1, old_idx[j]);
}
} else {
SEXP test = idx.tests[i];
Expand All @@ -257,15 +257,15 @@ class FilterVisitor {
void copy_all_lgl(LogicalVector test, Data<RTYPE>& out, int group_size, const IntegerVector& new_idx, const Index& old_idx) {
for (int j = 0, k = 0; j < group_size; j++, k++) {
while (test[k] != TRUE) k++ ;
out.copy(new_idx[j], old_idx[k]);
out.copy(new_idx[j] - 1, old_idx[k]);
}
}

void copy_all_int(IntegerVector test, Data<RTYPE>& out, int group_size, const IntegerVector& new_idx, const Index& old_idx) {
SlicePositivePredicate pred(old_idx.size());
for (int j = 0, k = 0; j < group_size; j++, k++) {
while (!pred(test[k])) k++ ;
out.copy(new_idx[j], old_idx[test[k] - 1]);
out.copy(new_idx[j] - 1, old_idx[test[k] - 1]);
}
}

Expand Down
7 changes: 6 additions & 1 deletion src/group_indices.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -67,12 +67,16 @@ class IntRange {
int size;
};

inline int plus_one(int i) {
return i + 1;
}

class ListCollecter {
public:
ListCollecter(List& data_): data(data_), index(0) {}

int collect(const std::vector<int>& indices) {
data[index] = indices;
data[index] = IntegerVector(indices.begin(), indices.end(), plus_one);
return index++;
}

Expand Down Expand Up @@ -455,6 +459,7 @@ SEXP build_index_cpp(const DataFrame& data, const SymbolVector& vars) {
vec_groups[i] = Rf_allocVector(TYPEOF(visited_data[i]), ncases);
copy_most_attributes(vec_groups[i], visited_data[i]);
}

vec_groups[nvars] = indices;
groups_names[nvars] = ".rows";
s->make(vec_groups, indices_collecter);
Expand Down
2 changes: 1 addition & 1 deletion tests/testthat/test-arrange.r
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@ test_that("arrange keeps the grouping structure (#605)", {
res <- dat %>% group_by(g) %>% arrange(x)
expect_is(res, "grouped_df")
expect_equal(res$x, 1:4)
expect_equal(group_rows(res), list(c(1, 3), c(0, 2)))
expect_equal(group_rows(res), list(c(2, 4), c(1, 3)))
})

test_that("arrange handles complex vectors", {
Expand Down
12 changes: 6 additions & 6 deletions tests/testthat/test-group_data.R
Original file line number Diff line number Diff line change
Expand Up @@ -2,27 +2,27 @@ context("group_data")

test_that("group_rows works for 3 most important subclasses (#3489)", {
df <- data.frame(x=c(1,1,2,2))
expect_equal(group_rows(df), list(0:3))
expect_equal(group_rows(group_by(df,x)), list(0:1, 2:3))
expect_equal(group_rows(rowwise(df)), as.list(0:3))
expect_equal(group_rows(df), list(1:4))
expect_equal(group_rows(group_by(df,x)), list(1:2, 3:4))
expect_equal(group_rows(rowwise(df)), as.list(1:4))
})

test_that("group_data returns a tidy tibble (#3489)", {
df <- tibble(x = c(1,1,2,2))

expect_identical(
group_data(df),
tibble(.rows=list(0:3))
tibble(.rows=list(1:4))
)

expect_identical(
group_by(df,x) %>% group_data(),
tibble(x = c(1,2), .rows = list(0:1, 2:3))
tibble(x = c(1,2), .rows = list(1:2, 3:4))
)

expect_identical(
rowwise(df) %>% group_data(),
tibble(.rows = as.list(0:3))
tibble(.rows = as.list(1:4))
)
})

Expand Down
6 changes: 3 additions & 3 deletions tests/testthat/test-sets.R
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ test_that("set operations reconstruct grouping metadata (#3587)", {
df1 <- tibble(x = 1:4, g = rep(1:2, each = 2)) %>% group_by(g)
df2 <- tibble(x = 3:6, g = rep(2:3, each = 2))

expect_equal( setdiff(df1, df2) %>% group_rows(), list(0:1))
expect_equal( intersect(df1, df2) %>% group_rows(), list(0:1))
expect_equal( union(df1, df2) %>% group_rows(), list(4:5, 2:3, 0:1))
expect_equal( setdiff(df1, df2) %>% group_rows(), list(1:2))
expect_equal( intersect(df1, df2) %>% group_rows(), list(1:2))
expect_equal( union(df1, df2) %>% group_rows(), list(5:6, 3:4, 1:2))
})
2 changes: 1 addition & 1 deletion tests/testthat/test-slice.r
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,7 @@ test_that("slice works fine if n > nrow(df) (#1269)", {
test_that("slice strips grouped indices (#1405)", {
res <- mtcars %>% group_by(cyl) %>% slice(1) %>% mutate(mpgplus = mpg + 1)
expect_equal(nrow(res), 3L)
expect_equal(group_rows(res), as.list(0:2))
expect_equal(group_rows(res), as.list(1:3))
})

test_that("slice works with zero-column data frames (#2490)", {
Expand Down

0 comments on commit c454a8f

Please sign in to comment.