Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Using 1-based indices in grouping metadata #3610

Merged
merged 7 commits into from
May 29, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions R/group_data.R
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ group_rows <- function(.data) {
#' @param .data a tibble
#'
#' @return `group_data()` return a tibble with one row per group. The last column, always called `.rows`
#' is a list of (0-based) integer vectors indicating the rows for each group.
#' is a list of integer vectors indicating the rows for each group.
#' If `.data` is a grouped data frame the first columns are the grouping variables.
#' `group_rows()` just returns the list of indices.
#'
Expand All @@ -31,13 +31,13 @@ group_data <- function(.data) {

#' @export
group_data.data.frame <- function(.data) {
rows <- list(seq2(0, nrow(.data) - 1))
rows <- list(seq_len(nrow(.data)))
tibble(.rows=rows)
}

#' @export
group_data.rowwise_df <- function(.data) {
rows <- as.list(seq2(0, nrow(.data) - 1))
rows <- as.list(seq_len(nrow(.data)))
tibble(.rows=rows)
}

Expand Down
4 changes: 2 additions & 2 deletions R/grouped-df.r
Original file line number Diff line number Diff line change
Expand Up @@ -206,9 +206,9 @@ do.grouped_df <- function(.data, ...) {
# usual scoping rules.
group_slice <- function(value) {
if (missing(value)) {
group_data[index[[`_i`]] + 1L, , drop = FALSE]
group_data[index[[`_i`]], , drop = FALSE]
} else {
group_data[index[[`_i`]] + 1L, ] <<- value
group_data[index[[`_i`]], ] <<- value
}
}
env_bind_fns(.env = env, . = group_slice, .data = group_slice)
Expand Down
2 changes: 1 addition & 1 deletion inst/include/tools/SlicingIndex.h
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ class GroupedSlicingIndex : public SlicingIndex {
}

virtual int operator[](int i) const {
return data[i];
return data[i] - 1;
}

virtual int group() const {
Expand Down
2 changes: 1 addition & 1 deletion man/group_data.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

8 changes: 4 additions & 4 deletions src/filter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -155,7 +155,7 @@ class GroupFilterIndices {

void add_group(int i, const Index& old_idx, int n) {
old_indices[i] = old_idx;
new_indices[i] = Rcpp::seq(k, k + n - 1);
new_indices[i] = Rcpp::seq(k + 1, k + n);
k += n ;
}

Expand Down Expand Up @@ -236,7 +236,7 @@ class FilterVisitor {
if (idx.is_dense(i)) {
// in that case we can just all the data
for (int j = 0; j < group_size; j++) {
out.copy(new_idx[j], old_idx[j]);
out.copy(new_idx[j] - 1, old_idx[j]);
}
} else {
SEXP test = idx.tests[i];
Expand All @@ -257,15 +257,15 @@ class FilterVisitor {
void copy_all_lgl(LogicalVector test, Data<RTYPE>& out, int group_size, const IntegerVector& new_idx, const Index& old_idx) {
for (int j = 0, k = 0; j < group_size; j++, k++) {
while (test[k] != TRUE) k++ ;
out.copy(new_idx[j], old_idx[k]);
out.copy(new_idx[j] - 1, old_idx[k]);
}
}

void copy_all_int(IntegerVector test, Data<RTYPE>& out, int group_size, const IntegerVector& new_idx, const Index& old_idx) {
SlicePositivePredicate pred(old_idx.size());
for (int j = 0, k = 0; j < group_size; j++, k++) {
while (!pred(test[k])) k++ ;
out.copy(new_idx[j], old_idx[test[k] - 1]);
out.copy(new_idx[j] - 1, old_idx[test[k] - 1]);
}
}

Expand Down
7 changes: 6 additions & 1 deletion src/group_indices.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -67,12 +67,16 @@ class IntRange {
int size;
};

inline int plus_one(int i) {
return i + 1;
}

class ListCollecter {
public:
ListCollecter(List& data_): data(data_), index(0) {}

int collect(const std::vector<int>& indices) {
data[index] = indices;
data[index] = IntegerVector(indices.begin(), indices.end(), plus_one);
return index++;
}

Expand Down Expand Up @@ -455,6 +459,7 @@ SEXP build_index_cpp(const DataFrame& data, const SymbolVector& vars) {
vec_groups[i] = Rf_allocVector(TYPEOF(visited_data[i]), ncases);
copy_most_attributes(vec_groups[i], visited_data[i]);
}

vec_groups[nvars] = indices;
groups_names[nvars] = ".rows";
s->make(vec_groups, indices_collecter);
Expand Down
2 changes: 1 addition & 1 deletion tests/testthat/test-arrange.r
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@ test_that("arrange keeps the grouping structure (#605)", {
res <- dat %>% group_by(g) %>% arrange(x)
expect_is(res, "grouped_df")
expect_equal(res$x, 1:4)
expect_equal(group_rows(res), list(c(1, 3), c(0, 2)))
expect_equal(group_rows(res), list(c(2, 4), c(1, 3)))
})

test_that("arrange handles complex vectors", {
Expand Down
12 changes: 6 additions & 6 deletions tests/testthat/test-group_data.R
Original file line number Diff line number Diff line change
Expand Up @@ -2,27 +2,27 @@ context("group_data")

test_that("group_rows works for 3 most important subclasses (#3489)", {
df <- data.frame(x=c(1,1,2,2))
expect_equal(group_rows(df), list(0:3))
expect_equal(group_rows(group_by(df,x)), list(0:1, 2:3))
expect_equal(group_rows(rowwise(df)), as.list(0:3))
expect_equal(group_rows(df), list(1:4))
expect_equal(group_rows(group_by(df,x)), list(1:2, 3:4))
expect_equal(group_rows(rowwise(df)), as.list(1:4))
})

test_that("group_data returns a tidy tibble (#3489)", {
df <- tibble(x = c(1,1,2,2))

expect_identical(
group_data(df),
tibble(.rows=list(0:3))
tibble(.rows=list(1:4))
)

expect_identical(
group_by(df,x) %>% group_data(),
tibble(x = c(1,2), .rows = list(0:1, 2:3))
tibble(x = c(1,2), .rows = list(1:2, 3:4))
)

expect_identical(
rowwise(df) %>% group_data(),
tibble(.rows = as.list(0:3))
tibble(.rows = as.list(1:4))
)
})

Expand Down
6 changes: 3 additions & 3 deletions tests/testthat/test-sets.R
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ test_that("set operations reconstruct grouping metadata (#3587)", {
df1 <- tibble(x = 1:4, g = rep(1:2, each = 2)) %>% group_by(g)
df2 <- tibble(x = 3:6, g = rep(2:3, each = 2))

expect_equal( setdiff(df1, df2) %>% group_rows(), list(0:1))
expect_equal( intersect(df1, df2) %>% group_rows(), list(0:1))
expect_equal( union(df1, df2) %>% group_rows(), list(4:5, 2:3, 0:1))
expect_equal( setdiff(df1, df2) %>% group_rows(), list(1:2))
expect_equal( intersect(df1, df2) %>% group_rows(), list(1:2))
expect_equal( union(df1, df2) %>% group_rows(), list(5:6, 3:4, 1:2))
})
2 changes: 1 addition & 1 deletion tests/testthat/test-slice.r
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,7 @@ test_that("slice works fine if n > nrow(df) (#1269)", {
test_that("slice strips grouped indices (#1405)", {
res <- mtcars %>% group_by(cyl) %>% slice(1) %>% mutate(mpgplus = mpg + 1)
expect_equal(nrow(res), 3L)
expect_equal(group_rows(res), as.list(0:2))
expect_equal(group_rows(res), as.list(1:3))
})

test_that("slice works with zero-column data frames (#2490)", {
Expand Down