Skip to content

Commit

Permalink
Make groupvalues(gd) private again, replaced by keys(gd)
Browse files Browse the repository at this point in the history
  • Loading branch information
jlumpe committed Dec 3, 2019
1 parent 28aa827 commit 67b5f74
Show file tree
Hide file tree
Showing 3 changed files with 20 additions and 111 deletions.
1 change: 0 additions & 1 deletion src/DataFrames.jl
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,6 @@ export AbstractDataFrame,
dropmissing!,
groupby,
groupindices,
groupvalues,
groupvars,
insertcols!,
mapcols,
Expand Down
90 changes: 16 additions & 74 deletions src/groupeddataframe/grouping.jl
Original file line number Diff line number Diff line change
Expand Up @@ -238,76 +238,15 @@ function _groupvar_idx(gd::GroupedDataFrame, name::Symbol, strict::Bool)
return i
end

"""
groupvalues([T::Type], gd::GroupedDataFrame)
groupvalues([T::Type], gd::GroupedDataFrame, i)
groupvalues(gd::GroupedDataFrame, i, col)
Get the values of the grouping columns for `gd`, optionally specifying the group
index `i` and grouping column `col`. `col` may be an integer index (of the
grouping columns passed to [`groupby`](@ref)) or the column name itself. The
optional first argument to the first two forms specifies the return type, which
may be `NamedTuple` (default) or `Tuple`.
# Returns
# Grouping-column values for every group, collected into a vector with one
# entry per group (entry `g` is whatever `_groupvalues(gd, g)` returns).
function _groupvalues(gd::GroupedDataFrame)
    ngroups = length(gd)
    return [_groupvalues(gd, g) for g in 1:ngroups]
end

Iterator over `Tuple`s/`NamedTuple`s, a single `Tuple`/`NamedTuple`, or a
grouping column value.
# Examples
# Grouping-column values for group `i`: index the parent data frame at row
# `gd.idx[gd.starts[i]]` (presumably the first row of the group -- confirm)
# and at the grouping columns `gd.cols`.
function _groupvalues(gd::GroupedDataFrame, i::Integer)
    row = gd.idx[gd.starts[i]]
    return gd.parent[row, gd.cols]
end

```jldoctest
julia> df = DataFrame(a = repeat([:foo, :bar, :baz], outer=[4]),
b = repeat([2, 1], outer=[6]),
c = 1:12);
julia> gd = groupby(df, [:a, :b])
GroupedDataFrame with 6 groups based on keys: a, b
First Group (2 rows): a = :foo, b = 2
│ Row │ a │ b │ c │
│ │ Symbol │ Int64 │ Int64 │
├─────┼────────┼───────┼───────┤
│ 1 │ foo │ 2 │ 1 │
│ 2 │ foo │ 2 │ 7 │
Last Group (2 rows): a = :baz, b = 1
│ Row │ a │ b │ c │
│ │ Symbol │ Int64 │ Int64 │
├─────┼────────┼───────┼───────┤
│ 1 │ baz │ 1 │ 6 │
│ 2 │ baz │ 1 │ 12 │
julia> collect(groupvalues(gd))
6-element Array{NamedTuple{(:a, :b),Tuple{Symbol,Int64}},1}:
(a = :foo, b = 2)
(a = :bar, b = 1)
(a = :baz, b = 2)
(a = :foo, b = 1)
(a = :bar, b = 2)
(a = :baz, b = 1)
julia> groupvalues(gd, 2)
(a = :bar, b = 1)
julia> groupvalues(gd, 2, 1)
:bar
julia> groupvalues(gd, 2, :a)
:bar
julia> groupvalues(Tuple, gd, 2)
(:bar, 1)
```
"""
function groupvalues(T::Type, gd::GroupedDataFrame)
    # Lazy generator: the values of a group are only computed when it is iterated.
    return (groupvalues(T, gd, g) for g in 1:length(gd))
end

# Without an explicit return type, produce `NamedTuple`s.
function groupvalues(gd::GroupedDataFrame)
    return groupvalues(NamedTuple, gd)
end

# Values of all grouping columns for group `i`, as a plain `Tuple`.
function groupvalues(::Type{Tuple}, gd::GroupedDataFrame, i::Integer)
    row = gd.idx[gd.starts[i]]
    return Tuple(gd.parent[row, gd.cols])
end

# Same values, labelled with the grouping column names from `groupvars(gd)`.
function groupvalues(::Type{NamedTuple}, gd::GroupedDataFrame, i::Integer)
    keynames = Tuple(groupvars(gd))
    return NamedTuple{keynames}(groupvalues(Tuple, gd, i))
end

# Single group, default (`NamedTuple`) return type.
function groupvalues(gd::GroupedDataFrame, i::Integer)
    return groupvalues(NamedTuple, gd, i)
end

# Value of the `col`-th grouping column for group `i`.
function groupvalues(gd::GroupedDataFrame, i::Integer, col::Integer)
    row = gd.idx[gd.starts[i]]
    return gd.parent[row, gd.cols[col]]
end

# Grouping column given by name: resolve it to its position with
# `_groupvar_idx` (strict mode), then defer to the integer method.
function groupvalues(gd::GroupedDataFrame, i::Integer, col::Symbol)
    colidx = _groupvar_idx(gd, col, true)
    return groupvalues(gd, i, colidx)
end
# Value of a single grouping column for a single group.
# `col` is a position within the grouping columns (an index into `gd.cols`).
function _groupvalues(gd::GroupedDataFrame, i::Integer, col::Integer)
    row = gd.idx[gd.starts[i]]
    return gd.parent[row, gd.cols[col]]
end

# Same lookup with the grouping column given by name; the name is resolved to
# its position via `_groupvar_idx` called with `strict = true`.
function _groupvalues(gd::GroupedDataFrame, i::Integer, col::Symbol)
    colidx = _groupvar_idx(gd, col, true)
    return _groupvalues(gd, i, colidx)
end

# Element types of the grouping columns of `gd`, returned as a tuple with one
# entry per element of `gd.cols` (in that order).
function _grouptypes(gd::GroupedDataFrame)
    return Tuple(eltype(gd.parent[!, col]) for col in gd.cols)
end
Expand Down Expand Up @@ -342,15 +281,15 @@ Base.keys(key::GroupKey) = Tuple(groupvars(parent(key)))
# Names of a key are the grouping variables of the GroupedDataFrame it belongs to.
Base.names(key::GroupKey) = groupvars(parent(key))
# Private fields are never exposed since they can conflict with column names
Base.propertynames(key::GroupKey, private::Bool=false) = keys(key)
# Values of all grouping columns for this key's group, as a Tuple.
# The group index is read with `getfield` rather than `key.idx`; `getproperty`
# is overloaded for GroupKey elsewhere in this file.
# NOTE(review): the next two lines define the same method signature. They look
# like the before/after lines of a diff shown without +/- markers; if both were
# kept in a file, the later definition would overwrite the earlier one.
Base.values(key::GroupKey) = groupvalues(Tuple, parent(key), getfield(key, :idx))
Base.values(key::GroupKey) = Tuple(_groupvalues(parent(key), getfield(key, :idx)))

# Iterate the key's values positionally: yields `key[i]` for i = 1, 2, ...,
# and returns `nothing` once `i` exceeds `length(key)`.
Base.iterate(key::GroupKey, i::Integer=1) = i <= length(key) ? (key[i], i + 1) : nothing

# Positional indexing: value of the `i`-th grouping column for this key's group.
# NOTE(review): same duplicated-signature situation as `Base.values` above
# (public `groupvalues` vs. private `_groupvalues`) -- confirm which one is intended.
Base.getindex(key::GroupKey, i::Integer) = groupvalues(parent(key), getfield(key, :idx), i)
Base.getindex(key::GroupKey, i::Integer) = _groupvalues(parent(key), getfield(key, :idx), i)

function Base.getindex(key::GroupKey, n::Symbol)
try
return groupvalues(parent(key), getfield(key, :idx), n)
return _groupvalues(parent(key), getfield(key, :idx), n)
catch e
throw(KeyError(n))
end
Expand All @@ -364,7 +303,10 @@ function Base.getproperty(key::GroupKey, p::Symbol)
end
end

# Convert a GroupKey to a NamedTuple of (grouping column name => value).
# NOTE(review): the one-line definition and the `function` form below share the
# same signature; they look like the before/after lines of a diff shown without
# +/- markers. If both were kept, the later definition would overwrite the earlier.
Base.NamedTuple(key::GroupKey) = groupvalues(NamedTuple, parent(key), getfield(key, :idx))
function Base.NamedTuple(key::GroupKey)
# NamedTuple type whose field names are the grouping variables of the parent
N = NamedTuple{Tuple(groupvars(parent(key)))}
# Fill it with this group's grouping-column values (implicit return value)
N(_groupvalues(parent(key), getfield(key, :idx)))
end
# Tuple conversion is the same as `values(key)`.
Base.Tuple(key::GroupKey) = values(key)


Expand Down Expand Up @@ -484,8 +426,8 @@ end

# Index with tuple
# Return the group `gd[i]` whose grouping-column values, taken as a Tuple,
# are `isequal` to `key`; throw `KeyError(key)` when no group matches.
# Groups are checked one after another, so the lookup visits groups in order
# until the first match.
# NOTE(review): two loop headers appear below with a single closing `end`
# between them and the `throw`. They look like the before (`groupvalues(Tuple, gd)`)
# and after (`_groupvalues(gd, i)`) variants of the same loop from a diff shown
# without +/- markers -- only one of the two loops is presumably meant to remain.
function Base.getindex(gd::GroupedDataFrame, key::Tuple)
for (i, v) in enumerate(groupvalues(Tuple, gd))
isequal(v, key) && return gd[i]
for i in 1:length(gd)
isequal(Tuple(_groupvalues(gd, i)), key) && return gd[i]
end
throw(KeyError(key))
end
Expand Down
40 changes: 4 additions & 36 deletions test/grouping.jl
Original file line number Diff line number Diff line change
Expand Up @@ -1230,38 +1230,6 @@ end
@test groupvars(gd2) == [:A, :B]
end

# Exercises `groupvalues` for all groups, for a single group, and for a single
# group/column pair, in both `NamedTuple` and `Tuple` flavours.
# Every comparison goes through `isequal`: the expected keys contain `missing`,
# for which `==` would evaluate to `missing` instead of a `Bool` and make
# `@test` error out.
@testset "groupvalues" begin
    df = DataFrame(A=repeat([missing, :A, :B, :A, :B, missing], outer=2),
                   B=repeat([:X, :Y], inner=6),
                   C=1:12)

    cols = [:A, :B]
    gd = groupby_checked(df, cols)

    expected =
        [(A=missing, B=:X), (A=:A, B=:X), (A=:B, B=:X), (A=missing, B=:Y), (A=:A, B=:Y), (A=:B, B=:Y)]

    # All groups
    @test isequal(collect(groupvalues(gd)), expected)
    @test isequal(collect(groupvalues(NamedTuple, gd)), expected)
    @test isequal(collect(groupvalues(Tuple, gd)), map(values, expected))

    # Single group
    for (i, ex) in enumerate(expected)
        @test isequal(groupvalues(gd, i), ex)
        @test isequal(groupvalues(NamedTuple, gd, i), ex)
        @test isequal(groupvalues(Tuple, gd, i), values(ex))

        # Single group, column
        for (j, col) in enumerate(cols)
            @test isequal(groupvalues(gd, i, j), ex[j])
            @test isequal(groupvalues(gd, i, col), ex[j])
        end
    end

    # Asking for a column that is not a grouping column must be rejected
    @test_throws ArgumentError groupvalues(gd, 1, :foo)
end

@testset "by skipmissing and sort" begin
df = DataFrame(a=[2, 2, missing, missing, 1, 1, 3, 3], b=1:8)
for dosort in (false, true), doskipmissing in (false, true)
Expand Down Expand Up @@ -1419,8 +1387,8 @@ end
[(a=:A, b=:X), (a=:B, b=:X), (a=missing, b=:X), (a=:A, b=:Y), (a=:B, b=:Y), (a=missing, b=:Y)]
@test isequal(gd[(a=:A, b=:X)], gd[1])
@test isequal(gd[keys(gd)[1]], gd[1])
@test groupvalues(gd, 1) == (a=:A, b=:X)
@test groupvalues(gd, 1, :a) == :A
@test NamedTuple(keys(gd)[1]) == (a=:A, b=:X)
@test keys(gd)[1].a == :A

names!(df, [:d, :e, :f])

Expand All @@ -1430,8 +1398,8 @@ end
[(d=:A, e=:X), (d=:B, e=:X), (d=missing, e=:X), (d=:A, e=:Y), (d=:B, e=:Y), (d=missing, e=:Y)]
@test isequal(gd[(d=:A, e=:X)], gd[1])
@test isequal(gd[keys(gd)[1]], gd[1])
@test groupvalues(gd, 1) == (d=:A, e=:X)
@test groupvalues(gd, 1, :d) == :A
@test NamedTuple(keys(gd)[1]) == (d=:A, e=:X)
@test keys(gd)[1].d == :A
@test_throws KeyError gd[(a=:A, b=:X)]
end

Expand Down

0 comments on commit 67b5f74

Please sign in to comment.