Skip to content

Commit

Permalink
sql: support max/min aggregates on collated strings
Browse files Browse the repository at this point in the history
Fixes #45846.
Fixes #46685.

This PR allows for performing the max/min aggregations on collated
strings. Currently, our aggregates would not support container types
like `AnyCollatedString`. This PR extends the aggregates infrastructure
in DistSQL that caused the limitation to allow for container types,
which unblocks #48370.

Release note (sql change): Support the max/min aggregations on collated
strings.
  • Loading branch information
rohany committed May 26, 2020
1 parent 8f7cd61 commit 974be22
Show file tree
Hide file tree
Showing 8 changed files with 121 additions and 12 deletions.
4 changes: 4 additions & 0 deletions docs/generated/sql/aggregates.md
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,8 @@
</span></td></tr>
<tr><td><a name="max"></a><code>max(arg1: <a href="uuid.html">uuid</a>) &rarr; <a href="uuid.html">uuid</a></code></td><td><span class="funcdesc"><p>Identifies the maximum selected value.</p>
</span></td></tr>
<tr><td><a name="max"></a><code>max(arg1: collatedstring{*}) &rarr; anyelement</code></td><td><span class="funcdesc"><p>Identifies the maximum selected value.</p>
</span></td></tr>
<tr><td><a name="max"></a><code>max(arg1: geography) &rarr; geography</code></td><td><span class="funcdesc"><p>Identifies the maximum selected value.</p>
</span></td></tr>
<tr><td><a name="max"></a><code>max(arg1: geometry) &rarr; geometry</code></td><td><span class="funcdesc"><p>Identifies the maximum selected value.</p>
Expand Down Expand Up @@ -143,6 +145,8 @@
</span></td></tr>
<tr><td><a name="min"></a><code>min(arg1: <a href="uuid.html">uuid</a>) &rarr; <a href="uuid.html">uuid</a></code></td><td><span class="funcdesc"><p>Identifies the minimum selected value.</p>
</span></td></tr>
<tr><td><a name="min"></a><code>min(arg1: collatedstring{*}) &rarr; anyelement</code></td><td><span class="funcdesc"><p>Identifies the minimum selected value.</p>
</span></td></tr>
<tr><td><a name="min"></a><code>min(arg1: geography) &rarr; geography</code></td><td><span class="funcdesc"><p>Identifies the minimum selected value.</p>
</span></td></tr>
<tr><td><a name="min"></a><code>min(arg1: geometry) &rarr; geometry</code></td><td><span class="funcdesc"><p>Identifies the minimum selected value.</p>
Expand Down
4 changes: 1 addition & 3 deletions pkg/sql/distsql_physical_planner.go
Original file line number Diff line number Diff line change
Expand Up @@ -1574,9 +1574,7 @@ func (dsp *DistSQLPlanner) addAggregators(
// the current aggregation e.
argTypes[i] = intermediateTypes[argIdxs[i]]
}
_, outputType, err := execinfrapb.GetAggregateInfo(
finalInfo.Fn, argTypes...,
)
_, outputType, err := execinfrapb.GetAggregateInfo(finalInfo.Fn, argTypes...)
if err != nil {
return err
}
Expand Down
19 changes: 19 additions & 0 deletions pkg/sql/execinfrapb/processors.go
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,25 @@ func GetAggregateInfo(
}

colTyp := b.FixedReturnType()
// If the output type of the aggregation depends on its inputs, then
// the output of FixedReturnType will be ambiguous. In the ambiguous
// cases, use the information about the input types to construct the
// appropriate output type. The tree.ReturnTyper interface is
// []tree.TypedExpr -> *types.T, so construct the []tree.TypedExpr
// from the types that we know are the inputs. Note that we don't
// try to create datums of each input type, and instead use this
// "TypedDummy" construct. This is because some types don't have resident
// members (like an ENUM with no values), and we shouldn't error out
// trying to pick an aggregate spec for those cases.
if colTyp.IsAmbiguous() {
args := make([]tree.TypedExpr, len(inputTypes))
for i, t := range inputTypes {
args[i] = &tree.TypedDummy{Typ: t}
}
// Evaluate ReturnType with the fake input set of arguments.
colTyp = b.ReturnType(args)
}

return constructAgg, colTyp, nil
}
}
Expand Down
15 changes: 15 additions & 0 deletions pkg/sql/logictest/testdata/logic_test/aggregate
Original file line number Diff line number Diff line change
Expand Up @@ -2554,3 +2554,18 @@ SELECT percentile_disc(0.50) FROM osagg

statement error ordered-set aggregations must have a WITHIN GROUP clause containing one ORDER BY column
SELECT percentile_cont(0.50) FROM osagg

# Tests for min/max on collated strings.
statement ok
CREATE TABLE t_collate (x STRING COLLATE en_us);
INSERT INTO t_collate VALUES ('hi' COLLATE en_us), ('hello' COLLATE en_us), ('howdy' COLLATE en_us)

query TT
SELECT min(x), max(x) FROM t_collate
----
hello howdy

query TT
SELECT min(NULL::STRING COLLATE en_us), max(NULL::STRING COLLATE en_us)
----
NULL NULL
4 changes: 4 additions & 0 deletions pkg/sql/logictest/testdata/logic_test/pg_catalog
Original file line number Diff line number Diff line change
Expand Up @@ -2094,6 +2094,7 @@ oid oprname aggsortop
3234851498 > 3234851498
2318307066 > 2318307066
1737252658 > 1737252658
1737252658 > 1737252658
1383827510 > 1383827510
2105536758 > 2105536758
1928531314 > 1928531314
Expand All @@ -2102,6 +2103,7 @@ oid oprname aggsortop
530358714 > 530358714
3802002898 > 3802002898
1737252658 > 1737252658
1737252658 > 1737252658
1064453514 > 1064453514
1778355034 > 1778355034
256681770 > 256681770
Expand All @@ -2124,6 +2126,7 @@ oid oprname aggsortop
2457977576 < 2457977576
2790955336 < 2790955336
235310192 < 235310192
235310192 < 235310192
2011297100 < 2011297100
2104629996 < 2104629996
3942776496 < 3942776496
Expand All @@ -2132,6 +2135,7 @@ oid oprname aggsortop
1494969736 < 1494969736
3842027408 < 3842027408
235310192 < 235310192
235310192 < 235310192
2300570720 < 2300570720
3675947880 < 3675947880
426663592 < 426663592
Expand Down
18 changes: 15 additions & 3 deletions pkg/sql/opt/memo/typing_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -204,14 +204,26 @@ func TestTypingAggregateAssumptions(t *testing.T) {

// Check for fixed return types.
retType := overload.ReturnType(nil)
if retType == tree.UnknownReturnType {
t.Errorf("return type is not fixed for %s: %+v", name, overload.Types.Types())
}

// As per rule 3, max and min have slightly different rules. We allow
// max and min to have non-fixed return types to allow defining aggregate
// overloads that have container types as arguments.
if name == "min" || name == "max" {
// Evaluate the return typer.
types := overload.Types.Types()
args := make([]tree.TypedExpr, len(types))
for i, t := range types {
args[i] = &tree.TypedDummy{Typ: t}
}
retType = overload.ReturnType(args)
if retType != overload.Types.Types()[0] {
t.Errorf("return type differs from arg type for %s: %+v", name, overload.Types.Types())
}
continue
}

if retType == tree.UnknownReturnType {
t.Errorf("return type is not fixed for %s: %+v", name, overload.Types.Types())
}
}
}
Expand Down
35 changes: 29 additions & 6 deletions pkg/sql/sem/builtins/aggregate_builtins.go
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,13 @@ func aggPropsNullableArgs() tree.FunctionProperties {
return f
}

// allMaxMinAggregateTypes contains extra types that aren't in
// types.Scalar that the max/min aggregate functions are defined on.
var allMaxMinAggregateTypes = append(
[]*types.T{types.AnyCollatedString},
types.Scalar...,
)

// aggregates are a special class of builtin functions that are wrapped
// at execution in a bucketing layer to combine (aggregate) the result
// of the function being run over many rows.
Expand Down Expand Up @@ -204,16 +211,32 @@ var aggregates = map[string]builtinDefinition{
"Calculates the boolean value of `AND`ing all selected values.", tree.VolatilityImmutable),
),

"max": collectOverloads(aggProps(), types.Scalar,
"max": collectOverloads(aggProps(), allMaxMinAggregateTypes,
func(t *types.T) tree.Overload {
return makeAggOverload([]*types.T{t}, t, newMaxAggregate,
"Identifies the maximum selected value.", tree.VolatilityImmutable)
info := "Identifies the maximum selected value."
vol := tree.VolatilityImmutable
// If t is an ambiguous type (like AnyCollatedString), then our aggregate
// does not have a fixed return type.
if t.IsAmbiguous() {
return makeAggOverloadWithReturnType(
[]*types.T{t}, tree.FirstNonNullReturnType(), newMaxAggregate, info, vol,
)
}
return makeAggOverload([]*types.T{t}, t, newMaxAggregate, info, vol)
}),

"min": collectOverloads(aggProps(), types.Scalar,
"min": collectOverloads(aggProps(), allMaxMinAggregateTypes,
func(t *types.T) tree.Overload {
return makeAggOverload([]*types.T{t}, t, newMinAggregate,
"Identifies the minimum selected value.", tree.VolatilityImmutable)
info := "Identifies the minimum selected value."
vol := tree.VolatilityImmutable
// If t is an ambiguous type (like AnyCollatedString), then our aggregate
// does not have a fixed return type.
if t.IsAmbiguous() {
return makeAggOverloadWithReturnType(
[]*types.T{t}, tree.FirstNonNullReturnType(), newMinAggregate, info, vol,
)
}
return makeAggOverload([]*types.T{t}, t, newMinAggregate, info, vol)
}),

"string_agg": makeBuiltin(aggPropsNullableArgs(),
Expand Down
34 changes: 34 additions & 0 deletions pkg/sql/sem/tree/expr.go
Original file line number Diff line number Diff line change
Expand Up @@ -1062,6 +1062,40 @@ func (node *Subquery) Format(ctx *FmtCtx) {
}
}

// TypedDummy is a dummy expression that represents a dummy value with
// a specified type. It can be used in situations where TypedExprs of a
// particular type are required for semantic analysis.
type TypedDummy struct {
Typ *types.T
}

func (node *TypedDummy) String() string {
return AsString(node)
}

// Format implements the NodeFormatter interface.
func (node *TypedDummy) Format(ctx *FmtCtx) {
ctx.WriteString("dummyvalof(")
ctx.FormatTypeReference(node.Typ)
ctx.WriteString(")")
}

// ResolvedType implements the TypedExpr interface.
func (node *TypedDummy) ResolvedType() *types.T {
return node.Typ
}

// TypeCheck implements the Expr interface.
func (node *TypedDummy) TypeCheck(*SemaContext, *types.T) (TypedExpr, error) { return node, nil }

// Walk implements the Expr interface.
func (node *TypedDummy) Walk(Visitor) Expr { return node }

// Eval implements the TypedExpr interface.
func (node *TypedDummy) Eval(*EvalContext) (Datum, error) {
return nil, errors.AssertionFailedf("should not eval typed dummy")
}

// BinaryOperator represents a binary operator.
type BinaryOperator int

Expand Down

0 comments on commit 974be22

Please sign in to comment.