From 1903032a3c3ff92b5b97eef6927dbe1f171dc36f Mon Sep 17 00:00:00 2001 From: Arenatlx <314806019@qq.com> Date: Mon, 29 Jul 2024 14:46:18 +0800 Subject: [PATCH 1/2] This is an automated cherry-pick of #54988 Signed-off-by: ti-chi-bot --- pkg/planner/core/logical_expand.go | 393 ++++++++++++++++++ .../integrationtest/r/executor/expand.result | 326 +++++++++++++++ tests/integrationtest/t/executor/expand.test | 152 +++++++ 3 files changed, 871 insertions(+) create mode 100644 pkg/planner/core/logical_expand.go create mode 100644 tests/integrationtest/r/executor/expand.result create mode 100644 tests/integrationtest/t/executor/expand.test diff --git a/pkg/planner/core/logical_expand.go b/pkg/planner/core/logical_expand.go new file mode 100644 index 0000000000000..83260219596c2 --- /dev/null +++ b/pkg/planner/core/logical_expand.go @@ -0,0 +1,393 @@ +// Copyright 2024 PingCAP, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package core + +import ( + "bytes" + "fmt" + + "github.com/pingcap/tidb/pkg/expression" + "github.com/pingcap/tidb/pkg/planner/core/base" + "github.com/pingcap/tidb/pkg/planner/core/operator/logicalop" + fd "github.com/pingcap/tidb/pkg/planner/funcdep" + "github.com/pingcap/tidb/pkg/planner/property" + "github.com/pingcap/tidb/pkg/planner/util/optimizetrace" + "github.com/pingcap/tidb/pkg/planner/util/optimizetrace/logicaltrace" + "github.com/pingcap/tidb/pkg/types" + "github.com/pingcap/tidb/pkg/util/dbterror/plannererrors" + "github.com/pingcap/tidb/pkg/util/plancodec" + "github.com/pingcap/tipb/go-tipb" +) + +// LogicalExpand represents a logical Expand OP serves for data replication requirement. +type LogicalExpand struct { + logicalop.LogicalSchemaProducer + + // distinct group by columns. (maybe projected below if it's a non-col) + DistinctGroupByCol []*expression.Column + DistinctGbyColNames []*types.FieldName + // keep the old gbyExprs for resolve cases like grouping(a+b), the args: + // a+b should be resolved to new projected gby col according to ref pos. + DistinctGbyExprs []expression.Expression + + // rollup grouping sets. + DistinctSize int + RollupGroupingSets expression.GroupingSets + RollupID2GIDS map[int]map[uint64]struct{} + RollupGroupingIDs []uint64 + + // The level projections is generated from grouping sets,make execution more clearly. + LevelExprs [][]expression.Expression + + // The generated column names. Eg: "grouping_id" and so on. + ExtraGroupingColNames []string + + // GroupingMode records the grouping id allocation mode. + GroupingMode tipb.GroupingMode + + // The GID and GPos column generated by logical expand if any. + GID *expression.Column + GIDName *types.FieldName + GPos *expression.Column + GPosName *types.FieldName +} + +// Init initializes LogicalProjection. 
+func (p LogicalExpand) Init(ctx base.PlanContext, offset int) *LogicalExpand { + p.BaseLogicalPlan = logicalop.NewBaseLogicalPlan(ctx, plancodec.TypeExpand, &p, offset) + return &p +} + +// *************************** start implementation of logicalPlan interface *************************** + +// HashCode inherits BaseLogicalPlan.LogicalPlan.<0th> implementation. + +// PredicatePushDown implements base.LogicalPlan.<1st> interface. +func (p *LogicalExpand) PredicatePushDown(predicates []expression.Expression, opt *optimizetrace.LogicalOptimizeOp) (ret []expression.Expression, retPlan base.LogicalPlan) { + // Note that, grouping column related predicates can't be pushed down, since grouping column has nullability change after Expand OP itself. + // condition related with grouping column shouldn't be pushed down through it. + // currently, since expand is adjacent to aggregate, any filter above aggregate wanted to be push down through expand only have two cases: + // 1. agg function related filters. (these condition is always above aggregate) + // 2. group-by item related filters. (there condition is always related with grouping sets columns, which can't be pushed down) + // As a whole, we banned all the predicates pushing-down logic here that remained in Expand OP, and constructing a new selection above it if any. + remained, child := p.BaseLogicalPlan.PredicatePushDown(nil, opt) + return append(remained, predicates...), child +} + +// PruneColumns implement the base.LogicalPlan.<2nd> interface. +// logicExpand is built in the logical plan building phase, where all the column prune is not done yet. So the +// expand projection expressions is meaningless if it built at that time. (we only maintain its schema, while +// the level projection expressions construction is left to the last logical optimize rule) +// +// so when do the rule_column_pruning here, we just prune the schema is enough. +func (p *LogicalExpand) PruneColumns(parentUsedCols []*expression.Column, opt *optimizetrace.LogicalOptimizeOp) (base.LogicalPlan, error) { + // Expand need those extra redundant distinct group by columns projected from underlying projection. + // distinct GroupByCol must be used by aggregate above, to make sure this, append DistinctGroupByCol again. + parentUsedCols = append(parentUsedCols, p.DistinctGroupByCol...) + used := expression.GetUsedList(p.SCtx().GetExprCtx().GetEvalCtx(), parentUsedCols, p.Schema()) + prunedColumns := make([]*expression.Column, 0) + for i := len(used) - 1; i >= 0; i-- { + if !used[i] { + prunedColumns = append(prunedColumns, p.Schema().Columns[i]) + p.Schema().Columns = append(p.Schema().Columns[:i], p.Schema().Columns[i+1:]...) + p.SetOutputNames(append(p.OutputNames()[:i], p.OutputNames()[i+1:]...)) + } + } + logicaltrace.AppendColumnPruneTraceStep(p, prunedColumns, opt) + // Underlying still need to keep the distinct group by columns and parent used columns. + var err error + p.Children()[0], err = p.Children()[0].PruneColumns(parentUsedCols, opt) + if err != nil { + return nil, err + } + return p, nil +} + +// FindBestTask inherits BaseLogicalPlan.LogicalPlan.<3rd> implementation. + +// BuildKeyInfo inherits BaseLogicalPlan.LogicalPlan.<4th> implementation. + +// PushDownTopN inherits BaseLogicalPlan.LogicalPlan.<5th> implementation. + +// DeriveTopN inherits BaseLogicalPlan.LogicalPlan.<6th> implementation. + +// PredicateSimplification inherits BaseLogicalPlan.LogicalPlan.<7th> implementation. 
+ +// ConstantPropagation inherits BaseLogicalPlan.LogicalPlan.<8th> implementation. + +// PullUpConstantPredicates inherits BaseLogicalPlan.LogicalPlan.<9th> implementation. + +// RecursiveDeriveStats inherits BaseLogicalPlan.LogicalPlan.<10th> implementation. + +// DeriveStats inherits BaseLogicalPlan.LogicalPlan.<11th> implementation. + +// ExtractColGroups inherits BaseLogicalPlan.LogicalPlan.<12th> implementation. + +// PreparePossibleProperties inherits BaseLogicalPlan.LogicalPlan.<13th> implementation. + +// ExhaustPhysicalPlans implements base.LogicalPlan.<14th> interface. +func (p *LogicalExpand) ExhaustPhysicalPlans(prop *property.PhysicalProperty) ([]base.PhysicalPlan, bool, error) { + return exhaustPhysicalPlans4LogicalExpand(p, prop) +} + +// ExtractCorrelatedCols implements base.LogicalPlan.<15th> interface. +func (p *LogicalExpand) ExtractCorrelatedCols() []*expression.CorrelatedColumn { + // if p.LevelExprs is nil, it means the GenLevelProjections has not been called yet, + // which is done in logical optimizing phase. While for building correlated subquery + // plan, the ExtractCorrelatedCols will be called once after building, so we should + // distinguish the case here. + if p.LevelExprs == nil { + // since level projections generation don't produce any correlated columns, just + // return nil. + return nil + } + corCols := make([]*expression.CorrelatedColumn, 0, len(p.LevelExprs[0])) + for _, lExpr := range p.LevelExprs { + for _, expr := range lExpr { + corCols = append(corCols, expression.ExtractCorColumns(expr)...) + } + } + return corCols +} + +// MaxOneRow inherits BaseLogicalPlan.LogicalPlan.<16th> implementation. + +// Children inherits BaseLogicalPlan.LogicalPlan.<17th> implementation. + +// SetChildren inherits BaseLogicalPlan.LogicalPlan.<18th> implementation. + +// SetChild inherits BaseLogicalPlan.LogicalPlan.<19th> implementation. + +// RollBackTaskMap inherits BaseLogicalPlan.LogicalPlan.<20th> implementation. + +// CanPushToCop inherits BaseLogicalPlan.LogicalPlan.<21st> implementation. + +// ExtractFD implements the base.LogicalPlan.<22nd> interface, extracting the FD from bottom up. +func (p *LogicalExpand) ExtractFD() *fd.FDSet { + // basically extract the children's fdSet. + return p.LogicalSchemaProducer.ExtractFD() +} + +// GetBaseLogicalPlan inherits BaseLogicalPlan.LogicalPlan.<23rd> implementation. + +// ConvertOuterToInnerJoin inherits BaseLogicalPlan.LogicalPlan.<24th> implementation. + +// *************************** end implementation of logicalPlan interface *************************** + +// GetUsedCols extracts all of the Columns used by proj. +func (*LogicalExpand) GetUsedCols() (usedCols []*expression.Column) { + // be careful that, expand OP itself, shouldn't output its own used cols, because + // it just replicates the child's schema by defined grouping sets. (pass down what + // the parent's used is enough here) + return usedCols +} + +// GenLevelProjections is used to generate level projections after all the necessary logical +// optimization is done such as column pruning. +func (p *LogicalExpand) GenLevelProjections() { + // get all the grouping cols. + groupingSetCols := p.RollupGroupingSets.AllSetsColIDs() + p.DistinctSize, p.RollupGroupingIDs, p.RollupID2GIDS = p.RollupGroupingSets.DistinctSize() + hasDuplicateGroupingSet := len(p.RollupGroupingSets) != p.DistinctSize + schemaCols := p.Schema().Columns + // last two schema col is about gid and gpos if any. 
+ nonGenCols := schemaCols[:len(schemaCols)-1] + gidCol := schemaCols[len(schemaCols)-1] + if hasDuplicateGroupingSet { + // last two schema col is about gid and gpos. + nonGenCols = schemaCols[:len(schemaCols)-2] + gidCol = schemaCols[len(schemaCols)-2] + } + + // for every rollup grouping set, gen its level projection. + for offset, curGroupingSet := range p.RollupGroupingSets { + levelProj := make([]expression.Expression, 0, p.Schema().Len()) + for _, oneCol := range nonGenCols { + // if this col is in the grouping-set-cols and this col is not needed by current grouping-set, just set it as null value with specified fieldType. + if groupingSetCols.Has(int(oneCol.UniqueID)) { + if curGroupingSet.AllColIDs().Has(int(oneCol.UniqueID)) { + // needed col in current grouping set: project it as col-ref. + levelProj = append(levelProj, oneCol) + } else { + // un-needed col in current grouping set: project it as null value. + nullValue := expression.NewNullWithFieldType(oneCol.RetType.Clone()) + levelProj = append(levelProj, nullValue) + } + } else { + // other un-related cols: project it as col-ref. + levelProj = append(levelProj, oneCol) + } + } + // generate the grouping_id projection expr, project it as uint64. + gid := p.GenerateGroupingIDModeBitAnd(curGroupingSet) + if p.GroupingMode == tipb.GroupingMode_ModeNumericSet { + gid = p.GenerateGroupingIDIncrementModeNumericSet(offset) + } + gidValue := expression.NewUInt64ConstWithFieldType(gid, gidCol.RetType.Clone()) + levelProj = append(levelProj, gidValue) + + // generate the grouping_pos projection expr, project it as uint64 if any. + if hasDuplicateGroupingSet { + gposCol := schemaCols[len(schemaCols)-1] + // gpos value can equal the grouping set index offset. + gpos := expression.NewUInt64ConstWithFieldType(uint64(offset), gposCol.RetType.Clone()) + // gen-col: project it as uint64. + levelProj = append(levelProj, gpos) + } + p.LevelExprs = append(p.LevelExprs, levelProj) + } +} + +// GenerateGroupingMarks generate the groupingMark for the source column specified in grouping function. +func (p *LogicalExpand) GenerateGroupingMarks(sourceCols []*expression.Column) []map[uint64]struct{} { + // Since grouping function may have multi args like grouping(a,b), so the source columns may greater than 1. + // reference: https://dev.mysql.com/blog-archive/mysql-8-0-grouping-function/ + // Let's say GROUPING(b,a) group by a,b with rollup. (Note the b,a sequence is reversed from gby item) + // if GROUPING (b,a) returns 3, it means that NULL in column “b” and NULL in column “a” for that row is + // produce by a ROLLUP operation. If result is 2, NULL in column “a” alone is a result of ROLLUP operation. + // + // Formula: GROUPING(x,y,z) = GROUPING(x) << 2 + GROUPING(y) << 1 + GROUPING(z) + // + // so for the multi args GROUPING FUNCTION, we should return all the simple col grouping marks. When evaluating, + // after all grouping marks are & with gid in sequence, the final res is derived as the formula said. This also + // means that the grouping function accepts a maximum of 64 parameters. + resSliceMap := make([]map[uint64]struct{}, 0, len(sourceCols)) + if p.GroupingMode == tipb.GroupingMode_ModeBitAnd { + for _, oneCol := range sourceCols { + resMap := make(map[uint64]struct{}, 1) + res := uint64(0) + // from high pos to low pos. + for i := len(p.DistinctGroupByCol) - 1; i >= 0; i-- { + // left shift. + res = res << 1 + if p.DistinctGroupByCol[i].UniqueID == oneCol.UniqueID { + // fill the corresponding col pos as 1 as bitMark. 
+ // eg: say distinctGBY [x,y,z] and GROUPING(x) with '100'. + // When any groupingID & 100 > 0 means the source column x + // is needed in this grouping set and is not grouped, so res = 0. + res = res | 1 + } + } + resMap[res] = struct{}{} + resSliceMap = append(resSliceMap, resMap) + } + return resSliceMap + } + // For GroupingMode_ModeNumericSet mode, for every simple col, its grouping marks is an id slice rather than a bit map. + // For example, GROUPING(x,y,z) returns 6 it means: GROUPING(x) is 1, GROUPING(y) is 1 and GROUPING(z) is 0, in which + // we should also return all these three single column grouping marks as function meta to GROUPING FUNCTION. + for _, oneCol := range sourceCols { + resSliceMap = append(resSliceMap, p.RollupID2GIDS[int(oneCol.UniqueID)]) + } + return resSliceMap +} + +func (p *LogicalExpand) trySubstituteExprWithGroupingSetCol(expr expression.Expression) (expression.Expression, bool) { + // since all the original group items has been projected even single col, + // let's check the origin gby expression here, and map it to new gby col. + for i, oneExpr := range p.DistinctGbyExprs { + if bytes.Equal(expr.CanonicalHashCode(), oneExpr.CanonicalHashCode()) { + // found + return p.DistinctGroupByCol[i], true + } + } + // not found. + return expr, false +} + +// CheckGroupingFuncArgsInGroupBy checks whether grouping function args is in grouping items. +func (p *LogicalExpand) resolveGroupingFuncArgsInGroupBy(groupingFuncArgs []expression.Expression) ([]*expression.Column, error) { + // build GBYColMap + distinctGBYColMap := make(map[int64]struct{}, len(p.DistinctGroupByCol)) + for _, oneDistinctGBYCol := range p.DistinctGroupByCol { + distinctGBYColMap[oneDistinctGBYCol.UniqueID] = struct{}{} + } + var refPos int + rewrittenArgCols := make([]*expression.Column, 0, len(groupingFuncArgs)) + for argIdx, oneArg := range groupingFuncArgs { + refPos = -1 + // since all the original group items has been projected even single col, + // let's check the origin gby expression here, and map it to new gby col. + for i, oneExpr := range p.DistinctGbyExprs { + if bytes.Equal(oneArg.CanonicalHashCode(), oneExpr.CanonicalHashCode()) { + refPos = i + break + } + } + if refPos != -1 { + // directly ref original group by expressions. + rewrittenArgCols = append(rewrittenArgCols, p.DistinctGroupByCol[refPos]) + } else { + // case for refPos == -1 + // since for case like: select year from t group by year, country with rollup order by grouping(year) + // when encountering build grouping(year), the args it received has already been substituted as grouping + // set column year' rather than the original year anymore via first projection select item with pos 0. just check it! + find := false + if argCol, ok1 := oneArg.(*expression.Column); ok1 { + if _, ok2 := distinctGBYColMap[argCol.UniqueID]; ok2 { + rewrittenArgCols = append(rewrittenArgCols, argCol) + find = true + } + } + if !find { + return nil, plannererrors.ErrFieldInGroupingNotGroupBy.GenWithStackByArgs(fmt.Sprintf("#%d", argIdx)) + } + } + } + return rewrittenArgCols, nil +} + +// GenerateGroupingIDModeBitAnd is used to generate convenient groupingID for quick computation of grouping function. +// A bit in the bitmask is corresponding to an attribute in the group by attributes sequence, the selected attribute +// has corresponding bit set to 0 and otherwise set to 1. Example, if we have GroupBy attributes(a,b,c,d), the bitmask +// 5 (whose binary form is 0101) represents grouping set (a,c). 
+func (p *LogicalExpand) GenerateGroupingIDModeBitAnd(oneSet expression.GroupingSet) uint64 { + // say distinctGbyCols : a, b, c + // bit pos index : 0, 1, 2 + // current grouping set is : {a, c} + // +---- mark the corresponding pos as 1 then get ---> 101 + // for special case : {a,a,c} and {a,c}: this two logical same grouping set naturally share the same gid bits: 101 + idsNeeded := oneSet.AllColIDs() + res := uint64(0) + // from high pos to low pos. + for i := len(p.DistinctGroupByCol) - 1; i >= 0; i-- { + // left shift. + res = res << 1 + if idsNeeded.Has(int(p.DistinctGroupByCol[i].UniqueID)) { + // col is needed, fill the corresponding pos as 1. + res = res | 1 + } + } + // how to use it, eg: when encountering a grouping function like: grouping(a), we can know the column a's pos index in distinctGbyCols + // is about 0, then we can get the mask as 001 which will be returned back as this grouping function's meta when rewriting it, then we + // can use the bit mask to BitAnd(OP) groupingID column when evaluating, when the result is not 0, then for this row, it's column 'a' + // is not grouped, marking them as 0, otherwise marking them as 1. + return res +} + +// GenerateGroupingIDIncrementModeNumericSet is used to generate grouping ids when the num of grouping sets is greater than 64. +// Under this circumstance, bitAnd uint64 doesn't have enough capacity to set those bits, so incremental grouping ID set is chosen. +func (p *LogicalExpand) GenerateGroupingIDIncrementModeNumericSet(oneSetOffset int) uint64 { + // say distinctGbyCols : a, b, c + // say grouping sets : {a,b,c}, {a,b}, {a}, {} <----+ (store the mapping as grouping sets meta) + // we can just set its gid : 0, 1 2 3 <----+ + // just keep this mapping logic stored as meta, and return the defined id back generated from this defined rule. + // for special case : {a,a,c} and {a,c}: this two logical same grouping set naturally share the same gid allocation! + return p.RollupGroupingIDs[oneSetOffset] + // how to use it, eg: when encountering a grouping function like: grouping(a), we should dig down to related Expand operator and + // found it in meta that: column 'a' is in grouping set {a,b,c}, {a,b}, {a}, and its correspondent mapping grouping ids is about + // {0,1,2}. This grouping id set is returned back as this grouping function's specified meta when rewriting the grouping function, + // and the evaluating logic is quite simple as IN compare. 
+} diff --git a/tests/integrationtest/r/executor/expand.result b/tests/integrationtest/r/executor/expand.result new file mode 100644 index 0000000000000..6f71f6142f1a4 --- /dev/null +++ b/tests/integrationtest/r/executor/expand.result @@ -0,0 +1,326 @@ +CREATE TABLE t1( +product VARCHAR(32), +country_id INTEGER NOT NULL, +year INTEGER, +profit INTEGER); +INSERT INTO t1 VALUES ( 'Computer', 2,2000, 1200), +( 'TV', 1, 1999, 150), +( 'Calculator', 1, 1999,50), +( 'Computer', 1, 1999,1500), +( 'Computer', 1, 2000,1500), +( 'TV', 1, 2000, 150), +( 'TV', 2, 2000, 100), +( 'TV', 2, 2000, 100), +( 'Calculator', 1, 2000,75), +( 'Calculator', 2, 2000,75), +( 'TV', 1, 1999, 100), +( 'Computer', 1, 1999,1200), +( 'Computer', 2, 2000,1500), +( 'Calculator', 2, 2000,75), +( 'Phone', 3, 2003,10) +; +CREATE TABLE t2 ( +country_id INTEGER PRIMARY KEY, +country CHAR(20) NOT NULL); +INSERT INTO t2 VALUES (1, 'USA'),(2,'India'), (3,'Finland'); +# First simple rollups, with just grand total + +SELECT product, SUM(profit) FROM t1 GROUP BY product; +product SUM(profit) +Calculator 275 +Computer 6900 +Phone 10 +TV 600 + +SELECT product, SUM(profit) FROM t1 GROUP BY product WITH ROLLUP; +product SUM(profit) +NULL 7785 +Calculator 275 +Computer 6900 +Phone 10 +TV 600 + +SELECT product, SUM(profit) FROM t1 GROUP BY 1 WITH ROLLUP; +product SUM(profit) +NULL 7785 +Calculator 275 +Computer 6900 +Phone 10 +TV 600 + +SELECT product, SUM(profit),AVG(profit) FROM t1 GROUP BY product WITH ROLLUP; +product SUM(profit) AVG(profit) +NULL 7785 519.0000 +Calculator 275 68.7500 +Computer 6900 1380.0000 +Phone 10 10.0000 +TV 600 120.0000 + +# Sub totals +SELECT product, country_id , year, SUM(profit) FROM t1 +GROUP BY product, country_id, year; +product country_id year SUM(profit) +Calculator 1 1999 50 +Calculator 1 2000 75 +Calculator 2 2000 150 +Computer 1 1999 2700 +Computer 1 2000 1500 +Computer 2 2000 2700 +Phone 3 2003 10 +TV 1 1999 250 +TV 1 2000 150 +TV 2 2000 200 + +SELECT product, country_id , year, SUM(profit) FROM t1 +GROUP BY product, country_id, year WITH ROLLUP; +product country_id year SUM(profit) +NULL NULL NULL 7785 +Calculator NULL NULL 275 +Calculator 1 NULL 125 +Calculator 1 1999 50 +Calculator 1 2000 75 +Calculator 2 NULL 150 +Calculator 2 2000 150 +Computer NULL NULL 6900 +Computer 1 NULL 4200 +Computer 1 1999 2700 +Computer 1 2000 1500 +Computer 2 NULL 2700 +Computer 2 2000 2700 +Phone NULL NULL 10 +Phone 3 NULL 10 +Phone 3 2003 10 +TV NULL NULL 600 +TV 1 NULL 400 +TV 1 1999 250 +TV 1 2000 150 +TV 2 NULL 200 +TV 2 2000 200 + +ANALYZE TABLE t1; + +EXPLAIN FORMAT='brief' SELECT product, country_id , year, SUM(profit) +FROM t1 GROUP BY product, country_id, year WITH ROLLUP; +id estRows task access object operator info +Projection 8000.00 root Column#6->Column#11, Column#7->Column#12, Column#8->Column#13, Column#10 +└─HashAgg 8000.00 root group by:Column#16, Column#17, Column#18, Column#19, funcs:sum(Column#15)->Column#10, funcs:firstrow(Column#16)->Column#6, funcs:firstrow(Column#17)->Column#7, funcs:firstrow(Column#18)->Column#8 + └─Projection 10000.00 root cast(executor__expand.t1.profit, decimal(10,0) BINARY)->Column#15, Column#6->Column#16, Column#7->Column#17, Column#8->Column#18, gid->Column#19 + └─Expand 10000.00 root level-projection:[executor__expand.t1.profit, ->Column#6, ->Column#7, ->Column#8, 0->gid],[executor__expand.t1.profit, Column#6, ->Column#7, ->Column#8, 1->gid],[executor__expand.t1.profit, Column#6, Column#7, ->Column#8, 3->gid],[executor__expand.t1.profit, Column#6, Column#7, Column#8, 
7->gid]; schema: [executor__expand.t1.profit,Column#6,Column#7,Column#8,gid] + └─Projection 10000.00 root executor__expand.t1.profit, executor__expand.t1.product->Column#6, executor__expand.t1.country_id->Column#7, executor__expand.t1.year->Column#8 + └─TableReader 10000.00 root data:TableFullScan + └─TableFullScan 10000.00 cop[tikv] table:t1 keep order:false, stats:pseudo + +SELECT product, country_id , SUM(profit) FROM t1 +GROUP BY product, country_id WITH ROLLUP +ORDER BY product DESC, country_id; +product country_id SUM(profit) +TV NULL 600 +TV 1 400 +TV 2 200 +Phone NULL 10 +Phone 3 10 +Computer NULL 6900 +Computer 1 4200 +Computer 2 2700 +Calculator NULL 275 +Calculator 1 125 +Calculator 2 150 +NULL NULL 7785 + +# limit +SELECT product, country_id , year, SUM(profit) FROM t1 +GROUP BY product, country_id, year WITH ROLLUP +ORDER BY product, country_id, year LIMIT 5; +product country_id year SUM(profit) +NULL NULL NULL 7785 +Calculator NULL NULL 275 +Calculator 1 NULL 125 +Calculator 1 1999 50 +Calculator 1 2000 75 + +SELECT product, country_id , year, SUM(profit) FROM t1 +GROUP BY product, country_id, year WITH ROLLUP +ORDER BY product, country_id, year limit 3,3; +product country_id year SUM(profit) +Calculator 1 1999 50 +Calculator 1 2000 75 +Calculator 2 NULL 150 + +SELECT product, country_id, COUNT(*), COUNT(distinct year) +FROM t1 GROUP BY product, country_id; +product country_id COUNT(*) COUNT(distinct year) +Calculator 1 2 2 +Calculator 2 2 1 +Computer 1 3 2 +Computer 2 2 1 +Phone 3 1 1 +TV 1 3 2 +TV 2 2 1 + +SELECT product, country_id, COUNT(*), COUNT(distinct year) +FROM t1 GROUP BY product, country_id WITH ROLLUP; +product country_id COUNT(*) COUNT(distinct year) +NULL NULL 15 3 +Calculator NULL 4 2 +Calculator 1 2 2 +Calculator 2 2 1 +Computer NULL 5 2 +Computer 1 3 2 +Computer 2 2 1 +Phone NULL 1 1 +Phone 3 1 1 +TV NULL 5 2 +TV 1 3 2 +TV 2 2 1 + +# Test of having +SELECT product, country_id , year, SUM(profit) FROM t1 +GROUP BY product, country_id, year WITH ROLLUP HAVING country_id = 1; +product country_id year SUM(profit) +Calculator 1 NULL 125 +Calculator 1 1999 50 +Calculator 1 2000 75 +Computer 1 NULL 4200 +Computer 1 1999 2700 +Computer 1 2000 1500 +TV 1 NULL 400 +TV 1 1999 250 +TV 1 2000 150 + +SELECT product, country_id , year, SUM(profit) FROM t1 +GROUP BY product, country_id, year WITH ROLLUP HAVING SUM(profit) > 200; +product country_id year SUM(profit) +NULL NULL NULL 7785 +Calculator NULL NULL 275 +Computer NULL NULL 6900 +Computer 1 NULL 4200 +Computer 1 1999 2700 +Computer 1 2000 1500 +Computer 2 NULL 2700 +Computer 2 2000 2700 +TV NULL NULL 600 +TV 1 NULL 400 +TV 1 1999 250 + +SELECT product, country_id , year, SUM(profit) FROM t1 +GROUP BY product, country_id, year WITH ROLLUP HAVING SUM(profit) > 7000; +product country_id year SUM(profit) +NULL NULL NULL 7785 + +# Functions +SELECT CONCAT(product,':',country_id) AS 'prod', CONCAT(":",year,":") AS 'year', +1+1, SUM(profit)/COUNT(*) FROM t1 GROUP BY 1,2 WITH ROLLUP; +prod year 1+1 SUM(profit)/COUNT(*) +NULL NULL 2 519.0000 +Calculator:1 NULL 2 62.5000 +Calculator:1 :1999: 2 50.0000 +Calculator:1 :2000: 2 75.0000 +Calculator:2 NULL 2 75.0000 +Calculator:2 :2000: 2 75.0000 +Computer:1 NULL 2 1400.0000 +Computer:1 :1999: 2 1350.0000 +Computer:1 :2000: 2 1500.0000 +Computer:2 NULL 2 1350.0000 +Computer:2 :2000: 2 1350.0000 +Phone:3 NULL 2 10.0000 +Phone:3 :2003: 2 10.0000 +TV:1 NULL 2 133.3333 +TV:1 :1999: 2 125.0000 +TV:1 :2000: 2 150.0000 +TV:2 NULL 2 100.0000 +TV:2 :2000: 2 100.0000 + +SELECT product, 
SUM(profit)/COUNT(*) FROM t1 GROUP BY product WITH ROLLUP; +product SUM(profit)/COUNT(*) +NULL 519.0000 +Calculator 68.7500 +Computer 1380.0000 +Phone 10.0000 +TV 120.0000 + +SELECT LEFT(product,4) AS prod, SUM(profit)/COUNT(*) FROM t1 +GROUP BY prod WITH ROLLUP; +prod SUM(profit)/COUNT(*) +NULL 519.0000 +Calc 68.7500 +Comp 1380.0000 +Phon 10.0000 +TV 120.0000 + +SELECT CONCAT(product,':',country_id), 1+1, SUM(profit)/COUNT(*) FROM t1 +GROUP BY CONCAT(product,':',country_id) WITH ROLLUP; +CONCAT(product,':',country_id) 1+1 SUM(profit)/COUNT(*) +NULL 2 519.0000 +Calculator:1 2 62.5000 +Calculator:2 2 75.0000 +Computer:1 2 1400.0000 +Computer:2 2 1350.0000 +Phone:3 2 10.0000 +TV:1 2 133.3333 +TV:2 2 100.0000 +SET @saved_sql_mode = @@session.sql_mode; +SET SESSION sql_mode= ''; + +SELECT UPPER(product) AS prod, +SUM(profit)/COUNT(*) +FROM t1 GROUP BY prod WITH ROLLUP HAVING prod='COMPUTER' ; +prod SUM(profit)/COUNT(*) +COMPUTER 1380.0000 +SET SESSION sql_mode= @saved_sql_mode; + +# Joins +SELECT product, country , year, SUM(profit) FROM t1,t2 WHERE +t1.country_id=t2.country_id GROUP BY product, country, year WITH ROLLUP; +product country year SUM(profit) +NULL NULL NULL 7785 +Calculator NULL NULL 275 +Calculator India NULL 150 +Calculator India 2000 150 +Calculator USA NULL 125 +Calculator USA 1999 50 +Calculator USA 2000 75 +Computer NULL NULL 6900 +Computer India NULL 2700 +Computer India 2000 2700 +Computer USA NULL 4200 +Computer USA 1999 2700 +Computer USA 2000 1500 +Phone NULL NULL 10 +Phone Finland NULL 10 +Phone Finland 2003 10 +TV NULL NULL 600 +TV India NULL 200 +TV India 2000 200 +TV USA NULL 400 +TV USA 1999 250 +TV USA 2000 150 + +SELECT product, `SUM` FROM (SELECT product, SUM(profit) AS 'sum' FROM t1 +GROUP BY product WITH ROLLUP) AS tmp +WHERE product is null; +product SUM +NULL 7785 + +SELECT product FROM t1 WHERE EXISTS +(SELECT product, country_id , SUM(profit) FROM t1 AS t2 +WHERE t1.product=t2.product GROUP BY product, country_id WITH ROLLUP +HAVING SUM(profit) > 6000); +product +Computer +Computer +Computer +Computer +Computer + +SELECT product, country_id , year, SUM(profit) FROM t1 +GROUP BY product, country_id, year HAVING country_id is NULL; +product country_id year SUM(profit) + +SELECT CONCAT(':',product,':'), SUM(profit), AVG(profit) FROM t1 +GROUP BY product WITH ROLLUP; +CONCAT(':',product,':') SUM(profit) AVG(profit) +NULL 7785 519.0000 +:Calculator: 275 68.7500 +:Computer: 6900 1380.0000 +:Phone: 10 10.0000 +:TV: 600 120.0000 diff --git a/tests/integrationtest/t/executor/expand.test b/tests/integrationtest/t/executor/expand.test new file mode 100644 index 0000000000000..da83285249d7b --- /dev/null +++ b/tests/integrationtest/t/executor/expand.test @@ -0,0 +1,152 @@ +CREATE TABLE t1( +product VARCHAR(32), +country_id INTEGER NOT NULL, +year INTEGER, +profit INTEGER); + +INSERT INTO t1 VALUES ( 'Computer', 2,2000, 1200), +( 'TV', 1, 1999, 150), +( 'Calculator', 1, 1999,50), +( 'Computer', 1, 1999,1500), +( 'Computer', 1, 2000,1500), +( 'TV', 1, 2000, 150), +( 'TV', 2, 2000, 100), +( 'TV', 2, 2000, 100), +( 'Calculator', 1, 2000,75), +( 'Calculator', 2, 2000,75), +( 'TV', 1, 1999, 100), +( 'Computer', 1, 1999,1200), +( 'Computer', 2, 2000,1500), +( 'Calculator', 2, 2000,75), +( 'Phone', 3, 2003,10) +; + +CREATE TABLE t2 ( +country_id INTEGER PRIMARY KEY, +country CHAR(20) NOT NULL); + +INSERT INTO t2 VALUES (1, 'USA'),(2,'India'), (3,'Finland'); + +--echo # First simple rollups, with just grand total +--echo +--sorted_result +SELECT product, SUM(profit) FROM t1 
GROUP BY product; +--echo +--sorted_result +SELECT product, SUM(profit) FROM t1 GROUP BY product WITH ROLLUP; +--echo +--sorted_result +SELECT product, SUM(profit) FROM t1 GROUP BY 1 WITH ROLLUP; +--echo +--sorted_result +SELECT product, SUM(profit),AVG(profit) FROM t1 GROUP BY product WITH ROLLUP; + +--echo +--echo # Sub totals +--sorted_result +SELECT product, country_id , year, SUM(profit) FROM t1 +GROUP BY product, country_id, year; +--echo +--sorted_result +SELECT product, country_id , year, SUM(profit) FROM t1 +GROUP BY product, country_id, year WITH ROLLUP; +--echo +ANALYZE TABLE t1; +--echo +EXPLAIN FORMAT='brief' SELECT product, country_id , year, SUM(profit) +FROM t1 GROUP BY product, country_id, year WITH ROLLUP; +--echo +SELECT product, country_id , SUM(profit) FROM t1 +GROUP BY product, country_id WITH ROLLUP +ORDER BY product DESC, country_id; + +--echo +--echo # limit +--sorted_result +SELECT product, country_id , year, SUM(profit) FROM t1 +GROUP BY product, country_id, year WITH ROLLUP +ORDER BY product, country_id, year LIMIT 5; +--echo +--sorted_result +SELECT product, country_id , year, SUM(profit) FROM t1 +GROUP BY product, country_id, year WITH ROLLUP +ORDER BY product, country_id, year limit 3,3; +--echo +--sorted_result +SELECT product, country_id, COUNT(*), COUNT(distinct year) +FROM t1 GROUP BY product, country_id; +--echo +--sorted_result +SELECT product, country_id, COUNT(*), COUNT(distinct year) +FROM t1 GROUP BY product, country_id WITH ROLLUP; + +--echo +--echo # Test of having +--sorted_result +SELECT product, country_id , year, SUM(profit) FROM t1 +GROUP BY product, country_id, year WITH ROLLUP HAVING country_id = 1; +--echo +--sorted_result +SELECT product, country_id , year, SUM(profit) FROM t1 +GROUP BY product, country_id, year WITH ROLLUP HAVING SUM(profit) > 200; +--echo +--sorted_result +SELECT product, country_id , year, SUM(profit) FROM t1 +GROUP BY product, country_id, year WITH ROLLUP HAVING SUM(profit) > 7000; + +--echo +--echo # Functions +--sorted_result +SELECT CONCAT(product,':',country_id) AS 'prod', CONCAT(":",year,":") AS 'year', +1+1, SUM(profit)/COUNT(*) FROM t1 GROUP BY 1,2 WITH ROLLUP; +--echo +--sorted_result +SELECT product, SUM(profit)/COUNT(*) FROM t1 GROUP BY product WITH ROLLUP; +--echo +--sorted_result +SELECT LEFT(product,4) AS prod, SUM(profit)/COUNT(*) FROM t1 +GROUP BY prod WITH ROLLUP; +--echo +--sorted_result +SELECT CONCAT(product,':',country_id), 1+1, SUM(profit)/COUNT(*) FROM t1 +GROUP BY CONCAT(product,':',country_id) WITH ROLLUP; + +SET @saved_sql_mode = @@session.sql_mode; +SET SESSION sql_mode= ''; +--echo +--sorted_result +SELECT UPPER(product) AS prod, + SUM(profit)/COUNT(*) + FROM t1 GROUP BY prod WITH ROLLUP HAVING prod='COMPUTER' ; +SET SESSION sql_mode= @saved_sql_mode; + +--echo +--echo # Joins +--sorted_result +SELECT product, country , year, SUM(profit) FROM t1,t2 WHERE +t1.country_id=t2.country_id GROUP BY product, country, year WITH ROLLUP; + +--echo +--sorted_result +SELECT product, `SUM` FROM (SELECT product, SUM(profit) AS 'sum' FROM t1 + GROUP BY product WITH ROLLUP) AS tmp +WHERE product is null; + +--echo +--sorted_result +SELECT product FROM t1 WHERE EXISTS +(SELECT product, country_id , SUM(profit) FROM t1 AS t2 + WHERE t1.product=t2.product GROUP BY product, country_id WITH ROLLUP + HAVING SUM(profit) > 6000); + +--echo +--sorted_result +# The following does not return the expected answer, but this is a limitation +# in the implementation so we should just document it +SELECT product, country_id 
, year, SUM(profit) FROM t1 +GROUP BY product, country_id, year HAVING country_id is NULL; + +--echo +--sorted_result +SELECT CONCAT(':',product,':'), SUM(profit), AVG(profit) FROM t1 +GROUP BY product WITH ROLLUP; From 082a620cc9c8bd535a78bf0fcfd4f58fb0e7db32 Mon Sep 17 00:00:00 2001 From: AilinKid <314806019@qq.com> Date: Tue, 30 Jul 2024 12:06:10 +0800 Subject: [PATCH 2/2] . Signed-off-by: AilinKid <314806019@qq.com> --- pkg/planner/core/logical_expand.go | 393 ------------------ pkg/planner/core/logical_plans.go | 9 + .../integrationtest/r/executor/expand.result | 326 --------------- tests/integrationtest/t/executor/expand.test | 152 ------- 4 files changed, 9 insertions(+), 871 deletions(-) delete mode 100644 pkg/planner/core/logical_expand.go delete mode 100644 tests/integrationtest/r/executor/expand.result delete mode 100644 tests/integrationtest/t/executor/expand.test diff --git a/pkg/planner/core/logical_expand.go b/pkg/planner/core/logical_expand.go deleted file mode 100644 index 83260219596c2..0000000000000 --- a/pkg/planner/core/logical_expand.go +++ /dev/null @@ -1,393 +0,0 @@ -// Copyright 2024 PingCAP, Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package core - -import ( - "bytes" - "fmt" - - "github.com/pingcap/tidb/pkg/expression" - "github.com/pingcap/tidb/pkg/planner/core/base" - "github.com/pingcap/tidb/pkg/planner/core/operator/logicalop" - fd "github.com/pingcap/tidb/pkg/planner/funcdep" - "github.com/pingcap/tidb/pkg/planner/property" - "github.com/pingcap/tidb/pkg/planner/util/optimizetrace" - "github.com/pingcap/tidb/pkg/planner/util/optimizetrace/logicaltrace" - "github.com/pingcap/tidb/pkg/types" - "github.com/pingcap/tidb/pkg/util/dbterror/plannererrors" - "github.com/pingcap/tidb/pkg/util/plancodec" - "github.com/pingcap/tipb/go-tipb" -) - -// LogicalExpand represents a logical Expand OP serves for data replication requirement. -type LogicalExpand struct { - logicalop.LogicalSchemaProducer - - // distinct group by columns. (maybe projected below if it's a non-col) - DistinctGroupByCol []*expression.Column - DistinctGbyColNames []*types.FieldName - // keep the old gbyExprs for resolve cases like grouping(a+b), the args: - // a+b should be resolved to new projected gby col according to ref pos. - DistinctGbyExprs []expression.Expression - - // rollup grouping sets. - DistinctSize int - RollupGroupingSets expression.GroupingSets - RollupID2GIDS map[int]map[uint64]struct{} - RollupGroupingIDs []uint64 - - // The level projections is generated from grouping sets,make execution more clearly. - LevelExprs [][]expression.Expression - - // The generated column names. Eg: "grouping_id" and so on. - ExtraGroupingColNames []string - - // GroupingMode records the grouping id allocation mode. - GroupingMode tipb.GroupingMode - - // The GID and GPos column generated by logical expand if any. 
- GID *expression.Column - GIDName *types.FieldName - GPos *expression.Column - GPosName *types.FieldName -} - -// Init initializes LogicalProjection. -func (p LogicalExpand) Init(ctx base.PlanContext, offset int) *LogicalExpand { - p.BaseLogicalPlan = logicalop.NewBaseLogicalPlan(ctx, plancodec.TypeExpand, &p, offset) - return &p -} - -// *************************** start implementation of logicalPlan interface *************************** - -// HashCode inherits BaseLogicalPlan.LogicalPlan.<0th> implementation. - -// PredicatePushDown implements base.LogicalPlan.<1st> interface. -func (p *LogicalExpand) PredicatePushDown(predicates []expression.Expression, opt *optimizetrace.LogicalOptimizeOp) (ret []expression.Expression, retPlan base.LogicalPlan) { - // Note that, grouping column related predicates can't be pushed down, since grouping column has nullability change after Expand OP itself. - // condition related with grouping column shouldn't be pushed down through it. - // currently, since expand is adjacent to aggregate, any filter above aggregate wanted to be push down through expand only have two cases: - // 1. agg function related filters. (these condition is always above aggregate) - // 2. group-by item related filters. (there condition is always related with grouping sets columns, which can't be pushed down) - // As a whole, we banned all the predicates pushing-down logic here that remained in Expand OP, and constructing a new selection above it if any. - remained, child := p.BaseLogicalPlan.PredicatePushDown(nil, opt) - return append(remained, predicates...), child -} - -// PruneColumns implement the base.LogicalPlan.<2nd> interface. -// logicExpand is built in the logical plan building phase, where all the column prune is not done yet. So the -// expand projection expressions is meaningless if it built at that time. (we only maintain its schema, while -// the level projection expressions construction is left to the last logical optimize rule) -// -// so when do the rule_column_pruning here, we just prune the schema is enough. -func (p *LogicalExpand) PruneColumns(parentUsedCols []*expression.Column, opt *optimizetrace.LogicalOptimizeOp) (base.LogicalPlan, error) { - // Expand need those extra redundant distinct group by columns projected from underlying projection. - // distinct GroupByCol must be used by aggregate above, to make sure this, append DistinctGroupByCol again. - parentUsedCols = append(parentUsedCols, p.DistinctGroupByCol...) - used := expression.GetUsedList(p.SCtx().GetExprCtx().GetEvalCtx(), parentUsedCols, p.Schema()) - prunedColumns := make([]*expression.Column, 0) - for i := len(used) - 1; i >= 0; i-- { - if !used[i] { - prunedColumns = append(prunedColumns, p.Schema().Columns[i]) - p.Schema().Columns = append(p.Schema().Columns[:i], p.Schema().Columns[i+1:]...) - p.SetOutputNames(append(p.OutputNames()[:i], p.OutputNames()[i+1:]...)) - } - } - logicaltrace.AppendColumnPruneTraceStep(p, prunedColumns, opt) - // Underlying still need to keep the distinct group by columns and parent used columns. - var err error - p.Children()[0], err = p.Children()[0].PruneColumns(parentUsedCols, opt) - if err != nil { - return nil, err - } - return p, nil -} - -// FindBestTask inherits BaseLogicalPlan.LogicalPlan.<3rd> implementation. - -// BuildKeyInfo inherits BaseLogicalPlan.LogicalPlan.<4th> implementation. - -// PushDownTopN inherits BaseLogicalPlan.LogicalPlan.<5th> implementation. - -// DeriveTopN inherits BaseLogicalPlan.LogicalPlan.<6th> implementation. 
- -// PredicateSimplification inherits BaseLogicalPlan.LogicalPlan.<7th> implementation. - -// ConstantPropagation inherits BaseLogicalPlan.LogicalPlan.<8th> implementation. - -// PullUpConstantPredicates inherits BaseLogicalPlan.LogicalPlan.<9th> implementation. - -// RecursiveDeriveStats inherits BaseLogicalPlan.LogicalPlan.<10th> implementation. - -// DeriveStats inherits BaseLogicalPlan.LogicalPlan.<11th> implementation. - -// ExtractColGroups inherits BaseLogicalPlan.LogicalPlan.<12th> implementation. - -// PreparePossibleProperties inherits BaseLogicalPlan.LogicalPlan.<13th> implementation. - -// ExhaustPhysicalPlans implements base.LogicalPlan.<14th> interface. -func (p *LogicalExpand) ExhaustPhysicalPlans(prop *property.PhysicalProperty) ([]base.PhysicalPlan, bool, error) { - return exhaustPhysicalPlans4LogicalExpand(p, prop) -} - -// ExtractCorrelatedCols implements base.LogicalPlan.<15th> interface. -func (p *LogicalExpand) ExtractCorrelatedCols() []*expression.CorrelatedColumn { - // if p.LevelExprs is nil, it means the GenLevelProjections has not been called yet, - // which is done in logical optimizing phase. While for building correlated subquery - // plan, the ExtractCorrelatedCols will be called once after building, so we should - // distinguish the case here. - if p.LevelExprs == nil { - // since level projections generation don't produce any correlated columns, just - // return nil. - return nil - } - corCols := make([]*expression.CorrelatedColumn, 0, len(p.LevelExprs[0])) - for _, lExpr := range p.LevelExprs { - for _, expr := range lExpr { - corCols = append(corCols, expression.ExtractCorColumns(expr)...) - } - } - return corCols -} - -// MaxOneRow inherits BaseLogicalPlan.LogicalPlan.<16th> implementation. - -// Children inherits BaseLogicalPlan.LogicalPlan.<17th> implementation. - -// SetChildren inherits BaseLogicalPlan.LogicalPlan.<18th> implementation. - -// SetChild inherits BaseLogicalPlan.LogicalPlan.<19th> implementation. - -// RollBackTaskMap inherits BaseLogicalPlan.LogicalPlan.<20th> implementation. - -// CanPushToCop inherits BaseLogicalPlan.LogicalPlan.<21st> implementation. - -// ExtractFD implements the base.LogicalPlan.<22nd> interface, extracting the FD from bottom up. -func (p *LogicalExpand) ExtractFD() *fd.FDSet { - // basically extract the children's fdSet. - return p.LogicalSchemaProducer.ExtractFD() -} - -// GetBaseLogicalPlan inherits BaseLogicalPlan.LogicalPlan.<23rd> implementation. - -// ConvertOuterToInnerJoin inherits BaseLogicalPlan.LogicalPlan.<24th> implementation. - -// *************************** end implementation of logicalPlan interface *************************** - -// GetUsedCols extracts all of the Columns used by proj. -func (*LogicalExpand) GetUsedCols() (usedCols []*expression.Column) { - // be careful that, expand OP itself, shouldn't output its own used cols, because - // it just replicates the child's schema by defined grouping sets. (pass down what - // the parent's used is enough here) - return usedCols -} - -// GenLevelProjections is used to generate level projections after all the necessary logical -// optimization is done such as column pruning. -func (p *LogicalExpand) GenLevelProjections() { - // get all the grouping cols. 
- groupingSetCols := p.RollupGroupingSets.AllSetsColIDs() - p.DistinctSize, p.RollupGroupingIDs, p.RollupID2GIDS = p.RollupGroupingSets.DistinctSize() - hasDuplicateGroupingSet := len(p.RollupGroupingSets) != p.DistinctSize - schemaCols := p.Schema().Columns - // last two schema col is about gid and gpos if any. - nonGenCols := schemaCols[:len(schemaCols)-1] - gidCol := schemaCols[len(schemaCols)-1] - if hasDuplicateGroupingSet { - // last two schema col is about gid and gpos. - nonGenCols = schemaCols[:len(schemaCols)-2] - gidCol = schemaCols[len(schemaCols)-2] - } - - // for every rollup grouping set, gen its level projection. - for offset, curGroupingSet := range p.RollupGroupingSets { - levelProj := make([]expression.Expression, 0, p.Schema().Len()) - for _, oneCol := range nonGenCols { - // if this col is in the grouping-set-cols and this col is not needed by current grouping-set, just set it as null value with specified fieldType. - if groupingSetCols.Has(int(oneCol.UniqueID)) { - if curGroupingSet.AllColIDs().Has(int(oneCol.UniqueID)) { - // needed col in current grouping set: project it as col-ref. - levelProj = append(levelProj, oneCol) - } else { - // un-needed col in current grouping set: project it as null value. - nullValue := expression.NewNullWithFieldType(oneCol.RetType.Clone()) - levelProj = append(levelProj, nullValue) - } - } else { - // other un-related cols: project it as col-ref. - levelProj = append(levelProj, oneCol) - } - } - // generate the grouping_id projection expr, project it as uint64. - gid := p.GenerateGroupingIDModeBitAnd(curGroupingSet) - if p.GroupingMode == tipb.GroupingMode_ModeNumericSet { - gid = p.GenerateGroupingIDIncrementModeNumericSet(offset) - } - gidValue := expression.NewUInt64ConstWithFieldType(gid, gidCol.RetType.Clone()) - levelProj = append(levelProj, gidValue) - - // generate the grouping_pos projection expr, project it as uint64 if any. - if hasDuplicateGroupingSet { - gposCol := schemaCols[len(schemaCols)-1] - // gpos value can equal the grouping set index offset. - gpos := expression.NewUInt64ConstWithFieldType(uint64(offset), gposCol.RetType.Clone()) - // gen-col: project it as uint64. - levelProj = append(levelProj, gpos) - } - p.LevelExprs = append(p.LevelExprs, levelProj) - } -} - -// GenerateGroupingMarks generate the groupingMark for the source column specified in grouping function. -func (p *LogicalExpand) GenerateGroupingMarks(sourceCols []*expression.Column) []map[uint64]struct{} { - // Since grouping function may have multi args like grouping(a,b), so the source columns may greater than 1. - // reference: https://dev.mysql.com/blog-archive/mysql-8-0-grouping-function/ - // Let's say GROUPING(b,a) group by a,b with rollup. (Note the b,a sequence is reversed from gby item) - // if GROUPING (b,a) returns 3, it means that NULL in column “b” and NULL in column “a” for that row is - // produce by a ROLLUP operation. If result is 2, NULL in column “a” alone is a result of ROLLUP operation. - // - // Formula: GROUPING(x,y,z) = GROUPING(x) << 2 + GROUPING(y) << 1 + GROUPING(z) - // - // so for the multi args GROUPING FUNCTION, we should return all the simple col grouping marks. When evaluating, - // after all grouping marks are & with gid in sequence, the final res is derived as the formula said. This also - // means that the grouping function accepts a maximum of 64 parameters. 
- resSliceMap := make([]map[uint64]struct{}, 0, len(sourceCols)) - if p.GroupingMode == tipb.GroupingMode_ModeBitAnd { - for _, oneCol := range sourceCols { - resMap := make(map[uint64]struct{}, 1) - res := uint64(0) - // from high pos to low pos. - for i := len(p.DistinctGroupByCol) - 1; i >= 0; i-- { - // left shift. - res = res << 1 - if p.DistinctGroupByCol[i].UniqueID == oneCol.UniqueID { - // fill the corresponding col pos as 1 as bitMark. - // eg: say distinctGBY [x,y,z] and GROUPING(x) with '100'. - // When any groupingID & 100 > 0 means the source column x - // is needed in this grouping set and is not grouped, so res = 0. - res = res | 1 - } - } - resMap[res] = struct{}{} - resSliceMap = append(resSliceMap, resMap) - } - return resSliceMap - } - // For GroupingMode_ModeNumericSet mode, for every simple col, its grouping marks is an id slice rather than a bit map. - // For example, GROUPING(x,y,z) returns 6 it means: GROUPING(x) is 1, GROUPING(y) is 1 and GROUPING(z) is 0, in which - // we should also return all these three single column grouping marks as function meta to GROUPING FUNCTION. - for _, oneCol := range sourceCols { - resSliceMap = append(resSliceMap, p.RollupID2GIDS[int(oneCol.UniqueID)]) - } - return resSliceMap -} - -func (p *LogicalExpand) trySubstituteExprWithGroupingSetCol(expr expression.Expression) (expression.Expression, bool) { - // since all the original group items has been projected even single col, - // let's check the origin gby expression here, and map it to new gby col. - for i, oneExpr := range p.DistinctGbyExprs { - if bytes.Equal(expr.CanonicalHashCode(), oneExpr.CanonicalHashCode()) { - // found - return p.DistinctGroupByCol[i], true - } - } - // not found. - return expr, false -} - -// CheckGroupingFuncArgsInGroupBy checks whether grouping function args is in grouping items. -func (p *LogicalExpand) resolveGroupingFuncArgsInGroupBy(groupingFuncArgs []expression.Expression) ([]*expression.Column, error) { - // build GBYColMap - distinctGBYColMap := make(map[int64]struct{}, len(p.DistinctGroupByCol)) - for _, oneDistinctGBYCol := range p.DistinctGroupByCol { - distinctGBYColMap[oneDistinctGBYCol.UniqueID] = struct{}{} - } - var refPos int - rewrittenArgCols := make([]*expression.Column, 0, len(groupingFuncArgs)) - for argIdx, oneArg := range groupingFuncArgs { - refPos = -1 - // since all the original group items has been projected even single col, - // let's check the origin gby expression here, and map it to new gby col. - for i, oneExpr := range p.DistinctGbyExprs { - if bytes.Equal(oneArg.CanonicalHashCode(), oneExpr.CanonicalHashCode()) { - refPos = i - break - } - } - if refPos != -1 { - // directly ref original group by expressions. - rewrittenArgCols = append(rewrittenArgCols, p.DistinctGroupByCol[refPos]) - } else { - // case for refPos == -1 - // since for case like: select year from t group by year, country with rollup order by grouping(year) - // when encountering build grouping(year), the args it received has already been substituted as grouping - // set column year' rather than the original year anymore via first projection select item with pos 0. just check it! 
- find := false - if argCol, ok1 := oneArg.(*expression.Column); ok1 { - if _, ok2 := distinctGBYColMap[argCol.UniqueID]; ok2 { - rewrittenArgCols = append(rewrittenArgCols, argCol) - find = true - } - } - if !find { - return nil, plannererrors.ErrFieldInGroupingNotGroupBy.GenWithStackByArgs(fmt.Sprintf("#%d", argIdx)) - } - } - } - return rewrittenArgCols, nil -} - -// GenerateGroupingIDModeBitAnd is used to generate convenient groupingID for quick computation of grouping function. -// A bit in the bitmask is corresponding to an attribute in the group by attributes sequence, the selected attribute -// has corresponding bit set to 0 and otherwise set to 1. Example, if we have GroupBy attributes(a,b,c,d), the bitmask -// 5 (whose binary form is 0101) represents grouping set (a,c). -func (p *LogicalExpand) GenerateGroupingIDModeBitAnd(oneSet expression.GroupingSet) uint64 { - // say distinctGbyCols : a, b, c - // bit pos index : 0, 1, 2 - // current grouping set is : {a, c} - // +---- mark the corresponding pos as 1 then get ---> 101 - // for special case : {a,a,c} and {a,c}: this two logical same grouping set naturally share the same gid bits: 101 - idsNeeded := oneSet.AllColIDs() - res := uint64(0) - // from high pos to low pos. - for i := len(p.DistinctGroupByCol) - 1; i >= 0; i-- { - // left shift. - res = res << 1 - if idsNeeded.Has(int(p.DistinctGroupByCol[i].UniqueID)) { - // col is needed, fill the corresponding pos as 1. - res = res | 1 - } - } - // how to use it, eg: when encountering a grouping function like: grouping(a), we can know the column a's pos index in distinctGbyCols - // is about 0, then we can get the mask as 001 which will be returned back as this grouping function's meta when rewriting it, then we - // can use the bit mask to BitAnd(OP) groupingID column when evaluating, when the result is not 0, then for this row, it's column 'a' - // is not grouped, marking them as 0, otherwise marking them as 1. - return res -} - -// GenerateGroupingIDIncrementModeNumericSet is used to generate grouping ids when the num of grouping sets is greater than 64. -// Under this circumstance, bitAnd uint64 doesn't have enough capacity to set those bits, so incremental grouping ID set is chosen. -func (p *LogicalExpand) GenerateGroupingIDIncrementModeNumericSet(oneSetOffset int) uint64 { - // say distinctGbyCols : a, b, c - // say grouping sets : {a,b,c}, {a,b}, {a}, {} <----+ (store the mapping as grouping sets meta) - // we can just set its gid : 0, 1 2 3 <----+ - // just keep this mapping logic stored as meta, and return the defined id back generated from this defined rule. - // for special case : {a,a,c} and {a,c}: this two logical same grouping set naturally share the same gid allocation! - return p.RollupGroupingIDs[oneSetOffset] - // how to use it, eg: when encountering a grouping function like: grouping(a), we should dig down to related Expand operator and - // found it in meta that: column 'a' is in grouping set {a,b,c}, {a,b}, {a}, and its correspondent mapping grouping ids is about - // {0,1,2}. This grouping id set is returned back as this grouping function's specified meta when rewriting the grouping function, - // and the evaluating logic is quite simple as IN compare. 
-}
diff --git a/pkg/planner/core/logical_plans.go b/pkg/planner/core/logical_plans.go
index c507dd69f1df3..5681d2c70c825 100644
--- a/pkg/planner/core/logical_plans.go
+++ b/pkg/planner/core/logical_plans.go
@@ -564,6 +564,15 @@ func (p *LogicalExpand) ExtractFD() *fd.FDSet {
 
 // ExtractCorrelatedCols implements LogicalPlan interface.
 func (p *LogicalExpand) ExtractCorrelatedCols() []*expression.CorrelatedColumn {
+	// If p.LevelExprs is nil, GenLevelProjections has not been called yet; it runs
+	// in the logical optimization phase. When building a correlated subquery plan,
+	// ExtractCorrelatedCols is called once right after the plan is built, so we
+	// need to distinguish that case here.
+	if p.LevelExprs == nil {
+		// Level projection generation does not produce any correlated columns,
+		// so just return nil.
+		return nil
+	}
 	corCols := make([]*expression.CorrelatedColumn, 0, len(p.LevelExprs[0]))
 	for _, lExpr := range p.LevelExprs {
 		for _, expr := range lExpr {
diff --git a/tests/integrationtest/r/executor/expand.result b/tests/integrationtest/r/executor/expand.result
deleted file mode 100644
index 6f71f6142f1a4..0000000000000
--- a/tests/integrationtest/r/executor/expand.result
+++ /dev/null
@@ -1,326 +0,0 @@
-CREATE TABLE t1(
-product VARCHAR(32),
-country_id INTEGER NOT NULL,
-year INTEGER,
-profit INTEGER);
-INSERT INTO t1 VALUES ( 'Computer', 2,2000, 1200),
-( 'TV', 1, 1999, 150),
-( 'Calculator', 1, 1999,50),
-( 'Computer', 1, 1999,1500),
-( 'Computer', 1, 2000,1500),
-( 'TV', 1, 2000, 150),
-( 'TV', 2, 2000, 100),
-( 'TV', 2, 2000, 100),
-( 'Calculator', 1, 2000,75),
-( 'Calculator', 2, 2000,75),
-( 'TV', 1, 1999, 100),
-( 'Computer', 1, 1999,1200),
-( 'Computer', 2, 2000,1500),
-( 'Calculator', 2, 2000,75),
-( 'Phone', 3, 2003,10)
-;
-CREATE TABLE t2 (
-country_id INTEGER PRIMARY KEY,
-country CHAR(20) NOT NULL);
-INSERT INTO t2 VALUES (1, 'USA'),(2,'India'), (3,'Finland');
-# First simple rollups, with just grand total
-
-SELECT product, SUM(profit) FROM t1 GROUP BY product;
-product SUM(profit)
-Calculator 275
-Computer 6900
-Phone 10
-TV 600
-
-SELECT product, SUM(profit) FROM t1 GROUP BY product WITH ROLLUP;
-product SUM(profit)
-NULL 7785
-Calculator 275
-Computer 6900
-Phone 10
-TV 600
-
-SELECT product, SUM(profit) FROM t1 GROUP BY 1 WITH ROLLUP;
-product SUM(profit)
-NULL 7785
-Calculator 275
-Computer 6900
-Phone 10
-TV 600
-
-SELECT product, SUM(profit),AVG(profit) FROM t1 GROUP BY product WITH ROLLUP;
-product SUM(profit) AVG(profit)
-NULL 7785 519.0000
-Calculator 275 68.7500
-Computer 6900 1380.0000
-Phone 10 10.0000
-TV 600 120.0000
-
-# Sub totals
-SELECT product, country_id , year, SUM(profit) FROM t1
-GROUP BY product, country_id, year;
-product country_id year SUM(profit)
-Calculator 1 1999 50
-Calculator 1 2000 75
-Calculator 2 2000 150
-Computer 1 1999 2700
-Computer 1 2000 1500
-Computer 2 2000 2700
-Phone 3 2003 10
-TV 1 1999 250
-TV 1 2000 150
-TV 2 2000 200
-
-SELECT product, country_id , year, SUM(profit) FROM t1
-GROUP BY product, country_id, year WITH ROLLUP;
-product country_id year SUM(profit)
-NULL NULL NULL 7785
-Calculator NULL NULL 275
-Calculator 1 NULL 125
-Calculator 1 1999 50
-Calculator 1 2000 75
-Calculator 2 NULL 150
-Calculator 2 2000 150
-Computer NULL NULL 6900
-Computer 1 NULL 4200
-Computer 1 1999 2700
-Computer 1 2000 1500
-Computer 2 NULL 2700
-Computer 2 2000 2700
-Phone NULL NULL 10
-Phone 3 NULL 10
-Phone 3 2003 10
-TV NULL NULL 600
-TV 1 NULL 400
-TV 1 1999 250
-TV 1 2000 150
-TV 2 NULL 200
-TV 2 2000 200
-
-ANALYZE TABLE t1;
-
-EXPLAIN FORMAT='brief' SELECT product, country_id , year, SUM(profit)
-FROM t1 GROUP BY product, country_id, year WITH ROLLUP;
-id estRows task access object operator info
-Projection 8000.00 root Column#6->Column#11, Column#7->Column#12, Column#8->Column#13, Column#10
-└─HashAgg 8000.00 root group by:Column#16, Column#17, Column#18, Column#19, funcs:sum(Column#15)->Column#10, funcs:firstrow(Column#16)->Column#6, funcs:firstrow(Column#17)->Column#7, funcs:firstrow(Column#18)->Column#8
-  └─Projection 10000.00 root cast(executor__expand.t1.profit, decimal(10,0) BINARY)->Column#15, Column#6->Column#16, Column#7->Column#17, Column#8->Column#18, gid->Column#19
-    └─Expand 10000.00 root level-projection:[executor__expand.t1.profit, <nil>->Column#6, <nil>->Column#7, <nil>->Column#8, 0->gid],[executor__expand.t1.profit, Column#6, <nil>->Column#7, <nil>->Column#8, 1->gid],[executor__expand.t1.profit, Column#6, Column#7, <nil>->Column#8, 3->gid],[executor__expand.t1.profit, Column#6, Column#7, Column#8, 7->gid]; schema: [executor__expand.t1.profit,Column#6,Column#7,Column#8,gid]
-      └─Projection 10000.00 root executor__expand.t1.profit, executor__expand.t1.product->Column#6, executor__expand.t1.country_id->Column#7, executor__expand.t1.year->Column#8
-        └─TableReader 10000.00 root data:TableFullScan
-          └─TableFullScan 10000.00 cop[tikv] table:t1 keep order:false, stats:pseudo
-
-SELECT product, country_id , SUM(profit) FROM t1
-GROUP BY product, country_id WITH ROLLUP
-ORDER BY product DESC, country_id;
-product country_id SUM(profit)
-TV NULL 600
-TV 1 400
-TV 2 200
-Phone NULL 10
-Phone 3 10
-Computer NULL 6900
-Computer 1 4200
-Computer 2 2700
-Calculator NULL 275
-Calculator 1 125
-Calculator 2 150
-NULL NULL 7785
-
-# limit
-SELECT product, country_id , year, SUM(profit) FROM t1
-GROUP BY product, country_id, year WITH ROLLUP
-ORDER BY product, country_id, year LIMIT 5;
-product country_id year SUM(profit)
-NULL NULL NULL 7785
-Calculator NULL NULL 275
-Calculator 1 NULL 125
-Calculator 1 1999 50
-Calculator 1 2000 75
-
-SELECT product, country_id , year, SUM(profit) FROM t1
-GROUP BY product, country_id, year WITH ROLLUP
-ORDER BY product, country_id, year limit 3,3;
-product country_id year SUM(profit)
-Calculator 1 1999 50
-Calculator 1 2000 75
-Calculator 2 NULL 150
-
-SELECT product, country_id, COUNT(*), COUNT(distinct year)
-FROM t1 GROUP BY product, country_id;
-product country_id COUNT(*) COUNT(distinct year)
-Calculator 1 2 2
-Calculator 2 2 1
-Computer 1 3 2
-Computer 2 2 1
-Phone 3 1 1
-TV 1 3 2
-TV 2 2 1
-
-SELECT product, country_id, COUNT(*), COUNT(distinct year)
-FROM t1 GROUP BY product, country_id WITH ROLLUP;
-product country_id COUNT(*) COUNT(distinct year)
-NULL NULL 15 3
-Calculator NULL 4 2
-Calculator 1 2 2
-Calculator 2 2 1
-Computer NULL 5 2
-Computer 1 3 2
-Computer 2 2 1
-Phone NULL 1 1
-Phone 3 1 1
-TV NULL 5 2
-TV 1 3 2
-TV 2 2 1
-
-# Test of having
-SELECT product, country_id , year, SUM(profit) FROM t1
-GROUP BY product, country_id, year WITH ROLLUP HAVING country_id = 1;
-product country_id year SUM(profit)
-Calculator 1 NULL 125
-Calculator 1 1999 50
-Calculator 1 2000 75
-Computer 1 NULL 4200
-Computer 1 1999 2700
-Computer 1 2000 1500
-TV 1 NULL 400
-TV 1 1999 250
-TV 1 2000 150
-
-SELECT product, country_id , year, SUM(profit) FROM t1
-GROUP BY product, country_id, year WITH ROLLUP HAVING SUM(profit) > 200;
-product country_id year SUM(profit)
-NULL NULL NULL 7785
-Calculator NULL NULL 275
-Computer NULL NULL 6900
-Computer 1 NULL 4200
-Computer 1 1999 2700
-Computer 1 2000 1500
-Computer 2 NULL 2700
-Computer 2 2000 2700
-TV NULL NULL 600
-TV 1 NULL 400
-TV 1 1999 250
-
-SELECT product, country_id , year, SUM(profit) FROM t1
-GROUP BY product, country_id, year WITH ROLLUP HAVING SUM(profit) > 7000;
-product country_id year SUM(profit)
-NULL NULL NULL 7785
-
-# Functions
-SELECT CONCAT(product,':',country_id) AS 'prod', CONCAT(":",year,":") AS 'year',
-1+1, SUM(profit)/COUNT(*) FROM t1 GROUP BY 1,2 WITH ROLLUP;
-prod year 1+1 SUM(profit)/COUNT(*)
-NULL NULL 2 519.0000
-Calculator:1 NULL 2 62.5000
-Calculator:1 :1999: 2 50.0000
-Calculator:1 :2000: 2 75.0000
-Calculator:2 NULL 2 75.0000
-Calculator:2 :2000: 2 75.0000
-Computer:1 NULL 2 1400.0000
-Computer:1 :1999: 2 1350.0000
-Computer:1 :2000: 2 1500.0000
-Computer:2 NULL 2 1350.0000
-Computer:2 :2000: 2 1350.0000
-Phone:3 NULL 2 10.0000
-Phone:3 :2003: 2 10.0000
-TV:1 NULL 2 133.3333
-TV:1 :1999: 2 125.0000
-TV:1 :2000: 2 150.0000
-TV:2 NULL 2 100.0000
-TV:2 :2000: 2 100.0000
-
-SELECT product, SUM(profit)/COUNT(*) FROM t1 GROUP BY product WITH ROLLUP;
-product SUM(profit)/COUNT(*)
-NULL 519.0000
-Calculator 68.7500
-Computer 1380.0000
-Phone 10.0000
-TV 120.0000
-
-SELECT LEFT(product,4) AS prod, SUM(profit)/COUNT(*) FROM t1
-GROUP BY prod WITH ROLLUP;
-prod SUM(profit)/COUNT(*)
-NULL 519.0000
-Calc 68.7500
-Comp 1380.0000
-Phon 10.0000
-TV 120.0000
-
-SELECT CONCAT(product,':',country_id), 1+1, SUM(profit)/COUNT(*) FROM t1
-GROUP BY CONCAT(product,':',country_id) WITH ROLLUP;
-CONCAT(product,':',country_id) 1+1 SUM(profit)/COUNT(*)
-NULL 2 519.0000
-Calculator:1 2 62.5000
-Calculator:2 2 75.0000
-Computer:1 2 1400.0000
-Computer:2 2 1350.0000
-Phone:3 2 10.0000
-TV:1 2 133.3333
-TV:2 2 100.0000
-SET @saved_sql_mode = @@session.sql_mode;
-SET SESSION sql_mode= '';
-
-SELECT UPPER(product) AS prod,
-SUM(profit)/COUNT(*)
-FROM t1 GROUP BY prod WITH ROLLUP HAVING prod='COMPUTER' ;
-prod SUM(profit)/COUNT(*)
-COMPUTER 1380.0000
-SET SESSION sql_mode= @saved_sql_mode;
-
-# Joins
-SELECT product, country , year, SUM(profit) FROM t1,t2 WHERE
-t1.country_id=t2.country_id GROUP BY product, country, year WITH ROLLUP;
-product country year SUM(profit)
-NULL NULL NULL 7785
-Calculator NULL NULL 275
-Calculator India NULL 150
-Calculator India 2000 150
-Calculator USA NULL 125
-Calculator USA 1999 50
-Calculator USA 2000 75
-Computer NULL NULL 6900
-Computer India NULL 2700
-Computer India 2000 2700
-Computer USA NULL 4200
-Computer USA 1999 2700
-Computer USA 2000 1500
-Phone NULL NULL 10
-Phone Finland NULL 10
-Phone Finland 2003 10
-TV NULL NULL 600
-TV India NULL 200
-TV India 2000 200
-TV USA NULL 400
-TV USA 1999 250
-TV USA 2000 150
-
-SELECT product, `SUM` FROM (SELECT product, SUM(profit) AS 'sum' FROM t1
-GROUP BY product WITH ROLLUP) AS tmp
-WHERE product is null;
-product SUM
-NULL 7785
-
-SELECT product FROM t1 WHERE EXISTS
-(SELECT product, country_id , SUM(profit) FROM t1 AS t2
-WHERE t1.product=t2.product GROUP BY product, country_id WITH ROLLUP
-HAVING SUM(profit) > 6000);
-product
-Computer
-Computer
-Computer
-Computer
-Computer
-
-SELECT product, country_id , year, SUM(profit) FROM t1
-GROUP BY product, country_id, year HAVING country_id is NULL;
-product country_id year SUM(profit)
-
-SELECT CONCAT(':',product,':'), SUM(profit), AVG(profit) FROM t1
-GROUP BY product WITH ROLLUP;
-CONCAT(':',product,':') SUM(profit) AVG(profit)
-NULL 7785 519.0000
-:Calculator: 275 68.7500
-:Computer: 6900 1380.0000
-:Phone: 10 10.0000
-:TV: 600 120.0000
diff --git a/tests/integrationtest/t/executor/expand.test b/tests/integrationtest/t/executor/expand.test
deleted file mode 100644
index da83285249d7b..0000000000000
--- a/tests/integrationtest/t/executor/expand.test
+++ /dev/null
@@ -1,152 +0,0 @@
-CREATE TABLE t1(
-product VARCHAR(32),
-country_id INTEGER NOT NULL,
-year INTEGER,
-profit INTEGER);
-
-INSERT INTO t1 VALUES ( 'Computer', 2,2000, 1200),
-( 'TV', 1, 1999, 150),
-( 'Calculator', 1, 1999,50),
-( 'Computer', 1, 1999,1500),
-( 'Computer', 1, 2000,1500),
-( 'TV', 1, 2000, 150),
-( 'TV', 2, 2000, 100),
-( 'TV', 2, 2000, 100),
-( 'Calculator', 1, 2000,75),
-( 'Calculator', 2, 2000,75),
-( 'TV', 1, 1999, 100),
-( 'Computer', 1, 1999,1200),
-( 'Computer', 2, 2000,1500),
-( 'Calculator', 2, 2000,75),
-( 'Phone', 3, 2003,10)
-;
-
-CREATE TABLE t2 (
-country_id INTEGER PRIMARY KEY,
-country CHAR(20) NOT NULL);
-
-INSERT INTO t2 VALUES (1, 'USA'),(2,'India'), (3,'Finland');
-
---echo # First simple rollups, with just grand total
---echo
---sorted_result
-SELECT product, SUM(profit) FROM t1 GROUP BY product;
---echo
---sorted_result
-SELECT product, SUM(profit) FROM t1 GROUP BY product WITH ROLLUP;
---echo
---sorted_result
-SELECT product, SUM(profit) FROM t1 GROUP BY 1 WITH ROLLUP;
---echo
---sorted_result
-SELECT product, SUM(profit),AVG(profit) FROM t1 GROUP BY product WITH ROLLUP;
-
---echo
---echo # Sub totals
---sorted_result
-SELECT product, country_id , year, SUM(profit) FROM t1
-GROUP BY product, country_id, year;
---echo
---sorted_result
-SELECT product, country_id , year, SUM(profit) FROM t1
-GROUP BY product, country_id, year WITH ROLLUP;
---echo
-ANALYZE TABLE t1;
---echo
-EXPLAIN FORMAT='brief' SELECT product, country_id , year, SUM(profit)
-FROM t1 GROUP BY product, country_id, year WITH ROLLUP;
---echo
-SELECT product, country_id , SUM(profit) FROM t1
-GROUP BY product, country_id WITH ROLLUP
-ORDER BY product DESC, country_id;
-
---echo
---echo # limit
---sorted_result
-SELECT product, country_id , year, SUM(profit) FROM t1
-GROUP BY product, country_id, year WITH ROLLUP
-ORDER BY product, country_id, year LIMIT 5;
---echo
---sorted_result
-SELECT product, country_id , year, SUM(profit) FROM t1
-GROUP BY product, country_id, year WITH ROLLUP
-ORDER BY product, country_id, year limit 3,3;
---echo
---sorted_result
-SELECT product, country_id, COUNT(*), COUNT(distinct year)
-FROM t1 GROUP BY product, country_id;
---echo
---sorted_result
-SELECT product, country_id, COUNT(*), COUNT(distinct year)
-FROM t1 GROUP BY product, country_id WITH ROLLUP;
-
---echo
---echo # Test of having
---sorted_result
-SELECT product, country_id , year, SUM(profit) FROM t1
-GROUP BY product, country_id, year WITH ROLLUP HAVING country_id = 1;
---echo
---sorted_result
-SELECT product, country_id , year, SUM(profit) FROM t1
-GROUP BY product, country_id, year WITH ROLLUP HAVING SUM(profit) > 200;
---echo
---sorted_result
-SELECT product, country_id , year, SUM(profit) FROM t1
-GROUP BY product, country_id, year WITH ROLLUP HAVING SUM(profit) > 7000;
-
---echo
---echo # Functions
---sorted_result
-SELECT CONCAT(product,':',country_id) AS 'prod', CONCAT(":",year,":") AS 'year',
-1+1, SUM(profit)/COUNT(*) FROM t1 GROUP BY 1,2 WITH ROLLUP;
---echo
---sorted_result
-SELECT product, SUM(profit)/COUNT(*) FROM t1 GROUP BY product WITH ROLLUP;
---echo
---sorted_result
-SELECT LEFT(product,4) AS prod, SUM(profit)/COUNT(*) FROM t1
-GROUP BY prod WITH ROLLUP;
---echo
---sorted_result
-SELECT CONCAT(product,':',country_id), 1+1, SUM(profit)/COUNT(*) FROM t1
-GROUP BY CONCAT(product,':',country_id) WITH ROLLUP;
-
-SET @saved_sql_mode = @@session.sql_mode;
-SET SESSION sql_mode= '';
---echo
---sorted_result
-SELECT UPPER(product) AS prod,
- SUM(profit)/COUNT(*)
- FROM t1 GROUP BY prod WITH ROLLUP HAVING prod='COMPUTER' ;
-SET SESSION sql_mode= @saved_sql_mode;
-
---echo
---echo # Joins
---sorted_result
-SELECT product, country , year, SUM(profit) FROM t1,t2 WHERE
-t1.country_id=t2.country_id GROUP BY product, country, year WITH ROLLUP;
-
---echo
---sorted_result
-SELECT product, `SUM` FROM (SELECT product, SUM(profit) AS 'sum' FROM t1
- GROUP BY product WITH ROLLUP) AS tmp
-WHERE product is null;
-
---echo
---sorted_result
-SELECT product FROM t1 WHERE EXISTS
-(SELECT product, country_id , SUM(profit) FROM t1 AS t2
- WHERE t1.product=t2.product GROUP BY product, country_id WITH ROLLUP
- HAVING SUM(profit) > 6000);
-
---echo
---sorted_result
-# The following does not return the expected answer, but this is a limitation
-# in the implementation so we should just document it
-SELECT product, country_id , year, SUM(profit) FROM t1
-GROUP BY product, country_id, year HAVING country_id is NULL;
-
---echo
---sorted_result
-SELECT CONCAT(':',product,':'), SUM(profit), AVG(profit) FROM t1
-GROUP BY product WITH ROLLUP;
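
A note on the new nil guard in LogicalExpand.ExtractCorrelatedCols: as the added comment explains, a correlated subquery plan has its correlated columns extracted right after plan building, before GenLevelProjections has populated LevelExprs. The sketch below is illustrative only (it reuses the t1 schema and the EXISTS query from the deleted test above and is not part of the patch); it shows the query shape that takes this path: a correlated subquery whose inner block uses GROUP BY ... WITH ROLLUP and therefore contains a LogicalExpand.

    CREATE TABLE t1(
    product VARCHAR(32),
    country_id INTEGER NOT NULL,
    year INTEGER,
    profit INTEGER);

    -- The subquery is correlated via t1.product = t2.product, and its WITH ROLLUP
    -- makes the planner build a LogicalExpand for it. Without the guard,
    -- ExtractCorrelatedCols would evaluate len(p.LevelExprs[0]) while LevelExprs
    -- is still nil.
    SELECT product FROM t1 WHERE EXISTS
    (SELECT product, country_id, SUM(profit) FROM t1 AS t2
     WHERE t1.product = t2.product
     GROUP BY product, country_id WITH ROLLUP
     HAVING SUM(profit) > 6000);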