From e927c922b267c2d8a0414911ce076063c57770c6 Mon Sep 17 00:00:00 2001 From: Yiding Cui Date: Mon, 29 Apr 2019 18:06:27 +0800 Subject: [PATCH 1/6] *: fix bug when unsigned pk meets feedback ranges --- distsql/request_builder.go | 1 - executor/table_reader.go | 5 ++++- planner/core/rule_column_pruning.go | 17 +++++++++++++++-- statistics/histogram.go | 10 +++++++++- 4 files changed, 28 insertions(+), 5 deletions(-) diff --git a/distsql/request_builder.go b/distsql/request_builder.go index 3fbce7e02b51a..3c2a9abe16586 100644 --- a/distsql/request_builder.go +++ b/distsql/request_builder.go @@ -183,7 +183,6 @@ func TableRangesToKVRanges(tid int64, ranges []*ranger.Range, fb *statistics.Que if fb == nil || fb.Hist() == nil { return tableRangesToKVRangesWithoutSplit(tid, ranges) } - ranges = fb.Hist().SplitRange(ranges) krs := make([]kv.KeyRange, 0, len(ranges)) feedbackRanges := make([]*ranger.Range, 0, len(ranges)) for _, ran := range ranges { diff --git a/executor/table_reader.go b/executor/table_reader.go index 634b9bbf7a799..600f391cc5643 100644 --- a/executor/table_reader.go +++ b/executor/table_reader.go @@ -27,7 +27,7 @@ import ( "github.com/pingcap/tidb/types" "github.com/pingcap/tidb/util/chunk" "github.com/pingcap/tidb/util/ranger" - tipb "github.com/pingcap/tipb/go-tipb" + "github.com/pingcap/tipb/go-tipb" "golang.org/x/net/context" ) @@ -96,6 +96,9 @@ func (e *TableReaderExecutor) Open(ctx context.Context) error { } e.resultHandler = &tableResultHandler{} + if e.feedback != nil && e.feedback.Hist() != nil { + e.ranges = e.feedback.Hist().SplitRange(e.ranges) + } firstPartRanges, secondPartRanges := splitRanges(e.ranges, e.keepOrder, e.desc) firstResult, err := e.buildResp(ctx, firstPartRanges) if err != nil { diff --git a/planner/core/rule_column_pruning.go b/planner/core/rule_column_pruning.go index fccfb9b23ead8..51e99cf7c71f7 100644 --- a/planner/core/rule_column_pruning.go +++ b/planner/core/rule_column_pruning.go @@ -20,6 +20,7 @@ import ( "github.com/pingcap/parser/model" "github.com/pingcap/tidb/expression" "github.com/pingcap/tidb/infoschema" + "github.com/pingcap/tidb/mysql" ) type columnPruner struct { @@ -165,7 +166,15 @@ func (p *LogicalUnionScan) PruneColumns(parentUsedCols []*expression.Column) { // PruneColumns implements LogicalPlan interface. func (ds *DataSource) PruneColumns(parentUsedCols []*expression.Column) { used := getUsedList(parentUsedCols, ds.schema) + var ( + handleCol *expression.Column + handleColInfo *model.ColumnInfo + ) for i := len(used) - 1; i >= 0; i-- { + if ds.tableInfo.PKIsHandle && mysql.HasPriKeyFlag(ds.Columns[i].Flag) { + handleCol = ds.schema.Columns[i] + handleColInfo = ds.Columns[i] + } if !used[i] { ds.schema.Columns = append(ds.schema.Columns[:i], ds.schema.Columns[i+1:]...) ds.Columns = append(ds.Columns[:i], ds.Columns[i+1:]...) @@ -179,8 +188,12 @@ func (ds *DataSource) PruneColumns(parentUsedCols []*expression.Column) { // For SQL like `select 1 from t`, tikv's response will be empty if no column is in schema. // So we'll force to push one if schema doesn't have any column. if ds.schema.Len() == 0 && !infoschema.IsMemoryDB(ds.DBName.L) { - ds.Columns = append(ds.Columns, model.NewExtraHandleColInfo()) - ds.schema.Append(ds.newExtraHandleSchemaCol()) + if handleCol == nil { + handleCol = ds.newExtraHandleSchemaCol() + handleColInfo = model.NewExtraHandleColInfo() + } + ds.Columns = append(ds.Columns, handleColInfo) + ds.schema.Append(handleCol) } } diff --git a/statistics/histogram.go b/statistics/histogram.go index 940868a74fe1b..370e270e828d7 100644 --- a/statistics/histogram.go +++ b/statistics/histogram.go @@ -508,7 +508,15 @@ func validRange(ran *ranger.Range) bool { if ran.LowVal[0].Kind() == types.KindBytes { low, high = ran.LowVal[0].GetBytes(), ran.HighVal[0].GetBytes() } else { - low, high = codec.EncodeInt(nil, ran.LowVal[0].GetInt64()), codec.EncodeInt(nil, ran.HighVal[0].GetInt64()) + var err error + low, err = codec.EncodeKey(nil, nil, ran.LowVal[0]) + if err != nil { + return false + } + high, err = codec.EncodeKey(nil, nil, ran.HighVal[0]) + if err != nil { + return false + } } if ran.LowExclude { low = kv.Key(low).PrefixNext() From 0e3f073ccd581bccfd0f00ff2f7ed3cb2fc7d31c Mon Sep 17 00:00:00 2001 From: Yiding Cui Date: Thu, 23 May 2019 20:46:02 +0800 Subject: [PATCH 2/6] address comments --- distsql/request_builder.go | 2 +- executor/table_reader.go | 3 ++- statistics/histogram.go | 6 +++--- statistics/update_test.go | 6 +++--- 4 files changed, 9 insertions(+), 8 deletions(-) diff --git a/distsql/request_builder.go b/distsql/request_builder.go index 3c2a9abe16586..da1bd1ffc0e57 100644 --- a/distsql/request_builder.go +++ b/distsql/request_builder.go @@ -269,7 +269,7 @@ func IndexRangesToKVRanges(sc *stmtctx.StatementContext, tid, idxID int64, range feedbackRanges = append(feedbackRanges, &ranger.Range{LowVal: []types.Datum{types.NewBytesDatum(low)}, HighVal: []types.Datum{types.NewBytesDatum(high)}, LowExclude: false, HighExclude: true}) } - feedbackRanges = fb.Hist().SplitRange(feedbackRanges) + feedbackRanges = fb.Hist().SplitRange(sc, feedbackRanges) krs := make([]kv.KeyRange, 0, len(feedbackRanges)) for _, ran := range feedbackRanges { low, high := ran.LowVal[0].GetBytes(), ran.HighVal[0].GetBytes() diff --git a/executor/table_reader.go b/executor/table_reader.go index 600f391cc5643..00bc3d87dc3a7 100644 --- a/executor/table_reader.go +++ b/executor/table_reader.go @@ -96,8 +96,9 @@ func (e *TableReaderExecutor) Open(ctx context.Context) error { } e.resultHandler = &tableResultHandler{} + // Split ranges here since the unsigned part and signed part will swap their position when encoding the range to kv ranges. if e.feedback != nil && e.feedback.Hist() != nil { - e.ranges = e.feedback.Hist().SplitRange(e.ranges) + e.ranges = e.feedback.Hist().SplitRange(e.ctx.GetSessionVars().StmtCtx, e.ranges) } firstPartRanges, secondPartRanges := splitRanges(e.ranges, e.keepOrder, e.desc) firstResult, err := e.buildResp(ctx, firstPartRanges) diff --git a/statistics/histogram.go b/statistics/histogram.go index 370e270e828d7..2bbc4c8a07387 100644 --- a/statistics/histogram.go +++ b/statistics/histogram.go @@ -503,7 +503,7 @@ func (hg *Histogram) getIncreaseFactor(totalCount int64) float64 { // validRange checks if the range is valid, it is used by `SplitRange` to remove the invalid range, // the possible types of range are index key range and handle key range. -func validRange(ran *ranger.Range) bool { +func validRange(sc *stmtctx.StatementContext, ran *ranger.Range) bool { var low, high []byte if ran.LowVal[0].Kind() == types.KindBytes { low, high = ran.LowVal[0].GetBytes(), ran.HighVal[0].GetBytes() @@ -530,7 +530,7 @@ func validRange(ran *ranger.Range) bool { // SplitRange splits the range according to the histogram upper bound. Note that we treat last bucket's upper bound // as inf, so all the split ranges will totally fall in one of the (-inf, u(0)], (u(0), u(1)],...(u(n-3), u(n-2)], // (u(n-2), +inf), where n is the number of buckets, u(i) is the i-th bucket's upper bound. -func (hg *Histogram) SplitRange(ranges []*ranger.Range) []*ranger.Range { +func (hg *Histogram) SplitRange(sc *stmtctx.StatementContext, ranges []*ranger.Range) []*ranger.Range { split := make([]*ranger.Range, 0, len(ranges)) for len(ranges) > 0 { // Find the last bound that greater or equal to the LowVal. @@ -574,7 +574,7 @@ func (hg *Histogram) SplitRange(ranges []*ranger.Range) []*ranger.Range { HighExclude: false}) ranges[0].LowVal[0] = upper ranges[0].LowExclude = true - if !validRange(ranges[0]) { + if !validRange(sc, ranges[0]) { ranges = ranges[1:] } } diff --git a/statistics/update_test.go b/statistics/update_test.go index 1977993a97757..816aef606d7f0 100644 --- a/statistics/update_test.go +++ b/statistics/update_test.go @@ -604,7 +604,7 @@ func (s *testStatsUpdateSuite) TestSplitRange(c *C) { HighExclude: t.exclude[i+1], }) } - ranges = h.SplitRange(ranges) + ranges = h.SplitRange(nil, ranges) var ranStrs []string for _, ran := range ranges { ranStrs = append(ranStrs, ran.String()) @@ -1344,7 +1344,7 @@ func (s *testStatsUpdateSuite) TestUnsignedFeedbackRanges(c *C) { statistics.FeedbackProbability = 1 testKit.MustExec("use test") - testKit.MustExec("create table t (a tinyint unsigned, primary key(a))") + testKit.MustExec("create table t (a bigint unsigned, primary key(a))") for i := 0; i < 20; i++ { testKit.MustExec(fmt.Sprintf("insert into t values (%d)", i)) } @@ -1371,7 +1371,7 @@ func (s *testStatsUpdateSuite) TestUnsignedFeedbackRanges(c *C) { hist: "column:1 ndv:30 totColSize:0\n" + "num: 8 lower_bound: 0 upper_bound: 7 repeats: 0\n" + "num: 8 lower_bound: 8 upper_bound: 15 repeats: 0\n" + - "num: 14 lower_bound: 16 upper_bound: 255 repeats: 0", + "num: 14 lower_bound: 16 upper_bound: 18446744073709551615 repeats: 0", }, } is := s.do.InfoSchema() From 3c4230932998d027a83b37de4fe785df62f00d39 Mon Sep 17 00:00:00 2001 From: Yiding Cui Date: Fri, 24 May 2019 16:28:38 +0800 Subject: [PATCH 3/6] fix build failure --- statistics/feedback.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/statistics/feedback.go b/statistics/feedback.go index 3423eced30d84..ed80841184c43 100644 --- a/statistics/feedback.go +++ b/statistics/feedback.go @@ -1164,7 +1164,7 @@ func (q *QueryFeedback) dumpRangeFeedback(h *Handle, ran *ranger.Range, rangeCou ran.HighVal[0] = getMaxValue(q.hist.tp) } } - ranges := q.hist.SplitRange([]*ranger.Range{ran}) + ranges := q.hist.SplitRange(nil, []*ranger.Range{ran}) counts := make([]float64, 0, len(ranges)) sum := 0.0 for i, r := range ranges { From 737b5429f4f5b03f025cca64e057308caecfa4a2 Mon Sep 17 00:00:00 2001 From: Yiding Cui Date: Fri, 24 May 2019 16:48:30 +0800 Subject: [PATCH 4/6] fix planner unit test --- planner/core/logical_plan_test.go | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/planner/core/logical_plan_test.go b/planner/core/logical_plan_test.go index c6cc67e0a4ebb..40edfe90433a5 100644 --- a/planner/core/logical_plan_test.go +++ b/planner/core/logical_plan_test.go @@ -1122,14 +1122,14 @@ func (s *testPlanSuite) TestColumnPruning(c *C) { { sql: "select count(*) from t", ans: map[int][]string{ - 1: {"test.t._tidb_rowid"}, + 1: {"test.t.a"}, }, }, { sql: "select count(*) from t a join t b where a.a < 1", ans: map[int][]string{ 1: {"test.a.a"}, - 2: {"test.t._tidb_rowid"}, + 2: {"test.b.a"}, }, }, { @@ -1170,7 +1170,7 @@ func (s *testPlanSuite) TestColumnPruning(c *C) { { sql: "select exists (select count(*) from t where b = k.a) from t k", ans: map[int][]string{ - 1: {"test.t._tidb_rowid"}, + 1: {"test.k.a"}, }, }, { @@ -1228,8 +1228,8 @@ func (s *testPlanSuite) TestColumnPruning(c *C) { 3: {"test.t21.a"}, 5: {"t2.a"}, 8: {"test.t01.a"}, - 10: {"test.t._tidb_rowid"}, - 12: {"test.t._tidb_rowid"}, + 10: {"test.t3.a"}, + 12: {"test.t4.a"}, }, }, } From db2ad609d3718c08da5df8d78c0904fc421dcc59 Mon Sep 17 00:00:00 2001 From: Yiding Cui Date: Mon, 27 May 2019 13:31:01 +0800 Subject: [PATCH 5/6] tiny fix --- statistics/histogram.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/statistics/histogram.go b/statistics/histogram.go index 2bbc4c8a07387..5be059fe3f622 100644 --- a/statistics/histogram.go +++ b/statistics/histogram.go @@ -509,11 +509,11 @@ func validRange(sc *stmtctx.StatementContext, ran *ranger.Range) bool { low, high = ran.LowVal[0].GetBytes(), ran.HighVal[0].GetBytes() } else { var err error - low, err = codec.EncodeKey(nil, nil, ran.LowVal[0]) + low, err = codec.EncodeKey(sc, nil, ran.LowVal[0]) if err != nil { return false } - high, err = codec.EncodeKey(nil, nil, ran.HighVal[0]) + high, err = codec.EncodeKey(sc, nil, ran.HighVal[0]) if err != nil { return false } From 0e55cffb3edb8035829213c18bd4527e56924a22 Mon Sep 17 00:00:00 2001 From: Yiding Cui Date: Mon, 27 May 2019 17:22:02 +0800 Subject: [PATCH 6/6] address comment --- statistics/feedback.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/statistics/feedback.go b/statistics/feedback.go index ed80841184c43..c0af492ec20cb 100644 --- a/statistics/feedback.go +++ b/statistics/feedback.go @@ -1141,8 +1141,8 @@ func dumpFeedbackForIndex(h *Handle, q *QueryFeedback, t *Table) error { func (q *QueryFeedback) dumpRangeFeedback(h *Handle, ran *ranger.Range, rangeCount float64) error { lowIsNull := ran.LowVal[0].IsNull() + sc := &stmtctx.StatementContext{TimeZone: time.UTC} if q.tp == indexType { - sc := &stmtctx.StatementContext{TimeZone: time.UTC} lower, err := codec.EncodeKey(sc, nil, ran.LowVal[0]) if err != nil { return errors.Trace(err) @@ -1164,7 +1164,7 @@ func (q *QueryFeedback) dumpRangeFeedback(h *Handle, ran *ranger.Range, rangeCou ran.HighVal[0] = getMaxValue(q.hist.tp) } } - ranges := q.hist.SplitRange(nil, []*ranger.Range{ran}) + ranges := q.hist.SplitRange(sc, []*ranger.Range{ran}) counts := make([]float64, 0, len(ranges)) sum := 0.0 for i, r := range ranges {