Skip to content

Commit

Permalink
planner: complete the memo/group-expression/group logic and add test …
Browse files Browse the repository at this point in the history
…for derive stats (#57973)

ref #51664
  • Loading branch information
AilinKid authored Dec 11, 2024
1 parent 7ac73e9 commit ba55be5
Show file tree
Hide file tree
Showing 13 changed files with 758 additions and 74 deletions.
6 changes: 4 additions & 2 deletions pkg/planner/cascades/memo/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -11,13 +11,14 @@ go_library(
importpath = "github.com/pingcap/tidb/pkg/planner/cascades/memo",
visibility = ["//visibility:public"],
deps = [
"//pkg/expression",
"//pkg/planner/cascades/base",
"//pkg/planner/cascades/pattern",
"//pkg/planner/cascades/util",
"//pkg/planner/core/base",
"//pkg/planner/property",
"//pkg/sessionctx",
"//pkg/util/intest",
"@com_github_pingcap_failpoint//:failpoint",
],
)

Expand All @@ -36,9 +37,10 @@ go_test(
deps = [
"//pkg/expression",
"//pkg/planner/cascades/base",
"//pkg/planner/core",
"//pkg/planner/core/operator/logicalop",
"//pkg/testkit/testsetup",
"//pkg/util/mock",
"@com_github_pingcap_failpoint//:failpoint",
"@com_github_stretchr_testify//require",
"@org_uber_go_goleak//:goleak",
],
Expand Down
42 changes: 40 additions & 2 deletions pkg/planner/cascades/memo/group.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,12 +17,12 @@ package memo
import (
"container/list"
"fmt"
"strconv"

"github.com/pingcap/tidb/pkg/planner/cascades/base"
"github.com/pingcap/tidb/pkg/planner/cascades/pattern"
"github.com/pingcap/tidb/pkg/planner/cascades/util"
"github.com/pingcap/tidb/pkg/planner/property"
"github.com/pingcap/tidb/pkg/util/intest"
)

var _ base.HashEquals = &Group{}
Expand Down Expand Up @@ -127,9 +127,47 @@ func (g *Group) GetFirstElem(operand pattern.Operand) *list.Element {
return g.Operand2FirstExpr[operand]
}

// HasLogicalProperty reports whether a logical property has been attached to this group.
func (g *Group) HasLogicalProperty() bool {
	return g.logicalProp != nil
}

// GetLogicalProperty returns the logical property attached to this group.
// The property is asserted (via intest.Assert) to be non-nil before it is returned.
func (g *Group) GetLogicalProperty() *property.LogicalProperty {
	prop := g.logicalProp
	intest.Assert(prop != nil)
	return prop
}

// SetLogicalProperty attaches prop to this group as its logical property,
// replacing whatever was stored before.
func (g *Group) SetLogicalProperty(prop *property.LogicalProperty) {
	g.logicalProp = prop
}

// IsExplored reports whether this group has been marked as explored.
func (g *Group) IsExplored() bool {
	return g.explored
}

// SetExplored marks this group as explored.
func (g *Group) SetExplored() {
	g.explored = true
}

// String writes a textual description of the group, built from its group ID, into w.
// NOTE(review): the method takes a writer and returns nothing, so it does not match the
// fmt.Stringer signature (String() string) — confirm which interface is actually intended.
// NOTE(review): this listing is a rendered diff; the "inputs:" line looks like the
// pre-change line and the "GID:" line its replacement — confirm against the real file.
func (g *Group) String(w util.StrBufferWriter) {
w.WriteString(fmt.Sprintf("inputs:%s", strconv.Itoa(int(g.groupID))))
w.WriteString(fmt.Sprintf("GID:%d", int(g.groupID)))
}

// ForEachGE walks the group's logical expressions from front to back and calls f on
// each one. The walk stops as soon as f returns false.
func (g *Group) ForEachGE(f func(ge *GroupExpression) bool) {
	for cur := g.logicalExpressions.Front(); cur != nil; cur = cur.Next() {
		if keepGoing := f(cur.Value.(*GroupExpression)); !keepGoing {
			return
		}
	}
}

// NewGroup creates a new Group with given logical prop.
Expand Down
44 changes: 44 additions & 0 deletions pkg/planner/cascades/memo/group_expr.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,10 +15,13 @@
package memo

import (
"github.com/pingcap/failpoint"
"github.com/pingcap/tidb/pkg/expression"
base2 "github.com/pingcap/tidb/pkg/planner/cascades/base"
"github.com/pingcap/tidb/pkg/planner/cascades/pattern"
"github.com/pingcap/tidb/pkg/planner/cascades/util"
"github.com/pingcap/tidb/pkg/planner/core/base"
"github.com/pingcap/tidb/pkg/planner/property"
"github.com/pingcap/tidb/pkg/util/intest"
)

Expand Down Expand Up @@ -122,3 +125,44 @@ func (e *GroupExpression) Init(h base2.Hasher) {
e.Hash64(h)
e.hash64 = h.Sum64()
}

// DeriveLogicalProp derives the logical property (schema and stats) of the group that
// owns this group expression, using the logical properties of its input groups.
//
// It is a no-op when the owning group already has a logical property. It does not
// recurse: it reads each input group's property directly, because groups are examined
// and initialized bottom-up, so the children are expected to have theirs already.
//
// The property is attached to the group only after derivation has succeeded, so a
// failing DeriveStats call never leaves the group holding an empty property that would
// make later calls return early without deriving anything.
//
// It returns the error reported by DeriveStats, if any.
func (e *GroupExpression) DeriveLogicalProp() (err error) {
	group := e.GetGroup()
	if group.HasLogicalProperty() {
		return nil
	}
	childStats := make([]*property.StatsInfo, 0, len(e.Inputs))
	childSchema := make([]*expression.Schema, 0, len(e.Inputs))
	for _, childG := range e.Inputs {
		childGProp := childG.GetLogicalProperty()
		childStats = append(childStats, childGProp.Stats)
		childSchema = append(childSchema, childGProp.Schema)
	}
	// The logical op itself is still the source of the group's schema.
	// todo: functional dependency
	tmpSchema := e.LogicalPlan.Schema()
	tmpStats := e.LogicalPlan.StatsInfo()
	// Only logical ops newly created by an XForm need their stats rebuilt; in the memo
	// init phase every logical op already carries its stats, so those are used as-is.
	if tmpStats == nil {
		skipDeriveStats := false
		failpoint.Inject("MockPlanSkipMemoDeriveStats", func(val failpoint.Value) {
			skipDeriveStats = val.(bool)
		})
		if !skipDeriveStats {
			// Only the basic stats can be derived bottom-up here; no colGroups required
			// by parents can be passed down, hence the nil argument.
			tmpStats, err = e.LogicalPlan.DeriveStats(childStats, tmpSchema, childSchema, nil)
			if err != nil {
				return err
			}
		}
	}
	// Publish the fully populated property in one step.
	prop := property.NewLogicalProp()
	prop.Schema = tmpSchema
	prop.Stats = tmpStats
	group.SetLogicalProperty(prop)
	return nil
}
124 changes: 84 additions & 40 deletions pkg/planner/cascades/memo/memo.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,17 +19,13 @@ import (

base2 "github.com/pingcap/tidb/pkg/planner/cascades/base"
"github.com/pingcap/tidb/pkg/planner/core/base"
"github.com/pingcap/tidb/pkg/sessionctx"
"github.com/pingcap/tidb/pkg/util/intest"
)

// Memo is the main structure of the memo package.
type Memo struct {
// ctx is the context of the memo.
sCtx sessionctx.Context

// groupIDGen is the incremental group id for internal usage.
groupIDGen GroupIDGenerator
groupIDGen *GroupIDGenerator

// rootGroup is the root group of the memo.
rootGroup *Group
Expand All @@ -48,84 +44,132 @@ type Memo struct {
}

// NewMemo creates a new memo.
func NewMemo(ctx sessionctx.Context) *Memo {
func NewMemo() *Memo {
return &Memo{
sCtx: ctx,
groupIDGen: GroupIDGenerator{id: 0},
groupIDGen: &GroupIDGenerator{id: 0},
groups: list.New(),
groupID2Group: make(map[GroupID]*list.Element),
hasher: base2.NewHashEqualer(),
}
}

// GetHasher resets the memo's hasher and returns it, ready to use.
func (m *Memo) GetHasher() base2.Hasher {
m.hasher.Reset()
return m.hasher
func (mm *Memo) GetHasher() base2.Hasher {
mm.hasher.Reset()
return mm.hasher
}

// CopyIn copies a logical plan into the memo with format as GroupExpression.
func (m *Memo) CopyIn(target *Group, lp base.LogicalPlan) *GroupExpression {
// CopyIn copies a MemoExpression representation into the memo with format as GroupExpression inside.
// The generic logical forest inside memo is represented as memo group expression tree, while for entering
// and re-feeding the memo, we use the memoExpression as the currency:
//
// entering(init memo)
//
// lp ┌──────────┐
// / \ │ memo: │
// lp lp --copyIN-> │ G(ge) │
// / \ │ / \ │
// ... ... │ G G │
// └──────────┘
//
// re-feeding (intake XForm output)
//
// lp ┌──────────┐
// / \ │ memo: │
// GE lp --copyIN-> │ G(ge) │
// | │ / \ │
// GE │ G G │
// └──────────┘
//
// A bare lp is either a newly created logical op, or one whose child has changed and thereby invalidated the
// group it originally belonged to; such an op is turned back into a bare lp so that CopyIn can re-insert it.
func (mm *Memo) CopyIn(target *Group, lp base.LogicalPlan) (*GroupExpression, error) {
// Group the children first.
childGroups := make([]*Group, 0, len(lp.Children()))
for _, child := range lp.Children() {
// todo: child.getGroupExpression.GetGroup directly
groupExpr := m.CopyIn(nil, child)
group := groupExpr.group
intest.Assert(group != nil)
intest.Assert(group != target)
childGroups = append(childGroups, group)
var currentChildG *Group
if ge, ok := child.(*GroupExpression); ok {
// which means it's the earliest unchanged GroupExpression from rule XForm.
currentChildG = ge.GetGroup()
} else {
// which means it's a new/changed logical op, downward to get its input group ids to complete it.
ge, err := mm.CopyIn(nil, child)
if err != nil {
return nil, err
}
currentChildG = ge.GetGroup()
}
intest.Assert(currentChildG != nil)
intest.Assert(currentChildG != target)
childGroups = append(childGroups, currentChildG)
}

hasher := m.GetHasher()
hasher := mm.GetHasher()
groupExpr := NewGroupExpression(lp, childGroups)
groupExpr.Init(hasher)
m.insertGroupExpression(groupExpr, target)
// todo: new group need to derive the logical property.
return groupExpr
if mm.InsertGroupExpression(groupExpr, target) && target == nil {
// derive logical property for new group.
err := groupExpr.DeriveLogicalProp()
if err != nil {
return nil, err
}
}
return groupExpr, nil
}

// GetGroups gets all groups in the memo.
func (m *Memo) GetGroups() *list.List {
return m.groups
func (mm *Memo) GetGroups() *list.List {
return mm.groups
}

// GetGroupID2Group gets the map from group id to group.
func (m *Memo) GetGroupID2Group() map[GroupID]*list.Element {
return m.groupID2Group
func (mm *Memo) GetGroupID2Group() map[GroupID]*list.Element {
return mm.groupID2Group
}

// GetRootGroup gets the root group of the memo.
func (m *Memo) GetRootGroup() *Group {
return m.rootGroup
func (mm *Memo) GetRootGroup() *Group {
return mm.rootGroup
}

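// InsertGroupExpression inserts groupExpr into the target group; when target is nil, a new group is created for it.
// The returned bool is meant to indicate whether groupExpr was inserted into a new group, but the current
// implementation always returns true (group merging is still a todo), so it does not distinguish the two cases.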
func (m *Memo) insertGroupExpression(groupExpr *GroupExpression, target *Group) bool {
func (mm *Memo) InsertGroupExpression(groupExpr *GroupExpression, target *Group) bool {
// for group merge, here groupExpr is the new groupExpr with undetermined belonged group.
// we need to use groupExpr hash to find whether there is same groupExpr existed before.
// if existed and the existed groupExpr.Group is not same with target, we should merge them up.
// todo: merge group
if target == nil {
target = m.NewGroup()
m.groups.PushBack(target)
m.groupID2Group[target.groupID] = m.groups.Back()
target = mm.NewGroup()
mm.groups.PushBack(target)
mm.groupID2Group[target.groupID] = mm.groups.Back()
}
target.Insert(groupExpr)
return true
}

// NewGroup creates a new group.
func (m *Memo) NewGroup() *Group {
func (mm *Memo) NewGroup() *Group {
group := NewGroup(nil)
group.groupID = m.groupIDGen.NextGroupID()
group.groupID = mm.groupIDGen.NextGroupID()
return group
}

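// Init initializes the memo from a logical plan, converting the logical plan tree into a group tree.
// NOTE(review): the error returned by CopyIn is discarded here, and CopyIn returns a nil GroupExpression
// on failure — confirm that calling GetGroup on it is safe, or surface the error to the caller.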
func (m *Memo) Init(plan base.LogicalPlan) *GroupExpression {
intest.Assert(m.groups.Len() == 0)
gE := m.CopyIn(nil, plan)
m.rootGroup = gE.GetGroup()
func (mm *Memo) Init(plan base.LogicalPlan) *GroupExpression {
intest.Assert(mm.groups.Len() == 0)
gE, _ := mm.CopyIn(nil, plan)
mm.rootGroup = gE.GetGroup()
return gE
}

// ForEachGroup walks every group in the memo from front to back and calls f on each
// one. The walk stops as soon as f returns false.
func (mm *Memo) ForEachGroup(f func(g *Group) bool) {
	for cur := mm.GetGroups().Front(); cur != nil; cur = cur.Next() {
		if keepGoing := f(cur.Value.(*Group)); !keepGoing {
			return
		}
	}
}
Loading

0 comments on commit ba55be5

Please sign in to comment.