Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

opt: perf improvements for large queries #85100

Merged
merged 3 commits into from
Jul 29, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
235 changes: 232 additions & 3 deletions pkg/sql/opt/bench/bench_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -326,7 +326,7 @@ func init() {
// on what each phase includes.
func BenchmarkPhases(b *testing.B) {
for _, query := range queriesToTest(b) {
h := newHarness(b, query)
h := newHarness(b, query, schemas)
b.Run(query.name, func(b *testing.B) {
b.Run("Simple", func(b *testing.B) {
for _, phase := range SimplePhases {
Expand Down Expand Up @@ -365,7 +365,7 @@ type harness struct {
optimizer xform.Optimizer
}

func newHarness(tb testing.TB, query benchQuery) *harness {
func newHarness(tb testing.TB, query benchQuery, schemas []string) *harness {
h := &harness{
ctx: context.Background(),
semaCtx: tree.MakeSemaContext(),
Expand Down Expand Up @@ -653,7 +653,7 @@ func queriesToTest(b *testing.B) []benchQuery {
func BenchmarkChain(b *testing.B) {
for i := 1; i < 20; i++ {
q := makeChain(i)
h := newHarness(b, q)
h := newHarness(b, q, schemas)
b.Run(q.name, func(b *testing.B) {
for i := 0; i < b.N; i++ {
h.runSimple(b, q, Explore)
Expand Down Expand Up @@ -697,3 +697,232 @@ func BenchmarkEndToEnd(b *testing.B) {
})
}
}

// slowSchemas lists the DDL statements that define the tables, enum type and
// secondary indexes referenced by the queries in slowQueries.
// BenchmarkSlowQueries passes this slice to newHarness.
var slowSchemas = []string{
// table64793_1: composite primary key plus one virtual computed column.
`
CREATE TABLE table64793_1 (
col1_0 CHAR NOT NULL, col1_1 BOOL NOT NULL, col1_2 REGPROC NOT NULL,
col1_3 REGPROCEDURE NOT NULL, col1_4 TIMETZ NOT NULL, col1_5 FLOAT8 NULL,
col1_6 INT2 NOT NULL, col1_7 BOOL, col1_8 BOX2D NOT NULL,
col1_9 REGNAMESPACE NOT NULL,
PRIMARY KEY (
col1_8 DESC, col1_9 DESC, col1_4 DESC, col1_1, col1_2 ASC, col1_3 DESC,
col1_0 DESC, col1_6
),
col1_10 INT2 NOT NULL AS (col1_6 + 22798:::INT8) VIRTUAL,
FAMILY (col1_4), FAMILY (col1_0, col1_5), FAMILY (col1_1),
FAMILY (col1_8, col1_3, col1_9, col1_7), FAMILY (col1_2), FAMILY (col1_6))
`,
// Enum type used by the _enum column of seed64793.
`
CREATE TYPE greeting64793 AS ENUM ('hello', 'howdy', 'hi', 'good day', 'morning');
`,
// seed64793: one column per scalar type, no explicit primary key.
`
CREATE TABLE seed64793 (
_int2 INT2,
_int4 INT4,
_int8 INT8,
_float4 FLOAT4,
_float8 FLOAT8,
_date DATE,
_timestamp TIMESTAMP,
_timestamptz TIMESTAMPTZ,
_interval INTERVAL,
_bool BOOL,
_decimal DECIMAL,
_string STRING,
_bytes BYTES,
_uuid UUID,
_inet INET,
_jsonb JSONB,
_enum greeting64793
);
`,
// Secondary indexes on seed64793.
`
CREATE INDEX on seed64793 (_int8, _float8, _date);
`,
`
CREATE INVERTED INDEX on seed64793 (_jsonb);
`,
// table64793_2: composite primary key and one multi-column UNIQUE constraint.
`
CREATE TABLE table64793_2 (
col1_0 "char" NOT NULL, col1_1 OID NOT NULL, col1_2 BIT(38) NOT NULL,
col1_3 BIT(18) NOT NULL, col1_4 BYTES NOT NULL, col1_5 INT8 NOT NULL,
col1_6 INTERVAL NOT NULL, col1_7 BIT(33) NOT NULL, col1_8 INTERVAL NULL,
col1_9 GEOMETRY NOT NULL, col1_10 BOOL NOT NULL, col1_11 INT2,
PRIMARY KEY (
col1_4 ASC, col1_7 DESC, col1_1 ASC, col1_2 ASC, col1_10 ASC, col1_5,
col1_0 ASC, col1_3, col1_6
),
UNIQUE (
col1_8 DESC, col1_11, col1_3 DESC, col1_7, col1_6 DESC, col1_4 ASC,
col1_1 DESC
)
);
`,
// table64793_3: virtual computed column plus several (partial) UNIQUE
// constraints and a secondary index.
`
CREATE TABLE table64793_3 (
col2_0 NAME NOT NULL, col2_1 TIMETZ NOT NULL,
PRIMARY KEY (col2_0 ASC, col2_1),
col2_2 STRING NOT NULL AS (lower(col2_0)) VIRTUAL,
UNIQUE (col2_0 DESC, col2_2 DESC, col2_1)
WHERE (table64793_3.col2_2 > e'\U00002603':::STRING)
OR (table64793_3.col2_0 != '"':::STRING),
UNIQUE (col2_1 ASC, col2_2, col2_0),
UNIQUE (col2_0 DESC,col2_1, col2_2),
INDEX (col2_1 DESC),
UNIQUE (col2_2 DESC, col2_0 ASC)
WHERE table64793_3.col2_2 = '"':::STRING
);
`,
// table64793_4: wide primary key, one partial UNIQUE constraint and one
// plain UNIQUE constraint.
`
CREATE TABLE table64793_4 (
col2_0 NAME NOT NULL, col2_1 TIMETZ NOT NULL, col3_2 REGPROC NOT NULL,
col3_3 "char", col3_4 BOX2D, col3_5 INT8 NULL, col3_6 TIMESTAMP NOT NULL,
col3_7 FLOAT8, col3_8 INT4 NULL, col3_9 INET NULL, col3_10 UUID NOT NULL,
col3_11 UUID NULL, col3_12 INT2 NOT NULL, col3_13 BIT(34),
col3_14 REGPROCEDURE NULL, col3_15 FLOAT8 NULL,
PRIMARY KEY (
col2_0 ASC, col2_1, col3_11 DESC, col3_13, col3_6, col3_3 DESC,
col3_15 ASC, col3_2 ASC, col3_4 ASC, col3_9 DESC, col3_12 ASC,
col3_8 ASC, col3_5, col3_14 ASC
),
UNIQUE (col3_2, col3_8 ASC)
WHERE ((((table64793_4.col3_5 < 0:::INT8)
AND (table64793_4.col3_3 != '':::STRING))
AND (table64793_4.col2_1 < '00:00:00+15:59:00':::TIMETZ))
AND (table64793_4.col3_12 > 0:::INT8))
AND (table64793_4.col3_15 <= 1.7976931348623157e+308:::FLOAT8),
UNIQUE (col3_10 DESC, col3_3 ASC, col2_1 DESC, col3_9 ASC)
);
`,
}

// slowQueries holds the long-running queries from #64793 that
// BenchmarkSlowQueries runs against the tables declared in slowSchemas.
// Neither query takes placeholder arguments (args is empty).
var slowQueries = [...]benchQuery{
// 1. The first long-running query taken from #64793.
{
name: "slow-query-1",
query: `
WITH with_186941 (col_1103773, col_1103774) AS (
SELECT
*
FROM
(
VALUES
('clvl', 3 :: INT2),
(
'n',
(
SELECT
tab_455284.col1_6 AS col_1103772
FROM
table64793_1@[0] AS tab_455284
ORDER BY
tab_455284.col1_2 DESC,
tab_455284.col1_1 DESC
LIMIT
1 ::: INT8
)
),
(NULL, 6736 ::: INT8)
) AS tab_455285 (col_1103773, col_1103774)
),
with_186942 (col_1103775) AS (
SELECT
*
FROM
(
VALUES
('yk'),
(NULL)
) AS tab_455286 (col_1103775)
)
SELECT
0 ::: OID AS col_1103776,
(
(-32244820164.24410487)::: DECIMAL :: DECIMAL + tab_455291.col1_10 :: INT8
):: DECIMAL AS col_1103777,
tab_455287._bool AS col_1103778
FROM
with_186942 AS cte_ref_54113,
seed64793@[0] AS tab_455287
JOIN seed64793 AS tab_455288
JOIN seed64793 AS tab_455289 ON (tab_455288._int8) = (tab_455289._int8)
AND (tab_455288._date) = (tab_455289._date)
AND (tab_455288._float8) = (tab_455289._float8)
JOIN table64793_1@[0] AS tab_455290
JOIN table64793_1@primary AS tab_455291
JOIN table64793_1@[0] AS tab_455295
JOIN seed64793 AS tab_455296
JOIN seed64793 AS tab_455297 ON (tab_455296._int8) = (tab_455297._int8)
AND (tab_455296._date) = (tab_455297._date) ON (tab_455295.col1_5) = (tab_455297._float8)
AND (tab_455295.col1_5) = (tab_455296._float8)
AND (tab_455295.col1_5) = (tab_455297._float8)
AND (tab_455295.col1_5) = (tab_455297._float8) ON (tab_455291.col1_2) = (tab_455295.tableoid)
AND (tab_455291.col1_7) = (tab_455295.col1_1) ON (tab_455290.col1_2) = (tab_455291.col1_9)
AND (tab_455290.col1_7) = (tab_455291.col1_7) ON (tab_455289._float8) = (tab_455296._float8) ON (tab_455287._float4) = (tab_455290.col1_5)
AND (tab_455287.tableoid) = (tab_455295.col1_9)
AND (tab_455287._bool) = (tab_455295.col1_7);
`,
args: []interface{}{},
},
// 2. The most recent long-running query from #64793 (as of July 2022).
{
name: "slow-query-2",
query: `
WITH with_121707 (col_692430) AS (
SELECT
*
FROM
(
VALUES
(
(-0.19099748134613037)::: FLOAT8
),
(0.9743397235870361 ::: FLOAT8),
(
(-1.6944892406463623)::: FLOAT8
)
) AS tab_297691 (col_692430)
)
SELECT
'-35 years -11 mons -571 days -08:18:57.001029' ::: INTERVAL AS col_692441
FROM
table64793_2@table64793_2_col1_8_col1_11_col1_3_col1_7_col1_6_col1_4_col1_1_key AS tab_297692
JOIN table64793_3@table64793_3_col2_0_col2_1_col2_2_key AS tab_297693
JOIN table64793_2@[0] AS tab_297694
JOIN seed64793@seed64793__int8__float8__date_idx AS tab_297695
RIGHT JOIN table64793_3@[0] AS tab_297696
JOIN table64793_4@table64793_4_col3_10_col3_3_col2_1_col3_9_key AS tab_297697 ON (tab_297696.col2_0) = (tab_297697.col3_3) CROSS
JOIN table64793_4@[0] AS tab_297698
JOIN table64793_3 AS tab_297699 ON (tab_297698.col2_0) = (tab_297699.col2_0) ON TRUE
JOIN table64793_4@[0] AS tab_297700 ON (tab_297697.col3_12) = (tab_297700.col3_8) ON (tab_297694.tableoid) = (tab_297695.tableoid)
AND (tab_297694.col1_5) = (tab_297698.col3_8)
AND (tab_297694.tableoid) = (tab_297698.col3_2)
AND (tab_297694.col1_5) = (tab_297697.col3_12) ON (tab_297693.col2_2) = (tab_297700.col3_3)
AND (tab_297693.col2_1) = (tab_297698.col2_1)
AND (tab_297693.tableoid) = (tab_297699.tableoid)
AND (tab_297693.col2_1) = (tab_297697.col2_1)
AND (tab_297693.tableoid) = (tab_297694.col1_1)
AND (tab_297693.col2_2) = (tab_297695._string)
AND (tab_297693.col2_2) = (tab_297696.col2_0)
AND (tab_297693.col2_2) = (tab_297698.col3_3) ON (tab_297692.col1_11) = (tab_297694.col1_11)
ORDER BY
tab_297695._enum DESC
LIMIT
57 ::: INT8;
`,
args: []interface{}{},
},
}

// BenchmarkSlowQueries runs every entry of slowQueries through the Explore
// phase. Each query gets its own harness built over slowSchemas, with the
// session's ReorderJoinsLimit set to 8, and is reported as a sub-benchmark
// named after the query.
func BenchmarkSlowQueries(b *testing.B) {
	const reorderJoinsLimit = 8

	for idx := range slowQueries {
		q := slowQueries[idx]

		// The harness is created outside the sub-benchmark, so it is shared
		// by all b.N iterations of that sub-benchmark.
		hrn := newHarness(b, q, slowSchemas)
		hrn.evalCtx.SessionData().ReorderJoinsLimit = reorderJoinsLimit

		run := func(b *testing.B) {
			for n := 0; n < b.N; n++ {
				hrn.runSimple(b, q, Explore)
			}
		}
		b.Run(q.name, run)
	}
}
2 changes: 2 additions & 0 deletions pkg/sql/opt/props/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ go_library(
srcs = [
"cardinality.go",
"col_stats_map.go",
"equiv_set.go",
"func_dep.go",
"histogram.go",
"logical.go",
Expand Down Expand Up @@ -43,6 +44,7 @@ go_test(
srcs = [
"cardinality_test.go",
"col_stats_map_test.go",
"equiv_set_test.go",
"func_dep_rand_test.go",
"func_dep_test.go",
"histogram_test.go",
Expand Down
112 changes: 112 additions & 0 deletions pkg/sql/opt/props/equiv_set.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,112 @@
// Copyright 2022 The Cockroach Authors.
//
// Use of this software is governed by the Business Source License
// included in the file licenses/BSL.txt.
//
// As of the Change Date specified in that file, in accordance with
// the Business Source License, use of this software will be governed
// by the Apache License, Version 2.0, included in the file
// licenses/APL.txt.

package props

import "github.com/cockroachdb/cockroach/pkg/sql/opt"

// EquivSet describes a set of equivalence groups of columns. It can answer
// queries about which columns are equivalent to one another. Equivalence groups
// are always non-empty and disjoint.
//
// TODO(drewk): incorporate EquivSets into FuncDepSets.
type EquivSet struct {
// buf is a fixed-size array of equalityBufferSize ColSets embedded in the
// struct; NewEquivSet slices it to provide the initial storage for groups.
buf [equalityBufferSize]opt.ColSet
// groups holds the current equivalence groups, one ColSet per group.
groups []opt.ColSet
}

// equalityBufferSize is the length of the buf array embedded in EquivSet, i.e.
// the number of equivalence groups that fit in the initial storage.
const equalityBufferSize = 1

// NewEquivSet builds an empty EquivSet whose group slice starts out backed by
// the one-element buffer embedded in the struct. This targets the common case
// in which only a single equivalence group is ever stored.
//
// NOTE(review): the set is returned by value, so the groups slice of the
// returned copy aliases the buf of the value constructed here rather than the
// copy's own buf field — confirm this is the intended behavior.
func NewEquivSet() EquivSet {
	var set EquivSet
	set.groups = set.buf[:0]
	return set
}

// Reset empties the EquivSet so it can be reused. The backing slice is kept,
// but every stored group is first overwritten with a zero ColSet so that the
// set no longer references whatever those ColSets pointed to.
func (eq *EquivSet) Reset() {
	groups := eq.groups
	for idx := 0; idx < len(groups); idx++ {
		groups[idx] = opt.ColSet{}
	}
	eq.groups = groups[:0]
}

// Add records that all columns in equivCols are equivalent to one another.
//
// The first existing group that shares a column with equivCols absorbs the
// new columns, after which tryMergeGroups folds in any later groups that now
// overlap it. If equivCols is already contained in that group nothing
// changes. When no existing group overlaps, a copy of equivCols becomes a new
// group.
func (eq *EquivSet) Add(equivCols opt.ColSet) {
	for idx := range eq.groups {
		group := &eq.groups[idx]
		if !group.Intersects(equivCols) {
			continue
		}
		// Only the first overlapping group is extended; a fully contained
		// equivalence adds no information and is skipped.
		if !equivCols.SubsetOf(*group) {
			group.UnionWith(equivCols)
			eq.tryMergeGroups(idx)
		}
		return
	}

	// Nothing overlapped: start a fresh group with its own copy of the columns.
	eq.groups = append(eq.groups, equivCols.Copy())
}

// AddFromFDs walks the dependencies of fdset and, for each one flagged as an
// equivalence, adds the union of its from and to columns to the EquivSet.
// Dependencies that are not equivalences are ignored.
func (eq *EquivSet) AddFromFDs(fdset *FuncDepSet) {
	deps := fdset.deps
	for i := range deps {
		if dep := &deps[i]; dep.equiv {
			eq.Add(dep.from.Union(dep.to))
		}
	}
}

// AreColsEquiv reports whether left and right belong to the same equivalence
// group. The answer is decided by the first group that contains either
// column: it is true only if that group contains both. If no group contains
// either column the result is false.
func (eq *EquivSet) AreColsEquiv(left, right opt.ColumnID) bool {
	for i := range eq.groups {
		group := &eq.groups[i]
		hasLeft, hasRight := group.Contains(left), group.Contains(right)
		if hasLeft || hasRight {
			return hasLeft && hasRight
		}
	}
	return false
}

// tryMergeGroups attempts to merge the equality group at the given index with
// any of the *following* groups. If a group can be merged, it is removed after
// its columns are added to the given group.
//
// Removal is a swap-remove: the last group is moved into the vacated slot and
// the slice is shortened by one. The scan therefore runs from the end of the
// slice back towards idx, so the group moved into slot i is always one that
// has already been examined. A forward scan would step past the moved group
// without checking it. For example, with groups [(1,2), (3,4), (5,6)] and
// group 0 extended to (1,2,3,5), a forward scan merges (3,4), moves (5,6) into
// slot 1 and then terminates, leaving two overlapping groups.
//
// Re-checking already-examined groups is unnecessary: groups are pairwise
// disjoint on entry (apart from the group at idx), so absorbing one group
// cannot make the group at idx overlap a group it did not overlap before.
func (eq *EquivSet) tryMergeGroups(idx int) {
	for i := len(eq.groups) - 1; i > idx; i-- {
		if !eq.groups[idx].Intersects(eq.groups[i]) {
			continue
		}
		eq.groups[idx].UnionWith(eq.groups[i])

		// Swap-remove slot i and zero the old tail so the slice's backing
		// array does not keep a stale reference to the removed ColSet.
		last := len(eq.groups) - 1
		eq.groups[i] = eq.groups[last]
		eq.groups[last] = opt.ColSet{}
		eq.groups = eq.groups[:last]
	}
}

// String renders the EquivSet as a bracketed list of its groups in storage
// order, separated by ", ". Each group is formatted with its own String
// method, and an empty set renders as "[]".
func (eq *EquivSet) String() string {
	out := "["
	for i := range eq.groups {
		sep := ", "
		if i == 0 {
			sep = ""
		}
		out += sep + eq.groups[i].String()
	}
	out += "]"
	return out
}
Loading