Skip to content

Commit

Permalink
Merge #44239
Browse files Browse the repository at this point in the history
44239: sql: optimize NeededColumnFamilyIDs r=solongordon a=solongordon

I made the logic for determining which column families need to be
scanned more clever in two ways:

- Previously we always included column family 0 as a sentinel, since
  other column families have no KV entry if all their columns are
  null. This is not necessary if any of the column families being
  scanned has a NOT NULL column, because that family is guaranteed to
  have a KV entry whenever the row exists.
- If a needed column is indexed and not composite, it can be decoded
  from the key, so we don't need to take it into account when
  determining the needed column families.

Release note: None

Co-authored-by: Solon Gordon <[email protected]>
  • Loading branch information
craig[bot] and solongordon committed Feb 11, 2020
2 parents 58d1a57 + 2ba11ea commit 5035d70
Show file tree
Hide file tree
Showing 14 changed files with 308 additions and 44 deletions.
3 changes: 2 additions & 1 deletion pkg/sql/backfill/backfill.go
Original file line number Diff line number Diff line change
Expand Up @@ -338,7 +338,8 @@ func (ib *IndexBackfiller) Init(desc *sqlbase.ImmutableTableDescriptor) error {
ib.added = append(ib.added, *idx)
for i := range cols {
id := cols[i].ID
if idx.ContainsColumnID(id) || idx.EncodingType == sqlbase.PrimaryIndexEncoding {
if idx.ContainsColumnID(id) ||
idx.GetEncodingType(desc.PrimaryIndex.ID) == sqlbase.PrimaryIndexEncoding {
valNeededForCol.Add(i)
}
}
Expand Down
4 changes: 2 additions & 2 deletions pkg/sql/logictest/testdata/logic_test/exec_merge_join_dist
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,10 @@
# Regression test for #39317.

statement ok
CREATE TABLE l (a INT PRIMARY KEY, b INT)
CREATE TABLE l (a INT PRIMARY KEY, b INT, FAMILY (a, b))

statement ok
CREATE TABLE r (a INT PRIMARY KEY, b INT)
CREATE TABLE r (a INT PRIMARY KEY, b INT, FAMILY (a, b))

statement ok
INSERT INTO l VALUES (1, 10), (2, 20), (3, 30)
Expand Down
153 changes: 153 additions & 0 deletions pkg/sql/logictest/testdata/logic_test/family
Original file line number Diff line number Diff line change
Expand Up @@ -275,3 +275,156 @@ query I
SELECT xyz.z FROM y INNER LOOKUP JOIN xyz ON y.y = xyz.y
----
NULL

# Tests for NeededColumnFamilyIDs logic. This function is used for point lookups
# to determine the minimal set of column families which need to be scanned.
subtest needed_column_families

statement ok
CREATE TABLE t1 (
a INT PRIMARY KEY, b INT NOT NULL, c INT, d INT,
FAMILY (d), FAMILY (c), FAMILY (b), FAMILY (a)
);
INSERT INTO t1 VALUES (10, 20, 30, 40)

# A point lookup on the primary key column should use family 0 (even if the
# column is not in that family) because the column can be decoded from the key.
query I
SELECT a FROM t1 WHERE a = 10
----
10

query TT
SELECT field, description FROM [EXPLAIN SELECT a FROM t1 WHERE a = 10] WHERE field IN ('table', 'spans')
----
table t1@primary
spans /10/0-/10/1

# A point lookup on a non-nullable column allows us to scan only that column
# family.
query I
SELECT b FROM t1 WHERE a = 10
----
20

query TT
SELECT field, description FROM [EXPLAIN SELECT b FROM t1 WHERE a = 10] WHERE field IN ('table', 'spans')
----
table t1@primary
spans /10/2/1-/10/2/2

# Even if we also select the primary key column, we can still scan the single
# column family because that column can be decoded from the key.
query II
SELECT a, b FROM t1 WHERE a = 10
----
10 20

query TT
SELECT field, description FROM [EXPLAIN SELECT a, b FROM t1 WHERE a = 10] WHERE field IN ('table', 'spans')
----
table t1@primary
spans /10/2/1-/10/2/2

# A point lookup on a nullable column requires also scanning column family 0 as
# a sentinel.
query I
SELECT c FROM t1 WHERE a = 10
----
30

query TT
SELECT field, description FROM [EXPLAIN SELECT c FROM t1 WHERE a = 10] WHERE field IN ('table', 'spans')
----
table t1@primary
spans /10/0-/10/1/2

# A point lookup on two columns in non-adjacent column families results in two
# spans.
query II
SELECT b, d FROM t1 WHERE a = 10
----
20 40

query TT
SELECT field, description FROM [EXPLAIN SELECT b, d FROM t1 WHERE a = 10] WHERE field IN ('table', 'spans')
----
table t1@primary
spans /10/0-/10/1 /10/2/1-/10/2/2

# Unique secondary indexes store non-indexed primary key columns in column
# family 0.
statement ok
CREATE UNIQUE INDEX b_idx ON t1 (b) STORING (c, d)

query I
SELECT a FROM t1 WHERE b = 20
----
10

query TT
SELECT field, description FROM [EXPLAIN SELECT a FROM t1 WHERE b = 20] WHERE field IN ('table', 'spans')
----
table t1@b_idx
spans /20/0-/20/1

# If the primary key column is composite, we do need to scan its column family
# to retrieve its value.
statement ok
CREATE TABLE t2 (
a DECIMAL PRIMARY KEY, b INT, c INT NOT NULL, d INT,
FAMILY (d), FAMILY (c), FAMILY (b), FAMILY (a)
);
INSERT INTO t2 VALUES (10.00, 20, 30, 40)

# A point lookup on the primary key column should use its family.
query T
SELECT a FROM t2 WHERE a = 10
----
10.00

query TT
SELECT field, description FROM [EXPLAIN SELECT a FROM t2 WHERE a = 10] WHERE field IN ('table', 'spans')
----
table t2@primary
spans /1E+1/3/1-/1E+1/3/2

# A point lookup on `a` and `b` should scan both of their families.
query TI
SELECT a, b FROM t2 WHERE a = 10
----
10.00 20

query TT
SELECT field, description FROM [EXPLAIN SELECT a, b FROM t2 WHERE a = 10] WHERE field IN ('table', 'spans')
----
table t2@primary
spans /1E+1/2/1-/1E+1/3/2

# Secondary indexes always store their composite values in column family 0.
statement ok
CREATE UNIQUE INDEX a_idx ON t2 (a) STORING (b, c, d)

# A point lookup on the composite column should use family 0.
query TI
SELECT a, b FROM t2@a_idx WHERE a = 10
----
10.00 20

query TT
SELECT field, description FROM [EXPLAIN SELECT a FROM t2@a_idx WHERE a = 10] WHERE field IN ('table', 'spans')
----
table t2@a_idx
spans /1E+1/0-/1E+1/1

# A point lookup on `a` and `b` should use column family 0 and b's family.
query TI
SELECT a, b FROM t2@a_idx WHERE a = 10
----
10.00 20

query TT
SELECT field, description FROM [EXPLAIN SELECT a, b FROM t2@a_idx WHERE a = 10] WHERE field IN ('table', 'spans')
----
table t2@a_idx
spans /1E+1/0-/1E+1/1 /1E+1/2/1-/1E+1/2/2
3 changes: 2 additions & 1 deletion pkg/sql/logictest/testdata/logic_test/interleaved_join
Original file line number Diff line number Diff line change
Expand Up @@ -29,8 +29,9 @@
# Create tables #
#################

# TODO(solon): Remove the FAMILY declarations when #44699 is resolved.
statement ok
CREATE TABLE parent1 (pid1 INT PRIMARY KEY, pa1 INT)
CREATE TABLE parent1 (pid1 INT PRIMARY KEY, pa1 INT, FAMILY (pid1), FAMILY (pa1))

statement ok
CREATE TABLE parent2 (pid2 INT PRIMARY KEY, pa2 INT)
Expand Down
5 changes: 4 additions & 1 deletion pkg/sql/logictest/testdata/logic_test/vectorize
Original file line number Diff line number Diff line change
Expand Up @@ -703,7 +703,10 @@ SET tracing=off
# Making sure that colBatchScan operator can parallelize scans.
# This test is similar to that in testplannerlogic/select
statement ok
CREATE TABLE tpar (a INT PRIMARY KEY, item STRING, price FLOAT, UNIQUE INDEX item (item), UNIQUE INDEX p (price))
CREATE TABLE tpar (
a INT PRIMARY KEY, item STRING, price FLOAT, FAMILY (a, item, price),
UNIQUE INDEX item (item), UNIQUE INDEX p (price)
)

statement ok
ALTER TABLE tpar SPLIT AT VALUES(5)
Expand Down
4 changes: 2 additions & 2 deletions pkg/sql/opt/exec/execbuilder/testdata/fk_opt
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,10 @@ SET enable_insert_fast_path = false
# -- Tests with INSERT --

statement ok
CREATE TABLE parent (p INT PRIMARY KEY, other INT UNIQUE)
CREATE TABLE parent (p INT PRIMARY KEY, other INT UNIQUE, FAMILY (p, other))

statement ok
CREATE TABLE child (c INT PRIMARY KEY, p INT NOT NULL REFERENCES parent(p))
CREATE TABLE child (c INT PRIMARY KEY, p INT NOT NULL REFERENCES parent(p), FAMILY (c, p))

query TTT
EXPLAIN INSERT INTO child VALUES (1,1), (2,2)
Expand Down
1 change: 1 addition & 0 deletions pkg/sql/opt/exec/execbuilder/testdata/interleaved
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ CREATE TABLE level4 (
k1 INT,
k2 INT,
k3 INT,
FAMILY (k1, k2, k3),
PRIMARY KEY (k1, k2, k3),
CONSTRAINT fk3 FOREIGN KEY (k1, k2, k3) REFERENCES level3
) INTERLEAVE IN PARENT level3 (k1, k2, k3)
Expand Down
3 changes: 2 additions & 1 deletion pkg/sql/opt/exec/execbuilder/testdata/join_order
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,8 @@ CREATE TABLE abc (
a INT PRIMARY KEY,
b INT,
c INT,
d INT
d INT,
FAMILY (a, b, c, d)
)

statement ok
Expand Down
4 changes: 2 additions & 2 deletions pkg/sql/opt/exec/execbuilder/testdata/select
Original file line number Diff line number Diff line change
Expand Up @@ -269,7 +269,7 @@ EXPLAIN (VERBOSE) SELECT * FROM [53(1) AS num_ref_alias]
# Basic filter combinations.
# ------------------------------------------------------------------------------
statement ok
CREATE TABLE a (x INT PRIMARY KEY, y INT);
CREATE TABLE a (x INT PRIMARY KEY, y INT, FAMILY (x, y));

query TTTTT
EXPLAIN (VERBOSE) SELECT * FROM a WHERE x > 1
Expand Down Expand Up @@ -1237,7 +1237,7 @@ filter · · (x, y, z) +x,+z
# Verify that multi-span point lookups are parallelized.
# ------------------------------------------------------
statement ok
CREATE TABLE a (a INT PRIMARY KEY, item STRING, price FLOAT, UNIQUE INDEX item (item), UNIQUE INDEX p (price))
CREATE TABLE a (a INT PRIMARY KEY, item STRING, price FLOAT, FAMILY (a, item, price), UNIQUE INDEX item (item), UNIQUE INDEX p (price))

statement ok
CREATE TABLE b (a INT, b INT, c INT NULL, d INT NULL, PRIMARY KEY (a, b), FAMILY (a, b, c, d))
Expand Down
1 change: 1 addition & 0 deletions pkg/sql/opt/exec/execbuilder/testdata/upsert
Original file line number Diff line number Diff line change
Expand Up @@ -123,6 +123,7 @@ CREATE TABLE indexed (
b INT,
c INT DEFAULT(10),
d INT AS (a + c) STORED,
FAMILY (a, b, c, d),
UNIQUE INDEX secondary (d, b),
CHECK (c > 0)
)
Expand Down
2 changes: 1 addition & 1 deletion pkg/sql/row/fetcher.go
Original file line number Diff line number Diff line change
Expand Up @@ -893,7 +893,7 @@ func (rf *Fetcher) processKV(
}

// For covering secondary indexes, allow for decoding as a primary key.
if (!table.isSecondaryIndex || table.index.EncodingType == sqlbase.PrimaryIndexEncoding) &&
if table.index.GetEncodingType(table.desc.PrimaryIndex.ID) == sqlbase.PrimaryIndexEncoding &&
len(rf.keyRemainingBytes) > 0 {
// If familyID is 0, kv.Value contains values for composite key columns.
// These columns already have a table.row value assigned above, but that value
Expand Down
4 changes: 2 additions & 2 deletions pkg/sql/span/span_builder.go
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,7 @@ func MakeBuilder(table *sqlbase.TableDescriptor, index *sqlbase.IndexDescriptor)
// SetNeededColumns sets the needed columns on the Builder. This information
// is used by MaybeSplitSpanIntoSeparateFamilies.
func (s *Builder) SetNeededColumns(neededCols util.FastIntSet) {
s.neededFamilies = sqlbase.NeededColumnFamilyIDs(s.table.ColumnIdxMap(), s.table.Families, neededCols)
s.neededFamilies = sqlbase.NeededColumnFamilyIDs(neededCols, s.table, s.index)
}

// UnsetNeededColumns resets the needed columns for column family specific optimizations
Expand Down Expand Up @@ -259,7 +259,7 @@ func (s *Builder) appendSpansFromConstraintSpan(
// families, only scan the relevant column families. This is disabled for
// deletions to ensure that the entire row is deleted.
if !forDelete && needed.Len() > 0 && span.Key.Equal(span.EndKey) {
neededFamilyIDs := sqlbase.NeededColumnFamilyIDs(s.table.ColumnIdxMap(), s.table.Families, needed)
neededFamilyIDs := sqlbase.NeededColumnFamilyIDs(needed, s.table, s.index)
if s.CanSplitSpanIntoSeparateFamilies(len(neededFamilyIDs), cs.StartKey().Length(), containsNull) {
return sqlbase.SplitSpanIntoSeparateFamilies(appendTo, span, neededFamilyIDs), nil
}
Expand Down
Loading

0 comments on commit 5035d70

Please sign in to comment.